1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
5 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; uitofp i64 -> f64 of a per-lane (VGPR) value loaded from memory: convert the
; two halves separately and recombine with ldexp(hi, 32) + lo.
define amdgpu_kernel void @v_uint_to_fp_i64_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: v_uint_to_fp_i64_to_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v1, s3
; SI-NEXT:    v_add_i32_e32 v0, vcc, s2, v0
; SI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; SI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cvt_f64_u32_e32 v[1:2], v1
; SI-NEXT:    v_cvt_f64_u32_e32 v[3:4], v0
; SI-NEXT:    v_ldexp_f64 v[0:1], v[1:2], 32
; SI-NEXT:    v_mov_b32_e32 v2, s0
; SI-NEXT:    v_add_f64 v[0:1], v[0:1], v[3:4]
; SI-NEXT:    v_mov_b32_e32 v3, s1
; SI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; SI-NEXT:    s_endpgm
;
; VI-LABEL: v_uint_to_fp_i64_to_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cvt_f64_u32_e32 v[1:2], v1
; VI-NEXT:    v_cvt_f64_u32_e32 v[3:4], v0
; VI-NEXT:    v_ldexp_f64 v[1:2], v[1:2], 32
; VI-NEXT:    v_add_f64 v[0:1], v[1:2], v[3:4]
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %val = load i64, ptr addrspace(1) %gep, align 8
  %result = uitofp i64 %val to double
  store double %result, ptr addrspace(1) %out
  ret void
}
; uitofp i64 -> f64 with an SGPR (kernel-argument) source.
define amdgpu_kernel void @s_uint_to_fp_i64_to_f64(ptr addrspace(1) %out, i64 %in) {
; SI-LABEL: s_uint_to_fp_i64_to_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cvt_f64_u32_e32 v[0:1], s3
; SI-NEXT:    v_cvt_f64_u32_e32 v[2:3], s2
; SI-NEXT:    v_mov_b32_e32 v4, s0
; SI-NEXT:    v_mov_b32_e32 v5, s1
; SI-NEXT:    v_ldexp_f64 v[0:1], v[0:1], 32
; SI-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
; SI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; SI-NEXT:    s_endpgm
;
; VI-LABEL: s_uint_to_fp_i64_to_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cvt_f64_u32_e32 v[0:1], s3
; VI-NEXT:    v_cvt_f64_u32_e32 v[2:3], s2
; VI-NEXT:    v_ldexp_f64 v[0:1], v[0:1], 32
; VI-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %cast = uitofp i64 %in to double
  store double %cast, ptr addrspace(1) %out, align 8
  ret void
}
; uitofp <2 x i64> -> <2 x double> with an SGPR (kernel-argument) source.
define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f64(ptr addrspace(1) %out, <2 x i64> %in) {
; SI-LABEL: s_uint_to_fp_v2i64_to_v2f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x4
; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cvt_f64_u32_e32 v[0:1], s3
; SI-NEXT:    v_cvt_f64_u32_e32 v[2:3], s1
; SI-NEXT:    v_cvt_f64_u32_e32 v[4:5], s2
; SI-NEXT:    v_cvt_f64_u32_e32 v[6:7], s0
; SI-NEXT:    v_ldexp_f64 v[0:1], v[0:1], 32
; SI-NEXT:    v_ldexp_f64 v[8:9], v[2:3], 32
; SI-NEXT:    v_add_f64 v[2:3], v[0:1], v[4:5]
; SI-NEXT:    v_add_f64 v[0:1], v[8:9], v[6:7]
; SI-NEXT:    v_mov_b32_e32 v4, s4
; SI-NEXT:    v_mov_b32_e32 v5, s5
; SI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; SI-NEXT:    s_endpgm
;
; VI-LABEL: s_uint_to_fp_v2i64_to_v2f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x10
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cvt_f64_u32_e32 v[0:1], s3
; VI-NEXT:    v_cvt_f64_u32_e32 v[2:3], s1
; VI-NEXT:    v_cvt_f64_u32_e32 v[6:7], s0
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT:    v_ldexp_f64 v[0:1], v[0:1], 32
; VI-NEXT:    v_ldexp_f64 v[4:5], v[2:3], 32
; VI-NEXT:    v_cvt_f64_u32_e32 v[2:3], s2
; VI-NEXT:    v_add_f64 v[2:3], v[0:1], v[2:3]
; VI-NEXT:    v_add_f64 v[0:1], v[4:5], v[6:7]
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT:    s_endpgm
  %cast = uitofp <2 x i64> %in to <2 x double>
  store <2 x double> %cast, ptr addrspace(1) %out, align 16
  ret void
}
; uitofp <4 x i64> -> <4 x double>; the result is split into two dwordx4 stores.
define amdgpu_kernel void @s_uint_to_fp_v4i64_to_v4f64(ptr addrspace(1) %out, <4 x i64> %in) {
; SI-LABEL: s_uint_to_fp_v4i64_to_v4f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x8
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cvt_f64_u32_e32 v[0:1], s11
; SI-NEXT:    v_cvt_f64_u32_e32 v[4:5], s9
; SI-NEXT:    v_cvt_f64_u32_e32 v[2:3], s10
; SI-NEXT:    v_cvt_f64_u32_e32 v[6:7], s8
; SI-NEXT:    v_cvt_f64_u32_e32 v[8:9], s15
; SI-NEXT:    v_cvt_f64_u32_e32 v[10:11], s13
; SI-NEXT:    v_ldexp_f64 v[0:1], v[0:1], 32
; SI-NEXT:    v_ldexp_f64 v[4:5], v[4:5], 32
; SI-NEXT:    v_add_f64 v[2:3], v[0:1], v[2:3]
; SI-NEXT:    v_add_f64 v[0:1], v[4:5], v[6:7]
; SI-NEXT:    v_cvt_f64_u32_e32 v[4:5], s14
; SI-NEXT:    v_cvt_f64_u32_e32 v[12:13], s12
; SI-NEXT:    v_ldexp_f64 v[6:7], v[8:9], 32
; SI-NEXT:    v_ldexp_f64 v[8:9], v[10:11], 32
; SI-NEXT:    s_add_u32 s2, s0, 16
; SI-NEXT:    s_addc_u32 s3, s1, 0
; SI-NEXT:    v_add_f64 v[6:7], v[6:7], v[4:5]
; SI-NEXT:    v_add_f64 v[4:5], v[8:9], v[12:13]
; SI-NEXT:    v_mov_b32_e32 v9, s3
; SI-NEXT:    v_mov_b32_e32 v8, s2
; SI-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
; SI-NEXT:    s_nop 0
; SI-NEXT:    v_mov_b32_e32 v5, s1
; SI-NEXT:    v_mov_b32_e32 v4, s0
; SI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; SI-NEXT:    s_endpgm
;
; VI-LABEL: s_uint_to_fp_v4i64_to_v4f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x20
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cvt_f64_u32_e32 v[2:3], s15
; VI-NEXT:    v_cvt_f64_u32_e32 v[4:5], s13
; VI-NEXT:    v_cvt_f64_u32_e32 v[0:1], s11
; VI-NEXT:    v_cvt_f64_u32_e32 v[6:7], s9
; VI-NEXT:    v_ldexp_f64 v[8:9], v[2:3], 32
; VI-NEXT:    v_ldexp_f64 v[4:5], v[4:5], 32
; VI-NEXT:    v_ldexp_f64 v[0:1], v[0:1], 32
; VI-NEXT:    v_ldexp_f64 v[10:11], v[6:7], 32
; VI-NEXT:    v_cvt_f64_u32_e32 v[6:7], s14
; VI-NEXT:    v_cvt_f64_u32_e32 v[12:13], s12
; VI-NEXT:    v_cvt_f64_u32_e32 v[2:3], s10
; VI-NEXT:    v_cvt_f64_u32_e32 v[14:15], s8
; VI-NEXT:    v_add_f64 v[6:7], v[8:9], v[6:7]
; VI-NEXT:    v_add_f64 v[4:5], v[4:5], v[12:13]
; VI-NEXT:    v_add_f64 v[2:3], v[0:1], v[2:3]
; VI-NEXT:    v_add_f64 v[0:1], v[10:11], v[14:15]
; VI-NEXT:    s_add_u32 s2, s0, 16
; VI-NEXT:    s_addc_u32 s3, s1, 0
; VI-NEXT:    v_mov_b32_e32 v11, s3
; VI-NEXT:    v_mov_b32_e32 v9, s1
; VI-NEXT:    v_mov_b32_e32 v10, s2
; VI-NEXT:    v_mov_b32_e32 v8, s0
; VI-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
; VI-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
; VI-NEXT:    s_endpgm
  %cast = uitofp <4 x i64> %in to <4 x double>
  store <4 x double> %cast, ptr addrspace(1) %out, align 16
  ret void
}
; uitofp i32 -> f64 maps directly to a single v_cvt_f64_u32.
define amdgpu_kernel void @s_uint_to_fp_i32_to_f64(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_uint_to_fp_i32_to_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[4:5], 0x2
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cvt_f64_u32_e32 v[0:1], s2
; SI-NEXT:    v_mov_b32_e32 v3, s1
; SI-NEXT:    v_mov_b32_e32 v2, s0
; SI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; SI-NEXT:    s_endpgm
;
; VI-LABEL: s_uint_to_fp_i32_to_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cvt_f64_u32_e32 v[0:1], s2
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %cast = uitofp i32 %in to double
  store double %cast, ptr addrspace(1) %out, align 8
  ret void
}
; uitofp <2 x i32> -> <2 x double>; SI and VI produce identical code (GCN prefix).
define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f64(ptr addrspace(1) %out, <2 x i32> %in) {
; GCN-LABEL: s_uint_to_fp_v2i32_to_v2f64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_cvt_f64_u32_e32 v[2:3], s3
; GCN-NEXT:    v_cvt_f64_u32_e32 v[0:1], s2
; GCN-NEXT:    v_mov_b32_e32 v5, s1
; GCN-NEXT:    v_mov_b32_e32 v4, s0
; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT:    s_endpgm
  %cast = uitofp <2 x i32> %in to <2 x double>
  store <2 x double> %cast, ptr addrspace(1) %out, align 16
  ret void
}
; uitofp <4 x i32> -> <4 x double>; result split into two dwordx4 stores.
define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f64(ptr addrspace(1) %out, <4 x i32> %in) {
; SI-LABEL: s_uint_to_fp_v4i32_to_v4f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x4
; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
; SI-NEXT:    v_cvt_f64_u32_e32 v[6:7], s3
; SI-NEXT:    v_cvt_f64_u32_e32 v[4:5], s2
; SI-NEXT:    s_add_u32 s0, s4, 16
; SI-NEXT:    v_cvt_f64_u32_e32 v[2:3], s1
; SI-NEXT:    s_addc_u32 s1, s5, 0
; SI-NEXT:    v_mov_b32_e32 v9, s1
; SI-NEXT:    v_mov_b32_e32 v8, s0
; SI-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
; SI-NEXT:    s_nop 0
; SI-NEXT:    v_mov_b32_e32 v4, s4
; SI-NEXT:    v_mov_b32_e32 v5, s5
; SI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; SI-NEXT:    s_endpgm
;
; VI-LABEL: s_uint_to_fp_v4i32_to_v4f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x10
; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
; VI-NEXT:    v_cvt_f64_u32_e32 v[6:7], s3
; VI-NEXT:    v_cvt_f64_u32_e32 v[4:5], s2
; VI-NEXT:    s_add_u32 s0, s4, 16
; VI-NEXT:    v_cvt_f64_u32_e32 v[2:3], s1
; VI-NEXT:    s_addc_u32 s1, s5, 0
; VI-NEXT:    v_mov_b32_e32 v9, s1
; VI-NEXT:    v_mov_b32_e32 v8, s0
; VI-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
; VI-NEXT:    s_nop 0
; VI-NEXT:    v_mov_b32_e32 v4, s4
; VI-NEXT:    v_mov_b32_e32 v5, s5
; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT:    s_endpgm
  %cast = uitofp <4 x i32> %in to <4 x double>
  store <4 x double> %cast, ptr addrspace(1) %out, align 16
  ret void
}
282 ; We can't fold the SGPRs into v_cndmask_b32_e32, because it already
283 ; uses an SGPR (implicit vcc).
define amdgpu_kernel void @uint_to_fp_i1_to_f64(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: uint_to_fp_i1_to_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[4:5], 0x2
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_eq_u32 s2, 0
; SI-NEXT:    s_cselect_b32 s2, 0x3ff00000, 0
; SI-NEXT:    v_mov_b32_e32 v3, s1
; SI-NEXT:    v_mov_b32_e32 v1, s2
; SI-NEXT:    v_mov_b32_e32 v2, s0
; SI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uint_to_fp_i1_to_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_eq_u32 s2, 0
; VI-NEXT:    s_cselect_b32 s2, 0x3ff00000, 0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v1, s2
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %cmp = icmp eq i32 %in, 0
  %fp = uitofp i1 %cmp to double
  store double %fp, ptr addrspace(1) %out, align 4
  ret void
}
; uitofp of an i1 kernel argument (a loaded bit, not a compare result):
; materialize 0/1 with cndmask, then convert.
define amdgpu_kernel void @uint_to_fp_i1_to_f64_load(ptr addrspace(1) %out, i1 %in) {
; SI-LABEL: uint_to_fp_i1_to_f64_load:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[4:5], 0x2
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_bitcmp1_b32 s2, 0
; SI-NEXT:    s_cselect_b64 s[2:3], -1, 0
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[2:3]
; SI-NEXT:    v_cvt_f64_u32_e32 v[0:1], v0
; SI-NEXT:    v_mov_b32_e32 v3, s1
; SI-NEXT:    v_mov_b32_e32 v2, s0
; SI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uint_to_fp_i1_to_f64_load:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_bitcmp1_b32 s2, 0
; VI-NEXT:    s_cselect_b64 s[2:3], -1, 0
; VI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[2:3]
; VI-NEXT:    v_cvt_f64_u32_e32 v[0:1], v0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %fp = uitofp i1 %in to double
  store double %fp, ptr addrspace(1) %out, align 8
  ret void
}
; uitofp i8 -> f64: zero-extend with an AND mask, then convert.
define amdgpu_kernel void @s_uint_to_fp_i8_to_f64(ptr addrspace(1) %out, i8 %in) {
; SI-LABEL: s_uint_to_fp_i8_to_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[4:5], 0x2
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_and_b32 s2, s2, 0xff
; SI-NEXT:    v_cvt_f64_u32_e32 v[0:1], s2
; SI-NEXT:    v_mov_b32_e32 v3, s1
; SI-NEXT:    v_mov_b32_e32 v2, s0
; SI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; SI-NEXT:    s_endpgm
;
; VI-LABEL: s_uint_to_fp_i8_to_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_and_b32 s2, s2, 0xff
; VI-NEXT:    v_cvt_f64_u32_e32 v[0:1], s2
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %fp = uitofp i8 %in to double
  store double %fp, ptr addrspace(1) %out
  ret void
}
; Same i8 conversion with a VGPR (function-argument) source; returns the f64.
define double @v_uint_to_fp_i8_to_f64(i8 %in) {
; SI-LABEL: v_uint_to_fp_i8_to_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_and_b32_e32 v0, 0xff, v0
; SI-NEXT:    v_cvt_f64_u32_e32 v[0:1], v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_uint_to_fp_i8_to_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, 0xffff
; VI-NEXT:    v_and_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; VI-NEXT:    v_cvt_f64_u32_e32 v[0:1], v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %fp = uitofp i8 %in to double
  ret double %fp
}
; select between 1.0 and 0.0 on a compare: only the high dword needs a
; conditional select (0x3ff00000), the low dword is always zero.
define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_select_uint_to_fp_i1_vals_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[4:5], 0x2
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_eq_u32 s2, 0
; SI-NEXT:    s_cselect_b32 s2, 0x3ff00000, 0
; SI-NEXT:    v_mov_b32_e32 v3, s1
; SI-NEXT:    v_mov_b32_e32 v1, s2
; SI-NEXT:    v_mov_b32_e32 v2, s0
; SI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; SI-NEXT:    s_endpgm
;
; VI-LABEL: s_select_uint_to_fp_i1_vals_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_eq_u32 s2, 0
; VI-NEXT:    s_cselect_b32 s2, 0x3ff00000, 0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v1, s2
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %cmp = icmp eq i32 %in, 0
  %select = select i1 %cmp, double 1.0, double 0.0
  store double %select, ptr addrspace(1) %out, align 8
  ret void
}
; VGPR version of the 1.0/0.0 select; SI and VI match (GCN prefix).
define void @v_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: v_select_uint_to_fp_i1_vals_f64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v4, 0x3ff00000
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
; GCN-NEXT:    v_mov_b32_e32 v3, 0
; GCN-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[3:4]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %in, 0
  %select = select i1 %cmp, double 1.0, double 0.0
  store double %select, ptr addrspace(1) %out, align 8
  ret void
}
; Same select with the i64 bit pattern of 1.0 (u0x3ff0000000000000);
; codegen is identical to the f64-typed version.
define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_select_uint_to_fp_i1_vals_i64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[4:5], 0x2
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_eq_u32 s2, 0
; SI-NEXT:    s_cselect_b32 s2, 0x3ff00000, 0
; SI-NEXT:    v_mov_b32_e32 v3, s1
; SI-NEXT:    v_mov_b32_e32 v1, s2
; SI-NEXT:    v_mov_b32_e32 v2, s0
; SI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; SI-NEXT:    s_endpgm
;
; VI-LABEL: s_select_uint_to_fp_i1_vals_i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_eq_u32 s2, 0
; VI-NEXT:    s_cselect_b32 s2, 0x3ff00000, 0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v1, s2
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %cmp = icmp eq i32 %in, 0
  %select = select i1 %cmp, i64 u0x3ff0000000000000, i64 0
  store i64 %select, ptr addrspace(1) %out, align 8
  ret void
}
; VGPR version of the i64-typed select; SI and VI match (GCN prefix).
define void @v_select_uint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: v_select_uint_to_fp_i1_vals_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v4, 0x3ff00000
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
; GCN-NEXT:    v_mov_b32_e32 v3, 0
; GCN-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[3:4]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %in, 0
  %select = select i1 %cmp, i64 u0x3ff0000000000000, i64 0
  store i64 %select, ptr addrspace(1) %out, align 8
  ret void
}
502 ; TODO: This should swap the selected order / invert the compare and do it.
define amdgpu_kernel void @s_swap_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_swap_select_uint_to_fp_i1_vals_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[4:5], 0x2
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_eq_u32 s2, 0
; SI-NEXT:    s_cselect_b32 s2, 0, 0x3ff00000
; SI-NEXT:    v_mov_b32_e32 v3, s1
; SI-NEXT:    v_mov_b32_e32 v1, s2
; SI-NEXT:    v_mov_b32_e32 v2, s0
; SI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; SI-NEXT:    s_endpgm
;
; VI-LABEL: s_swap_select_uint_to_fp_i1_vals_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_eq_u32 s2, 0
; VI-NEXT:    s_cselect_b32 s2, 0, 0x3ff00000
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v1, s2
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %cmp = icmp eq i32 %in, 0
  %select = select i1 %cmp, double 0.0, double 1.0
  store double %select, ptr addrspace(1) %out, align 8
  ret void
}
537 define void @v_swap_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
538 ; GCN-LABEL: v_swap_select_uint_to_fp_i1_vals_f64:
540 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
541 ; GCN-NEXT: v_mov_b32_e32 v4, 0x3ff00000
542 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
543 ; GCN-NEXT: v_mov_b32_e32 v3, 0
544 ; GCN-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
545 ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
546 ; GCN-NEXT: s_waitcnt vmcnt(0)
547 ; GCN-NEXT: s_setpc_b64 s[30:31]
548 %cmp = icmp eq i32 %in, 0
549 %select = select i1 %cmp, double 0.0, double 1.0
550 store double %select, ptr addrspace(1) %out, align 8