1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
; Intrinsic declaration; its result is used as the GEP index into %in by
; v_sint_to_fp_i64_to_f64 below.
5 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; sitofp of an i32 kernel argument to double, stored to %out. Both targets are
; expected to convert with a single v_cvt_f64_i32 reading the loaded SGPR; the
; checks differ only in the s_load_dword offset used for %in (0x2 for hawaii,
; 0x8 for fiji).
7 define amdgpu_kernel void @sint_to_fp_i32_to_f64(ptr addrspace(1) %out, i32 %in) {
8 ; CI-LABEL: sint_to_fp_i32_to_f64:
10 ; CI-NEXT: s_load_dword s2, s[4:5], 0x2
11 ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
12 ; CI-NEXT: s_waitcnt lgkmcnt(0)
13 ; CI-NEXT: v_cvt_f64_i32_e32 v[0:1], s2
14 ; CI-NEXT: v_mov_b32_e32 v3, s1
15 ; CI-NEXT: v_mov_b32_e32 v2, s0
16 ; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
19 ; VI-LABEL: sint_to_fp_i32_to_f64:
21 ; VI-NEXT: s_load_dword s2, s[4:5], 0x8
22 ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
23 ; VI-NEXT: s_waitcnt lgkmcnt(0)
24 ; VI-NEXT: v_cvt_f64_i32_e32 v[0:1], s2
25 ; VI-NEXT: v_mov_b32_e32 v3, s1
26 ; VI-NEXT: v_mov_b32_e32 v2, s0
27 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
29 %result = sitofp i32 %in to double
30 store double %result, ptr addrspace(1) %out
34 ; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
35 ; uses an SGPR (implicit vcc). (Possibly stale; the checks below expect s_cselect_b32 rather than v_cndmask.)
; sitofp to double of the i1 produced by `icmp eq i32 %in, 0`. The expected
; code contains no conversion instruction; it stores a constant-zero low dword
; and picks the high dword with s_cmp_eq_u32 + s_cselect_b32 between 0xbff00000
; and 0. The store in this test uses align 4.
36 define amdgpu_kernel void @sint_to_fp_i1_f64(ptr addrspace(1) %out, i32 %in) {
37 ; CI-LABEL: sint_to_fp_i1_f64:
39 ; CI-NEXT: s_load_dword s2, s[4:5], 0x2
40 ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
41 ; CI-NEXT: v_mov_b32_e32 v0, 0
42 ; CI-NEXT: s_waitcnt lgkmcnt(0)
43 ; CI-NEXT: s_cmp_eq_u32 s2, 0
44 ; CI-NEXT: s_cselect_b32 s2, 0xbff00000, 0
45 ; CI-NEXT: v_mov_b32_e32 v3, s1
46 ; CI-NEXT: v_mov_b32_e32 v1, s2
47 ; CI-NEXT: v_mov_b32_e32 v2, s0
48 ; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
51 ; VI-LABEL: sint_to_fp_i1_f64:
53 ; VI-NEXT: s_load_dword s2, s[4:5], 0x8
54 ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
55 ; VI-NEXT: v_mov_b32_e32 v0, 0
56 ; VI-NEXT: s_waitcnt lgkmcnt(0)
57 ; VI-NEXT: s_cmp_eq_u32 s2, 0
58 ; VI-NEXT: s_cselect_b32 s2, 0xbff00000, 0
59 ; VI-NEXT: v_mov_b32_e32 v3, s1
60 ; VI-NEXT: v_mov_b32_e32 v1, s2
61 ; VI-NEXT: v_mov_b32_e32 v2, s0
62 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
64 %cmp = icmp eq i32 %in, 0
65 %fp = sitofp i1 %cmp to double
66 store double %fp, ptr addrspace(1) %out, align 4
; sitofp to double of an i1 passed directly as a kernel argument. The expected
; code tests bit 0 of the loaded dword with s_bitcmp1_b32, materializes 0 or -1
; through s_cselect_b64 + v_cndmask_b32_e64, and then converts that value with
; v_cvt_f64_i32.
70 define amdgpu_kernel void @sint_to_fp_i1_f64_load(ptr addrspace(1) %out, i1 %in) {
71 ; CI-LABEL: sint_to_fp_i1_f64_load:
73 ; CI-NEXT: s_load_dword s2, s[4:5], 0x2
74 ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
75 ; CI-NEXT: s_waitcnt lgkmcnt(0)
76 ; CI-NEXT: s_bitcmp1_b32 s2, 0
77 ; CI-NEXT: s_cselect_b64 s[2:3], -1, 0
78 ; CI-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[2:3]
79 ; CI-NEXT: v_cvt_f64_i32_e32 v[0:1], v0
80 ; CI-NEXT: v_mov_b32_e32 v3, s1
81 ; CI-NEXT: v_mov_b32_e32 v2, s0
82 ; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
85 ; VI-LABEL: sint_to_fp_i1_f64_load:
87 ; VI-NEXT: s_load_dword s2, s[4:5], 0x8
88 ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
89 ; VI-NEXT: s_waitcnt lgkmcnt(0)
90 ; VI-NEXT: s_bitcmp1_b32 s2, 0
91 ; VI-NEXT: s_cselect_b64 s[2:3], -1, 0
92 ; VI-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[2:3]
93 ; VI-NEXT: v_cvt_f64_i32_e32 v[0:1], v0
94 ; VI-NEXT: v_mov_b32_e32 v3, s1
95 ; VI-NEXT: v_mov_b32_e32 v2, s0
96 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
98 %fp = sitofp i1 %in to double
99 store double %fp, ptr addrspace(1) %out, align 8
; sitofp of an i64 kernel argument (held in s[2:3]) to double. Expected
; expansion: v_cvt_f64_i32 on the high dword (s3), v_cvt_f64_u32 on the low
; dword (s2), v_ldexp_f64 of the high part by 32, then v_add_f64 of the two.
; The two targets differ only in where the address v_mov instructions are
; placed relative to the arithmetic.
103 define amdgpu_kernel void @s_sint_to_fp_i64_to_f64(ptr addrspace(1) %out, i64 %in) {
104 ; CI-LABEL: s_sint_to_fp_i64_to_f64:
106 ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
107 ; CI-NEXT: s_waitcnt lgkmcnt(0)
108 ; CI-NEXT: v_cvt_f64_i32_e32 v[0:1], s3
109 ; CI-NEXT: v_cvt_f64_u32_e32 v[2:3], s2
110 ; CI-NEXT: v_mov_b32_e32 v4, s0
111 ; CI-NEXT: v_mov_b32_e32 v5, s1
112 ; CI-NEXT: v_ldexp_f64 v[0:1], v[0:1], 32
113 ; CI-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
114 ; CI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
117 ; VI-LABEL: s_sint_to_fp_i64_to_f64:
119 ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
120 ; VI-NEXT: s_waitcnt lgkmcnt(0)
121 ; VI-NEXT: v_cvt_f64_i32_e32 v[0:1], s3
122 ; VI-NEXT: v_cvt_f64_u32_e32 v[2:3], s2
123 ; VI-NEXT: v_ldexp_f64 v[0:1], v[0:1], 32
124 ; VI-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
125 ; VI-NEXT: v_mov_b32_e32 v2, s0
126 ; VI-NEXT: v_mov_b32_e32 v3, s1
127 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
129 %result = sitofp i64 %in to double
130 store double %result, ptr addrspace(1) %out
; sitofp to double of an i64 loaded from %in at the index returned by
; llvm.amdgcn.workitem.id.x, so the converted value lives in VGPRs. The
; expected code scales the id by 8 (v_lshlrev_b32 by 3), adds it to the base
; pointer, loads the i64 with flat_load_dwordx2, and then converts with
; v_cvt_f64_i32 (high dword), v_cvt_f64_u32 (low dword), v_ldexp_f64 by 32 and
; v_add_f64.
134 define amdgpu_kernel void @v_sint_to_fp_i64_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
135 ; CI-LABEL: v_sint_to_fp_i64_to_f64:
137 ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
138 ; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
139 ; CI-NEXT: s_waitcnt lgkmcnt(0)
140 ; CI-NEXT: v_mov_b32_e32 v1, s3
141 ; CI-NEXT: v_add_i32_e32 v0, vcc, s2, v0
142 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
143 ; CI-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
144 ; CI-NEXT: s_waitcnt vmcnt(0)
145 ; CI-NEXT: v_cvt_f64_i32_e32 v[1:2], v1
146 ; CI-NEXT: v_cvt_f64_u32_e32 v[3:4], v0
147 ; CI-NEXT: v_ldexp_f64 v[0:1], v[1:2], 32
148 ; CI-NEXT: v_mov_b32_e32 v2, s0
149 ; CI-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
150 ; CI-NEXT: v_mov_b32_e32 v3, s1
151 ; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
154 ; VI-LABEL: v_sint_to_fp_i64_to_f64:
156 ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
157 ; VI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
158 ; VI-NEXT: s_waitcnt lgkmcnt(0)
159 ; VI-NEXT: v_mov_b32_e32 v1, s3
160 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
161 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
162 ; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
163 ; VI-NEXT: s_waitcnt vmcnt(0)
164 ; VI-NEXT: v_cvt_f64_i32_e32 v[1:2], v1
165 ; VI-NEXT: v_cvt_f64_u32_e32 v[3:4], v0
166 ; VI-NEXT: v_ldexp_f64 v[1:2], v[1:2], 32
167 ; VI-NEXT: v_add_f64 v[0:1], v[1:2], v[3:4]
168 ; VI-NEXT: v_mov_b32_e32 v2, s0
169 ; VI-NEXT: v_mov_b32_e32 v3, s1
170 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
172 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
173 %gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
174 %val = load i64, ptr addrspace(1) %gep, align 8
175 %result = sitofp i64 %val to double
176 store double %result, ptr addrspace(1) %out
180 ; FIXME: VI+ emits both s_bfe_i32 and s_sext_i32_i16 here; CI needs only s_sext_i32_i8.
; sitofp of an i8 kernel argument to double. The argument arrives in the low
; bits of a loaded dword and must be sign-extended before v_cvt_f64_i32; the
; hawaii checks use one s_sext_i32_i8, while the fiji checks use s_bfe_i32
; followed by s_sext_i32_i16.
181 define amdgpu_kernel void @s_sint_to_fp_i8_to_f64(ptr addrspace(1) %out, i8 %in) {
182 ; CI-LABEL: s_sint_to_fp_i8_to_f64:
184 ; CI-NEXT: s_load_dword s2, s[4:5], 0x2
185 ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
186 ; CI-NEXT: s_waitcnt lgkmcnt(0)
187 ; CI-NEXT: s_sext_i32_i8 s2, s2
188 ; CI-NEXT: v_cvt_f64_i32_e32 v[0:1], s2
189 ; CI-NEXT: v_mov_b32_e32 v3, s1
190 ; CI-NEXT: v_mov_b32_e32 v2, s0
191 ; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
194 ; VI-LABEL: s_sint_to_fp_i8_to_f64:
196 ; VI-NEXT: s_load_dword s2, s[4:5], 0x8
197 ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
198 ; VI-NEXT: s_waitcnt lgkmcnt(0)
199 ; VI-NEXT: s_bfe_i32 s2, s2, 0x80000
200 ; VI-NEXT: s_sext_i32_i16 s2, s2
201 ; VI-NEXT: v_cvt_f64_i32_e32 v[0:1], s2
202 ; VI-NEXT: v_mov_b32_e32 v3, s1
203 ; VI-NEXT: v_mov_b32_e32 v2, s0
204 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
206 %fp = sitofp i8 %in to double
207 store double %fp, ptr addrspace(1) %out
; Non-kernel function converting an i8 argument (in v0) to double. The hawaii
; checks sign-extend with one `v_bfe_i32 v0, v0, 0, 8`; the fiji checks have an
; additional `v_bfe_i32 v0, v0, 0, 16` before v_cvt_f64_i32.
211 define double @v_sint_to_fp_i8_to_f64(i8 %in) {
212 ; CI-LABEL: v_sint_to_fp_i8_to_f64:
214 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
215 ; CI-NEXT: v_bfe_i32 v0, v0, 0, 8
216 ; CI-NEXT: v_cvt_f64_i32_e32 v[0:1], v0
217 ; CI-NEXT: s_setpc_b64 s[30:31]
219 ; VI-LABEL: v_sint_to_fp_i8_to_f64:
221 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222 ; VI-NEXT: v_bfe_i32 v0, v0, 0, 8
223 ; VI-NEXT: v_bfe_i32 v0, v0, 0, 16
224 ; VI-NEXT: v_cvt_f64_i32_e32 v[0:1], v0
225 ; VI-NEXT: s_setpc_b64 s[30:31]
226 %fp = sitofp i8 %in to double
; Kernel selecting between double -1.0 and 0.0 on `icmp eq i32 %in, 0`. The
; expected code is the same sequence checked for sint_to_fp_i1_f64: a constant
; zero low dword, and s_cmp_eq_u32 + s_cselect_b32 choosing 0xbff00000 or 0 for
; the high dword.
230 define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
231 ; CI-LABEL: s_select_sint_to_fp_i1_vals_f64:
233 ; CI-NEXT: s_load_dword s2, s[4:5], 0x2
234 ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
235 ; CI-NEXT: v_mov_b32_e32 v0, 0
236 ; CI-NEXT: s_waitcnt lgkmcnt(0)
237 ; CI-NEXT: s_cmp_eq_u32 s2, 0
238 ; CI-NEXT: s_cselect_b32 s2, 0xbff00000, 0
239 ; CI-NEXT: v_mov_b32_e32 v3, s1
240 ; CI-NEXT: v_mov_b32_e32 v1, s2
241 ; CI-NEXT: v_mov_b32_e32 v2, s0
242 ; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
245 ; VI-LABEL: s_select_sint_to_fp_i1_vals_f64:
247 ; VI-NEXT: s_load_dword s2, s[4:5], 0x8
248 ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
249 ; VI-NEXT: v_mov_b32_e32 v0, 0
250 ; VI-NEXT: s_waitcnt lgkmcnt(0)
251 ; VI-NEXT: s_cmp_eq_u32 s2, 0
252 ; VI-NEXT: s_cselect_b32 s2, 0xbff00000, 0
253 ; VI-NEXT: v_mov_b32_e32 v3, s1
254 ; VI-NEXT: v_mov_b32_e32 v1, s2
255 ; VI-NEXT: v_mov_b32_e32 v2, s0
256 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
258 %cmp = icmp eq i32 %in, 0
259 %select = select i1 %cmp, double -1.0, double 0.0
260 store double %select, ptr addrspace(1) %out, align 8
; Non-kernel function selecting between double -1.0 and 0.0 on
; `icmp eq i32 %in, 0`, with %in in v2. Both targets share the GCN checks:
; v_cmp_eq_u32 into vcc, then v_cndmask_b32_e32 choosing 0 or 0xbff00000 for
; the high dword (v4), with the low dword (v3) set to 0.
264 define void @v_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
265 ; GCN-LABEL: v_select_sint_to_fp_i1_vals_f64:
267 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
268 ; GCN-NEXT: v_mov_b32_e32 v4, 0xbff00000
269 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
270 ; GCN-NEXT: v_mov_b32_e32 v3, 0
271 ; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
272 ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
273 ; GCN-NEXT: s_waitcnt vmcnt(0)
274 ; GCN-NEXT: s_setpc_b64 s[30:31]
275 %cmp = icmp eq i32 %in, 0
276 %select = select i1 %cmp, double -1.0, double 0.0
277 store double %select, ptr addrspace(1) %out, align 8
; Integer counterpart of s_select_sint_to_fp_i1_vals_f64: selects between the
; i64 constants 0xbff0000000000000 and 0 instead of doubles. The expected
; instruction sequence is the same s_cmp_eq_u32 + s_cselect_b32 on the high
; dword with a zero low dword.
281 define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) {
282 ; CI-LABEL: s_select_sint_to_fp_i1_vals_i64:
284 ; CI-NEXT: s_load_dword s2, s[4:5], 0x2
285 ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
286 ; CI-NEXT: v_mov_b32_e32 v0, 0
287 ; CI-NEXT: s_waitcnt lgkmcnt(0)
288 ; CI-NEXT: s_cmp_eq_u32 s2, 0
289 ; CI-NEXT: s_cselect_b32 s2, 0xbff00000, 0
290 ; CI-NEXT: v_mov_b32_e32 v3, s1
291 ; CI-NEXT: v_mov_b32_e32 v1, s2
292 ; CI-NEXT: v_mov_b32_e32 v2, s0
293 ; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
296 ; VI-LABEL: s_select_sint_to_fp_i1_vals_i64:
298 ; VI-NEXT: s_load_dword s2, s[4:5], 0x8
299 ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
300 ; VI-NEXT: v_mov_b32_e32 v0, 0
301 ; VI-NEXT: s_waitcnt lgkmcnt(0)
302 ; VI-NEXT: s_cmp_eq_u32 s2, 0
303 ; VI-NEXT: s_cselect_b32 s2, 0xbff00000, 0
304 ; VI-NEXT: v_mov_b32_e32 v3, s1
305 ; VI-NEXT: v_mov_b32_e32 v1, s2
306 ; VI-NEXT: v_mov_b32_e32 v2, s0
307 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
309 %cmp = icmp eq i32 %in, 0
310 %select = select i1 %cmp, i64 u0xbff0000000000000, i64 0
311 store i64 %select, ptr addrspace(1) %out, align 8
; Non-kernel function selecting between the i64 constants 0xbff0000000000000
; and 0 on `icmp eq i32 %in, 0`. Both targets share the GCN checks, which match
; those of v_select_sint_to_fp_i1_vals_f64: v_cmp_eq_u32 into vcc and a
; v_cndmask_b32_e32 on the high dword only.
315 define void @v_select_sint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) {
316 ; GCN-LABEL: v_select_sint_to_fp_i1_vals_i64:
318 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
319 ; GCN-NEXT: v_mov_b32_e32 v4, 0xbff00000
320 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
321 ; GCN-NEXT: v_mov_b32_e32 v3, 0
322 ; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
323 ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
324 ; GCN-NEXT: s_waitcnt vmcnt(0)
325 ; GCN-NEXT: s_setpc_b64 s[30:31]
326 %cmp = icmp eq i32 %in, 0
327 %select = select i1 %cmp, i64 u0xbff0000000000000, i64 0
328 store i64 %select, ptr addrspace(1) %out, align 8
332 ; TODO: This should swap the selected order / invert the compare and do it.
; Non-kernel function with the select operands swapped: 0.0 when %in == 0,
; otherwise -1.0. The expected code keeps the v_cmp_eq_u32 compare and uses
; `v_cndmask_b32_e64 v4, v4, 0, vcc` (0xbff00000 as the first source, 0 as the
; second) for the high dword.
333 define void @v_swap_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
334 ; GCN-LABEL: v_swap_select_sint_to_fp_i1_vals_f64:
336 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
337 ; GCN-NEXT: v_mov_b32_e32 v4, 0xbff00000
338 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
339 ; GCN-NEXT: v_mov_b32_e32 v3, 0
340 ; GCN-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
341 ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
342 ; GCN-NEXT: s_waitcnt vmcnt(0)
343 ; GCN-NEXT: s_setpc_b64 s[30:31]
344 %cmp = icmp eq i32 %in, 0
345 %select = select i1 %cmp, double 0.0, double -1.0
346 store double %select, ptr addrspace(1) %out, align 8
350 ; TODO: This should swap the selected order / invert the compare and do it.
; Kernel with the select operands swapped: 0.0 when %in == 0, otherwise -1.0.
; The expected code keeps s_cmp_eq_u32 and swaps the s_cselect_b32 operands
; (0, 0xbff00000) for the high dword; the low dword is a constant 0.
351 define amdgpu_kernel void @s_swap_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
352 ; CI-LABEL: s_swap_select_sint_to_fp_i1_vals_f64:
354 ; CI-NEXT: s_load_dword s2, s[4:5], 0x2
355 ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
356 ; CI-NEXT: v_mov_b32_e32 v0, 0
357 ; CI-NEXT: s_waitcnt lgkmcnt(0)
358 ; CI-NEXT: s_cmp_eq_u32 s2, 0
359 ; CI-NEXT: s_cselect_b32 s2, 0, 0xbff00000
360 ; CI-NEXT: v_mov_b32_e32 v3, s1
361 ; CI-NEXT: v_mov_b32_e32 v1, s2
362 ; CI-NEXT: v_mov_b32_e32 v2, s0
363 ; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
366 ; VI-LABEL: s_swap_select_sint_to_fp_i1_vals_f64:
368 ; VI-NEXT: s_load_dword s2, s[4:5], 0x8
369 ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
370 ; VI-NEXT: v_mov_b32_e32 v0, 0
371 ; VI-NEXT: s_waitcnt lgkmcnt(0)
372 ; VI-NEXT: s_cmp_eq_u32 s2, 0
373 ; VI-NEXT: s_cselect_b32 s2, 0, 0xbff00000
374 ; VI-NEXT: v_mov_b32_e32 v3, s1
375 ; VI-NEXT: v_mov_b32_e32 v1, s2
376 ; VI-NEXT: v_mov_b32_e32 v2, s0
377 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
379 %cmp = icmp eq i32 %in, 0
380 %select = select i1 %cmp, double 0.0, double -1.0
381 store double %select, ptr addrspace(1) %out, align 8