1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck %s -check-prefixes=SI
3 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -check-prefixes=VI
4 ; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck %s -check-prefixes=EG
; Intrinsic declaration; the only use in this file is the fabs-then-fptoui
; kernel (@fp_to_uint_fabs_f32_to_i1).
6 declare float @llvm.fabs.f32(float) #1
; Scalar case: fptoui of a float kernel argument to i32, stored to %out.
; The amdgcn checks expect one v_cvt_u32_f32 feeding a dword store; the r600
; (redwood) checks expect TRUNC followed by FLT_TO_UINT.
8 define amdgpu_kernel void @fp_to_uint_f32_to_i32 (ptr addrspace(1) %out, float %in) {
9 ; SI-LABEL: fp_to_uint_f32_to_i32:
11 ; SI-NEXT: s_load_dword s6, s[4:5], 0xb
12 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
13 ; SI-NEXT: s_mov_b32 s3, 0xf000
14 ; SI-NEXT: s_mov_b32 s2, -1
15 ; SI-NEXT: s_waitcnt lgkmcnt(0)
16 ; SI-NEXT: v_cvt_u32_f32_e32 v0, s6
17 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
20 ; VI-LABEL: fp_to_uint_f32_to_i32:
22 ; VI-NEXT: s_load_dword s2, s[4:5], 0x2c
23 ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
24 ; VI-NEXT: s_mov_b32 s3, 0xf000
25 ; VI-NEXT: s_waitcnt lgkmcnt(0)
26 ; VI-NEXT: v_cvt_u32_f32_e32 v0, s2
27 ; VI-NEXT: s_mov_b32 s2, -1
28 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
31 ; EG-LABEL: fp_to_uint_f32_to_i32:
33 ; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
34 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
37 ; EG-NEXT: ALU clause starting at 4:
38 ; EG-NEXT: TRUNC * T0.W, KC0[2].Z,
39 ; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
40 ; EG-NEXT: FLT_TO_UINT * T1.X, PV.W,
41 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
42 %conv = fptoui float %in to i32
43 store i32 %conv, ptr addrspace(1) %out
; Two-element vector case: fptoui of a <2 x float> kernel argument to
; <2 x i32>. The checks expect one conversion per element (two v_cvt_u32_f32 /
; two FLT_TO_UINT) and a single 64-bit store of the result.
47 define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i32(ptr addrspace(1) %out, <2 x float> %in) {
48 ; SI-LABEL: fp_to_uint_v2f32_to_v2i32:
50 ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
51 ; SI-NEXT: s_mov_b32 s7, 0xf000
52 ; SI-NEXT: s_mov_b32 s6, -1
53 ; SI-NEXT: s_waitcnt lgkmcnt(0)
54 ; SI-NEXT: s_mov_b32 s4, s0
55 ; SI-NEXT: s_mov_b32 s5, s1
56 ; SI-NEXT: v_cvt_u32_f32_e32 v1, s3
57 ; SI-NEXT: v_cvt_u32_f32_e32 v0, s2
58 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
61 ; VI-LABEL: fp_to_uint_v2f32_to_v2i32:
63 ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
64 ; VI-NEXT: s_mov_b32 s7, 0xf000
65 ; VI-NEXT: s_mov_b32 s6, -1
66 ; VI-NEXT: s_waitcnt lgkmcnt(0)
67 ; VI-NEXT: v_cvt_u32_f32_e32 v1, s3
68 ; VI-NEXT: v_cvt_u32_f32_e32 v0, s2
69 ; VI-NEXT: s_mov_b32 s4, s0
70 ; VI-NEXT: s_mov_b32 s5, s1
71 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
74 ; EG-LABEL: fp_to_uint_v2f32_to_v2i32:
76 ; EG-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[]
77 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
80 ; EG-NEXT: ALU clause starting at 4:
81 ; EG-NEXT: TRUNC T0.W, KC0[3].X,
82 ; EG-NEXT: TRUNC * T1.W, KC0[2].W,
83 ; EG-NEXT: FLT_TO_UINT * T0.Y, PV.W,
84 ; EG-NEXT: LSHR T1.X, KC0[2].Y, literal.x,
85 ; EG-NEXT: FLT_TO_UINT * T0.X, T1.W,
86 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
87 %result = fptoui <2 x float> %in to <2 x i32>
88 store <2 x i32> %result, ptr addrspace(1) %out
; Four-element vector case. Unlike the scalar and <2 x float> kernels, the
; input is loaded from memory through the %in pointer rather than passed by
; value. The checks expect four per-element conversions and one 128-bit store.
92 define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
93 ; SI-LABEL: fp_to_uint_v4f32_to_v4i32:
95 ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
96 ; SI-NEXT: s_waitcnt lgkmcnt(0)
97 ; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
98 ; SI-NEXT: s_mov_b32 s3, 0xf000
99 ; SI-NEXT: s_mov_b32 s2, -1
100 ; SI-NEXT: s_waitcnt lgkmcnt(0)
101 ; SI-NEXT: v_cvt_u32_f32_e32 v3, s7
102 ; SI-NEXT: v_cvt_u32_f32_e32 v2, s6
103 ; SI-NEXT: v_cvt_u32_f32_e32 v1, s5
104 ; SI-NEXT: v_cvt_u32_f32_e32 v0, s4
105 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
108 ; VI-LABEL: fp_to_uint_v4f32_to_v4i32:
110 ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
111 ; VI-NEXT: s_waitcnt lgkmcnt(0)
112 ; VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
113 ; VI-NEXT: s_mov_b32 s3, 0xf000
114 ; VI-NEXT: s_mov_b32 s2, -1
115 ; VI-NEXT: s_waitcnt lgkmcnt(0)
116 ; VI-NEXT: v_cvt_u32_f32_e32 v3, s7
117 ; VI-NEXT: v_cvt_u32_f32_e32 v2, s6
118 ; VI-NEXT: v_cvt_u32_f32_e32 v1, s5
119 ; VI-NEXT: v_cvt_u32_f32_e32 v0, s4
120 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
123 ; EG-LABEL: fp_to_uint_v4f32_to_v4i32:
125 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
127 ; EG-NEXT: ALU 9, @9, KC0[CB0:0-32], KC1[]
128 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
131 ; EG-NEXT: Fetch clause starting at 6:
132 ; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
133 ; EG-NEXT: ALU clause starting at 8:
134 ; EG-NEXT: MOV * T0.X, KC0[2].Z,
135 ; EG-NEXT: ALU clause starting at 9:
136 ; EG-NEXT: TRUNC T0.W, T0.W,
137 ; EG-NEXT: TRUNC * T1.W, T0.Z,
138 ; EG-NEXT: FLT_TO_UINT * T0.W, PV.W,
139 ; EG-NEXT: TRUNC T2.W, T0.Y,
140 ; EG-NEXT: FLT_TO_UINT * T0.Z, T1.W,
141 ; EG-NEXT: TRUNC T1.W, T0.X,
142 ; EG-NEXT: FLT_TO_UINT * T0.Y, PV.W,
143 ; EG-NEXT: LSHR T1.X, KC0[2].Y, literal.x,
144 ; EG-NEXT: FLT_TO_UINT * T0.X, PV.W,
145 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
146 %value = load <4 x float>, ptr addrspace(1) %in
147 %result = fptoui <4 x float> %value to <4 x i32>
148 store <4 x i32> %result, ptr addrspace(1) %out
; 64-bit result: fptoui of a float kernel argument to i64.
; The amdgcn checks expect a floating-point expansion into two 32-bit halves:
; the truncated input is scaled by 0x2f800000 (the f32 bit pattern of 2^-32)
; and floored, which is converted to the high dword; an fma with 0xcf800000
; (the f32 bit pattern of -(2^32)) then produces the value converted to the
; low dword.
; The r600 checks instead expect an all-integer sequence that works on the raw
; bits of the input (BFE_UINT with literal 23, AND with 8388607 = 0x7fffff,
; shifts and conditional selects).
152 define amdgpu_kernel void @fp_to_uint_f32_to_i64(ptr addrspace(1) %out, float %x) {
153 ; SI-LABEL: fp_to_uint_f32_to_i64:
155 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
156 ; SI-NEXT: s_load_dword s4, s[4:5], 0xb
157 ; SI-NEXT: s_mov_b32 s3, 0xf000
158 ; SI-NEXT: s_mov_b32 s2, -1
159 ; SI-NEXT: s_mov_b32 s5, 0xcf800000
160 ; SI-NEXT: s_waitcnt lgkmcnt(0)
161 ; SI-NEXT: v_trunc_f32_e32 v0, s4
162 ; SI-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
163 ; SI-NEXT: v_floor_f32_e32 v2, v1
164 ; SI-NEXT: v_cvt_u32_f32_e32 v1, v2
165 ; SI-NEXT: v_fma_f32 v0, v2, s5, v0
166 ; SI-NEXT: v_cvt_u32_f32_e32 v0, v0
167 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
170 ; VI-LABEL: fp_to_uint_f32_to_i64:
172 ; VI-NEXT: s_load_dword s2, s[4:5], 0x2c
173 ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
174 ; VI-NEXT: s_mov_b32 s3, 0xcf800000
175 ; VI-NEXT: s_waitcnt lgkmcnt(0)
176 ; VI-NEXT: v_trunc_f32_e32 v0, s2
177 ; VI-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
178 ; VI-NEXT: v_floor_f32_e32 v2, v1
179 ; VI-NEXT: v_fma_f32 v0, v2, s3, v0
180 ; VI-NEXT: v_cvt_u32_f32_e32 v1, v2
181 ; VI-NEXT: v_cvt_u32_f32_e32 v0, v0
182 ; VI-NEXT: s_mov_b32 s3, 0xf000
183 ; VI-NEXT: s_mov_b32 s2, -1
184 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
187 ; EG-LABEL: fp_to_uint_f32_to_i64:
189 ; EG-NEXT: ALU 40, @4, KC0[CB0:0-32], KC1[]
190 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
193 ; EG-NEXT: ALU clause starting at 4:
194 ; EG-NEXT: MOV * T0.W, literal.x,
195 ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
196 ; EG-NEXT: BFE_UINT T0.W, KC0[2].Z, literal.x, PV.W,
197 ; EG-NEXT: AND_INT * T1.W, KC0[2].Z, literal.y,
198 ; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
199 ; EG-NEXT: OR_INT T1.W, PS, literal.x,
200 ; EG-NEXT: ADD_INT * T2.W, PV.W, literal.y,
201 ; EG-NEXT: 8388608(1.175494e-38), -150(nan)
202 ; EG-NEXT: ADD_INT T0.X, T0.W, literal.x,
203 ; EG-NEXT: AND_INT T0.Y, PS, literal.y,
204 ; EG-NEXT: SUB_INT T0.Z, literal.z, T0.W,
205 ; EG-NEXT: NOT_INT T0.W, PS,
206 ; EG-NEXT: LSHR * T3.W, PV.W, 1,
207 ; EG-NEXT: -127(nan), 31(4.344025e-44)
208 ; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
209 ; EG-NEXT: BIT_ALIGN_INT T1.X, 0.0, PS, PV.W,
210 ; EG-NEXT: AND_INT T1.Y, PV.Z, literal.x,
211 ; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, T1.W, PV.Z,
212 ; EG-NEXT: LSHL T0.W, T1.W, PV.Y,
213 ; EG-NEXT: AND_INT * T1.W, T2.W, literal.x,
214 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
215 ; EG-NEXT: CNDE_INT T0.Y, PS, PV.W, 0.0,
216 ; EG-NEXT: CNDE_INT T0.Z, PV.Y, PV.Z, 0.0,
217 ; EG-NEXT: CNDE_INT T0.W, PS, PV.X, PV.W,
218 ; EG-NEXT: SETGT_INT * T1.W, T0.X, literal.x,
219 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
220 ; EG-NEXT: CNDE_INT T1.Z, PS, 0.0, PV.W,
221 ; EG-NEXT: CNDE_INT T0.W, PS, PV.Z, PV.Y,
222 ; EG-NEXT: ASHR * T1.W, KC0[2].Z, literal.x,
223 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
224 ; EG-NEXT: XOR_INT T0.W, PV.W, PS,
225 ; EG-NEXT: XOR_INT * T2.W, PV.Z, PS,
226 ; EG-NEXT: SUB_INT T2.W, PS, T1.W,
227 ; EG-NEXT: SUBB_UINT * T3.W, PV.W, T1.W,
228 ; EG-NEXT: SUB_INT T2.W, PV.W, PS,
229 ; EG-NEXT: SETGT_INT * T3.W, 0.0, T0.X,
230 ; EG-NEXT: CNDE_INT T0.Y, PS, PV.W, 0.0,
231 ; EG-NEXT: SUB_INT * T0.W, T0.W, T1.W,
232 ; EG-NEXT: CNDE_INT T0.X, T3.W, PV.W, 0.0,
233 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
234 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
235 %conv = fptoui float %x to i64
236 store i64 %conv, ptr addrspace(1) %out
; Vector 64-bit result: fptoui of a <2 x float> kernel argument to <2 x i64>.
; The amdgcn checks expect the trunc / multiply-by-0x2f800000 / floor /
; fma-with-0xcf800000 sequence once per element, with the four resulting
; dwords written by a single 128-bit store. The r600 checks expect the
; all-integer expansion for both elements.
240 define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i64(ptr addrspace(1) %out, <2 x float> %x) {
241 ; SI-LABEL: fp_to_uint_v2f32_to_v2i64:
243 ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
244 ; SI-NEXT: s_mov_b32 s7, 0xf000
245 ; SI-NEXT: s_mov_b32 s6, -1
246 ; SI-NEXT: s_mov_b32 s8, 0xcf800000
247 ; SI-NEXT: s_waitcnt lgkmcnt(0)
248 ; SI-NEXT: s_mov_b32 s4, s0
249 ; SI-NEXT: s_mov_b32 s5, s1
250 ; SI-NEXT: v_trunc_f32_e32 v0, s3
251 ; SI-NEXT: v_trunc_f32_e32 v2, s2
252 ; SI-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
253 ; SI-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
254 ; SI-NEXT: v_floor_f32_e32 v4, v1
255 ; SI-NEXT: v_floor_f32_e32 v5, v3
256 ; SI-NEXT: v_cvt_u32_f32_e32 v3, v4
257 ; SI-NEXT: v_cvt_u32_f32_e32 v1, v5
258 ; SI-NEXT: v_fma_f32 v0, v4, s8, v0
259 ; SI-NEXT: v_fma_f32 v4, v5, s8, v2
260 ; SI-NEXT: v_cvt_u32_f32_e32 v2, v0
261 ; SI-NEXT: v_cvt_u32_f32_e32 v0, v4
262 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
265 ; VI-LABEL: fp_to_uint_v2f32_to_v2i64:
267 ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
268 ; VI-NEXT: s_mov_b32 s7, 0xf000
269 ; VI-NEXT: s_mov_b32 s6, -1
270 ; VI-NEXT: s_waitcnt lgkmcnt(0)
271 ; VI-NEXT: v_trunc_f32_e32 v0, s3
272 ; VI-NEXT: v_trunc_f32_e32 v4, s2
273 ; VI-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
274 ; VI-NEXT: v_mul_f32_e32 v2, 0x2f800000, v4
275 ; VI-NEXT: v_floor_f32_e32 v5, v1
276 ; VI-NEXT: s_mov_b32 s2, 0xcf800000
277 ; VI-NEXT: v_floor_f32_e32 v6, v2
278 ; VI-NEXT: v_fma_f32 v0, v5, s2, v0
279 ; VI-NEXT: v_cvt_u32_f32_e32 v2, v0
280 ; VI-NEXT: v_fma_f32 v0, v6, s2, v4
281 ; VI-NEXT: v_cvt_u32_f32_e32 v3, v5
282 ; VI-NEXT: v_cvt_u32_f32_e32 v1, v6
283 ; VI-NEXT: v_cvt_u32_f32_e32 v0, v0
284 ; VI-NEXT: s_mov_b32 s4, s0
285 ; VI-NEXT: s_mov_b32 s5, s1
286 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
289 ; EG-LABEL: fp_to_uint_v2f32_to_v2i64:
291 ; EG-NEXT: ALU 74, @4, KC0[CB0:0-32], KC1[]
292 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
295 ; EG-NEXT: ALU clause starting at 4:
296 ; EG-NEXT: MOV * T0.W, literal.x,
297 ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
298 ; EG-NEXT: BFE_UINT T0.Z, KC0[3].X, literal.x, PV.W,
299 ; EG-NEXT: BFE_UINT T0.W, KC0[2].W, literal.x, PV.W,
300 ; EG-NEXT: AND_INT * T1.Z, KC0[2].W, literal.y,
301 ; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
302 ; EG-NEXT: ADD_INT T1.W, PV.W, literal.x,
303 ; EG-NEXT: ADD_INT * T2.W, PV.Z, literal.x,
304 ; EG-NEXT: -150(nan), 0(0.000000e+00)
305 ; EG-NEXT: AND_INT T0.X, PS, literal.x,
306 ; EG-NEXT: AND_INT T0.Y, PV.W, literal.x,
307 ; EG-NEXT: OR_INT T1.Z, T1.Z, literal.y,
308 ; EG-NEXT: SUB_INT T3.W, literal.z, T0.W,
309 ; EG-NEXT: AND_INT * T4.W, KC0[3].X, literal.w,
310 ; EG-NEXT: 31(4.344025e-44), 8388608(1.175494e-38)
311 ; EG-NEXT: 150(2.101948e-43), 8388607(1.175494e-38)
312 ; EG-NEXT: OR_INT T1.X, PS, literal.x,
313 ; EG-NEXT: AND_INT T1.Y, PV.W, literal.y,
314 ; EG-NEXT: BIT_ALIGN_INT T2.Z, 0.0, PV.Z, PV.W,
315 ; EG-NEXT: LSHL T3.W, PV.Z, PV.Y,
316 ; EG-NEXT: AND_INT * T4.W, T1.W, literal.y,
317 ; EG-NEXT: 8388608(1.175494e-38), 32(4.484155e-44)
318 ; EG-NEXT: CNDE_INT T0.Y, PS, PV.W, 0.0,
319 ; EG-NEXT: CNDE_INT T2.Z, PV.Y, PV.Z, 0.0,
320 ; EG-NEXT: LSHL T5.W, PV.X, T0.X,
321 ; EG-NEXT: AND_INT * T6.W, T2.W, literal.x,
322 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
323 ; EG-NEXT: CNDE_INT T0.X, PS, PV.W, 0.0,
324 ; EG-NEXT: NOT_INT T1.Y, T1.W,
325 ; EG-NEXT: SUB_INT T3.Z, literal.x, T0.Z,
326 ; EG-NEXT: NOT_INT T1.W, T2.W, BS:VEC_120/SCL_212
327 ; EG-NEXT: LSHR * T2.W, T1.X, 1,
328 ; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
329 ; EG-NEXT: LSHR T2.X, T1.Z, 1,
330 ; EG-NEXT: ADD_INT T2.Y, T0.Z, literal.x, BS:VEC_120/SCL_212
331 ; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, PS, PV.W,
332 ; EG-NEXT: BIT_ALIGN_INT T1.W, 0.0, T1.X, PV.Z,
333 ; EG-NEXT: AND_INT * T2.W, PV.Z, literal.y,
334 ; EG-NEXT: -127(nan), 32(4.484155e-44)
335 ; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0,
336 ; EG-NEXT: CNDE_INT T3.Y, T6.W, PV.Z, T5.W, BS:VEC_021/SCL_122
337 ; EG-NEXT: SETGT_INT T0.Z, PV.Y, literal.x,
338 ; EG-NEXT: BIT_ALIGN_INT T1.W, 0.0, PV.X, T1.Y,
339 ; EG-NEXT: ADD_INT * T0.W, T0.W, literal.y,
340 ; EG-NEXT: 23(3.222986e-44), -127(nan)
341 ; EG-NEXT: CNDE_INT T2.X, T4.W, PV.W, T3.W,
342 ; EG-NEXT: SETGT_INT T1.Y, PS, literal.x,
343 ; EG-NEXT: CNDE_INT T1.Z, PV.Z, 0.0, PV.Y,
344 ; EG-NEXT: CNDE_INT T1.W, PV.Z, PV.X, T0.X,
345 ; EG-NEXT: ASHR * T2.W, KC0[3].X, literal.y,
346 ; EG-NEXT: 23(3.222986e-44), 31(4.344025e-44)
347 ; EG-NEXT: XOR_INT T0.X, PV.W, PS,
348 ; EG-NEXT: XOR_INT T3.Y, PV.Z, PS,
349 ; EG-NEXT: CNDE_INT T0.Z, PV.Y, 0.0, PV.X,
350 ; EG-NEXT: CNDE_INT T1.W, PV.Y, T2.Z, T0.Y,
351 ; EG-NEXT: ASHR * T3.W, KC0[2].W, literal.x,
352 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
353 ; EG-NEXT: XOR_INT T0.Y, PV.W, PS,
354 ; EG-NEXT: XOR_INT T0.Z, PV.Z, PS,
355 ; EG-NEXT: SUB_INT T1.W, PV.Y, T2.W,
356 ; EG-NEXT: SUBB_UINT * T4.W, PV.X, T2.W,
357 ; EG-NEXT: SUB_INT T1.Y, PV.W, PS,
358 ; EG-NEXT: SETGT_INT T1.Z, 0.0, T2.Y,
359 ; EG-NEXT: SUB_INT T1.W, PV.Z, T3.W,
360 ; EG-NEXT: SUBB_UINT * T4.W, PV.Y, T3.W,
361 ; EG-NEXT: SUB_INT T0.Z, PV.W, PS,
362 ; EG-NEXT: SETGT_INT T0.W, 0.0, T0.W,
363 ; EG-NEXT: CNDE_INT * T1.W, PV.Z, PV.Y, 0.0,
364 ; EG-NEXT: CNDE_INT T1.Y, PV.W, PV.Z, 0.0,
365 ; EG-NEXT: SUB_INT * T2.W, T0.X, T2.W,
366 ; EG-NEXT: CNDE_INT T1.Z, T1.Z, PV.W, 0.0,
367 ; EG-NEXT: SUB_INT * T2.W, T0.Y, T3.W,
368 ; EG-NEXT: CNDE_INT T1.X, T0.W, PV.W, 0.0,
369 ; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
370 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
371 %conv = fptoui <2 x float> %x to <2 x i64>
372 store <2 x i64> %conv, ptr addrspace(1) %out
; Widest case in this file: fptoui of a <4 x float> kernel argument to
; <4 x i64>. The 256-bit result does not fit one store, so the amdgcn checks
; expect two 128-bit stores (one at offset 16, one at offset 0) after the
; per-element trunc / multiply / floor / fma expansion. The r600 checks expect
; the all-integer expansion split across two ALU clauses and two
; MEM_RAT_CACHELESS stores.
376 define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i64(ptr addrspace(1) %out, <4 x float> %x) {
377 ; SI-LABEL: fp_to_uint_v4f32_to_v4i64:
379 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
380 ; SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0xd
381 ; SI-NEXT: s_mov_b32 s3, 0xf000
382 ; SI-NEXT: s_mov_b32 s2, -1
383 ; SI-NEXT: s_mov_b32 s8, 0xcf800000
384 ; SI-NEXT: s_waitcnt lgkmcnt(0)
385 ; SI-NEXT: v_trunc_f32_e32 v0, s5
386 ; SI-NEXT: v_trunc_f32_e32 v2, s4
387 ; SI-NEXT: v_trunc_f32_e32 v4, s7
388 ; SI-NEXT: v_trunc_f32_e32 v6, s6
389 ; SI-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
390 ; SI-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
391 ; SI-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
392 ; SI-NEXT: v_mul_f32_e32 v7, 0x2f800000, v6
393 ; SI-NEXT: v_floor_f32_e32 v8, v1
394 ; SI-NEXT: v_floor_f32_e32 v9, v3
395 ; SI-NEXT: v_floor_f32_e32 v10, v5
396 ; SI-NEXT: v_floor_f32_e32 v11, v7
397 ; SI-NEXT: v_cvt_u32_f32_e32 v3, v8
398 ; SI-NEXT: v_cvt_u32_f32_e32 v1, v9
399 ; SI-NEXT: v_fma_f32 v0, v8, s8, v0
400 ; SI-NEXT: v_fma_f32 v8, v9, s8, v2
401 ; SI-NEXT: v_cvt_u32_f32_e32 v7, v10
402 ; SI-NEXT: v_cvt_u32_f32_e32 v5, v11
403 ; SI-NEXT: v_fma_f32 v4, v10, s8, v4
404 ; SI-NEXT: v_fma_f32 v9, v11, s8, v6
405 ; SI-NEXT: v_cvt_u32_f32_e32 v2, v0
406 ; SI-NEXT: v_cvt_u32_f32_e32 v0, v8
407 ; SI-NEXT: v_cvt_u32_f32_e32 v6, v4
408 ; SI-NEXT: v_cvt_u32_f32_e32 v4, v9
409 ; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
410 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
413 ; VI-LABEL: fp_to_uint_v4f32_to_v4i64:
415 ; VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34
416 ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
417 ; VI-NEXT: s_mov_b32 s2, 0xcf800000
418 ; VI-NEXT: s_mov_b32 s3, 0xf000
419 ; VI-NEXT: s_waitcnt lgkmcnt(0)
420 ; VI-NEXT: v_trunc_f32_e32 v0, s9
421 ; VI-NEXT: v_trunc_f32_e32 v4, s8
422 ; VI-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
423 ; VI-NEXT: v_mul_f32_e32 v2, 0x2f800000, v4
424 ; VI-NEXT: v_floor_f32_e32 v5, v1
425 ; VI-NEXT: v_floor_f32_e32 v6, v2
426 ; VI-NEXT: v_fma_f32 v0, v5, s2, v0
427 ; VI-NEXT: v_cvt_u32_f32_e32 v2, v0
428 ; VI-NEXT: v_fma_f32 v0, v6, s2, v4
429 ; VI-NEXT: v_trunc_f32_e32 v4, s11
430 ; VI-NEXT: v_cvt_u32_f32_e32 v3, v5
431 ; VI-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
432 ; VI-NEXT: v_trunc_f32_e32 v8, s10
433 ; VI-NEXT: v_cvt_u32_f32_e32 v1, v6
434 ; VI-NEXT: v_floor_f32_e32 v6, v5
435 ; VI-NEXT: v_mul_f32_e32 v5, 0x2f800000, v8
436 ; VI-NEXT: v_floor_f32_e32 v9, v5
437 ; VI-NEXT: v_fma_f32 v4, v6, s2, v4
438 ; VI-NEXT: v_cvt_u32_f32_e32 v7, v6
439 ; VI-NEXT: v_cvt_u32_f32_e32 v6, v4
440 ; VI-NEXT: v_fma_f32 v4, v9, s2, v8
441 ; VI-NEXT: v_cvt_u32_f32_e32 v5, v9
442 ; VI-NEXT: v_cvt_u32_f32_e32 v4, v4
443 ; VI-NEXT: v_cvt_u32_f32_e32 v0, v0
444 ; VI-NEXT: s_mov_b32 s2, -1
445 ; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
446 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
449 ; EG-LABEL: fp_to_uint_v4f32_to_v4i64:
451 ; EG-NEXT: ALU 99, @6, KC0[CB0:0-32], KC1[]
452 ; EG-NEXT: ALU 54, @106, KC0[CB0:0-32], KC1[]
453 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
454 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T0.X, 1
457 ; EG-NEXT: ALU clause starting at 6:
458 ; EG-NEXT: MOV * T0.W, literal.x,
459 ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
460 ; EG-NEXT: BFE_UINT T1.W, KC0[3].Z, literal.x, PV.W,
461 ; EG-NEXT: AND_INT * T2.W, KC0[3].Z, literal.y,
462 ; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
463 ; EG-NEXT: OR_INT T2.W, PS, literal.x,
464 ; EG-NEXT: ADD_INT * T3.W, PV.W, literal.y,
465 ; EG-NEXT: 8388608(1.175494e-38), -150(nan)
466 ; EG-NEXT: ADD_INT T0.X, T1.W, literal.x,
467 ; EG-NEXT: BFE_UINT T0.Y, KC0[4].X, literal.y, T0.W,
468 ; EG-NEXT: AND_INT T0.Z, PS, literal.z,
469 ; EG-NEXT: NOT_INT T4.W, PS,
470 ; EG-NEXT: LSHR * T5.W, PV.W, 1,
471 ; EG-NEXT: -127(nan), 23(3.222986e-44)
472 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
473 ; EG-NEXT: BIT_ALIGN_INT T1.X, 0.0, PS, PV.W,
474 ; EG-NEXT: AND_INT T1.Y, T3.W, literal.x,
475 ; EG-NEXT: LSHL T0.Z, T2.W, PV.Z, BS:VEC_120/SCL_212
476 ; EG-NEXT: AND_INT T3.W, KC0[4].X, literal.y,
477 ; EG-NEXT: ADD_INT * T4.W, PV.Y, literal.z,
478 ; EG-NEXT: 32(4.484155e-44), 8388607(1.175494e-38)
479 ; EG-NEXT: -150(nan), 0(0.000000e+00)
480 ; EG-NEXT: AND_INT T2.Y, PS, literal.x,
481 ; EG-NEXT: OR_INT T1.Z, PV.W, literal.y,
482 ; EG-NEXT: CNDE_INT T3.W, PV.Y, PV.X, PV.Z,
483 ; EG-NEXT: SETGT_INT * T5.W, T0.X, literal.z,
484 ; EG-NEXT: 31(4.344025e-44), 8388608(1.175494e-38)
485 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
486 ; EG-NEXT: CNDE_INT T3.Y, PS, 0.0, PV.W,
487 ; EG-NEXT: SUB_INT T2.Z, literal.x, T1.W,
488 ; EG-NEXT: LSHL T1.W, PV.Z, PV.Y,
489 ; EG-NEXT: AND_INT * T3.W, T4.W, literal.y,
490 ; EG-NEXT: 150(2.101948e-43), 32(4.484155e-44)
491 ; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0,
492 ; EG-NEXT: AND_INT T2.Y, PV.Z, literal.x,
493 ; EG-NEXT: SUB_INT T3.Z, literal.y, T0.Y,
494 ; EG-NEXT: NOT_INT T4.W, T4.W,
495 ; EG-NEXT: LSHR * T6.W, T1.Z, 1,
496 ; EG-NEXT: 32(4.484155e-44), 150(2.101948e-43)
497 ; EG-NEXT: BIT_ALIGN_INT T2.X, 0.0, T2.W, T2.Z,
498 ; EG-NEXT: ADD_INT T0.Y, T0.Y, literal.x,
499 ; EG-NEXT: BIT_ALIGN_INT T2.Z, 0.0, PS, PV.W,
500 ; EG-NEXT: BIT_ALIGN_INT T2.W, 0.0, T1.Z, PV.Z,
501 ; EG-NEXT: AND_INT * T4.W, PV.Z, literal.y,
502 ; EG-NEXT: -127(nan), 32(4.484155e-44)
503 ; EG-NEXT: CNDE_INT T3.X, PS, PV.W, 0.0,
504 ; EG-NEXT: CNDE_INT T4.Y, T3.W, PV.Z, T1.W,
505 ; EG-NEXT: SETGT_INT T1.Z, PV.Y, literal.x,
506 ; EG-NEXT: CNDE_INT T1.W, T1.Y, T0.Z, 0.0,
507 ; EG-NEXT: CNDE_INT * T2.W, T2.Y, PV.X, 0.0,
508 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
509 ; EG-NEXT: CNDE_INT T2.X, T5.W, PS, PV.W,
510 ; EG-NEXT: ASHR T1.Y, KC0[3].Z, literal.x,
511 ; EG-NEXT: CNDE_INT T0.Z, PV.Z, 0.0, PV.Y,
512 ; EG-NEXT: CNDE_INT T1.W, PV.Z, PV.X, T1.X,
513 ; EG-NEXT: ASHR * T2.W, KC0[4].X, literal.x,
514 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
515 ; EG-NEXT: XOR_INT T2.Y, PV.W, PS,
516 ; EG-NEXT: XOR_INT T0.Z, PV.Z, PS,
517 ; EG-NEXT: XOR_INT T1.W, PV.X, PV.Y,
518 ; EG-NEXT: XOR_INT * T3.W, T3.Y, PV.Y,
519 ; EG-NEXT: SUB_INT T3.Y, PS, T1.Y,
520 ; EG-NEXT: SUBB_UINT T1.Z, PV.W, T1.Y,
521 ; EG-NEXT: SUB_INT T3.W, PV.Z, T2.W,
522 ; EG-NEXT: SUBB_UINT * T4.W, PV.Y, T2.W,
523 ; EG-NEXT: SUB_INT T4.Y, PV.W, PS,
524 ; EG-NEXT: SUB_INT T0.Z, PV.Y, PV.Z,
525 ; EG-NEXT: BFE_UINT T3.W, KC0[3].Y, literal.x, T0.W,
526 ; EG-NEXT: AND_INT * T4.W, KC0[3].Y, literal.y,
527 ; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
528 ; EG-NEXT: SETGT_INT T0.X, 0.0, T0.X,
529 ; EG-NEXT: ADD_INT T3.Y, PV.W, literal.x,
530 ; EG-NEXT: OR_INT T1.Z, PS, literal.y,
531 ; EG-NEXT: BFE_UINT T0.W, KC0[3].W, literal.z, T0.W,
532 ; EG-NEXT: ADD_INT * T4.W, PV.W, literal.w,
533 ; EG-NEXT: -127(nan), 8388608(1.175494e-38)
534 ; EG-NEXT: 23(3.222986e-44), -150(nan)
535 ; EG-NEXT: AND_INT T1.X, KC0[3].W, literal.x,
536 ; EG-NEXT: ADD_INT T5.Y, PV.W, literal.y,
537 ; EG-NEXT: SUB_INT T2.Z, literal.z, T3.W,
538 ; EG-NEXT: NOT_INT T3.W, PS,
539 ; EG-NEXT: LSHR * T5.W, PV.Z, 1,
540 ; EG-NEXT: 8388607(1.175494e-38), -150(nan)
541 ; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
542 ; EG-NEXT: BIT_ALIGN_INT T2.X, 0.0, PS, PV.W,
543 ; EG-NEXT: AND_INT T6.Y, PV.Z, literal.x,
544 ; EG-NEXT: AND_INT T3.Z, PV.Y, literal.y,
545 ; EG-NEXT: OR_INT T3.W, PV.X, literal.z,
546 ; EG-NEXT: AND_INT * T5.W, T4.W, literal.y,
547 ; EG-NEXT: 32(4.484155e-44), 31(4.344025e-44)
548 ; EG-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
549 ; EG-NEXT: BIT_ALIGN_INT T1.X, 0.0, T1.Z, T2.Z,
550 ; EG-NEXT: LSHL T7.Y, T1.Z, PS,
551 ; EG-NEXT: AND_INT T1.Z, T4.W, literal.x,
552 ; EG-NEXT: LSHL T4.W, PV.W, PV.Z,
553 ; EG-NEXT: AND_INT * T5.W, T5.Y, literal.x,
554 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
555 ; EG-NEXT: CNDE_INT T3.X, PS, PV.W, 0.0,
556 ; EG-NEXT: CNDE_INT T8.Y, PV.Z, PV.Y, 0.0,
557 ; EG-NEXT: CNDE_INT * T2.Z, T6.Y, PV.X, 0.0,
558 ; EG-NEXT: ALU clause starting at 106:
559 ; EG-NEXT: CNDE_INT T6.W, T1.Z, T2.X, T7.Y, BS:VEC_021/SCL_122
560 ; EG-NEXT: SETGT_INT * T7.W, T3.Y, literal.x,
561 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
562 ; EG-NEXT: CNDE_INT T1.X, PS, 0.0, PV.W,
563 ; EG-NEXT: CNDE_INT T6.Y, PS, T2.Z, T8.Y,
564 ; EG-NEXT: SUB_INT T1.Z, literal.x, T0.W,
565 ; EG-NEXT: NOT_INT T6.W, T5.Y,
566 ; EG-NEXT: LSHR * T7.W, T3.W, 1,
567 ; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
568 ; EG-NEXT: ASHR T2.X, KC0[3].Y, literal.x,
569 ; EG-NEXT: ADD_INT T5.Y, T0.W, literal.y,
570 ; EG-NEXT: BIT_ALIGN_INT T2.Z, 0.0, PS, PV.W,
571 ; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, T3.W, PV.Z,
572 ; EG-NEXT: AND_INT * T3.W, PV.Z, literal.z,
573 ; EG-NEXT: 31(4.344025e-44), -127(nan)
574 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
575 ; EG-NEXT: CNDE_INT T4.X, PS, PV.W, 0.0,
576 ; EG-NEXT: CNDE_INT T7.Y, T5.W, PV.Z, T4.W,
577 ; EG-NEXT: SETGT_INT T1.Z, PV.Y, literal.x,
578 ; EG-NEXT: XOR_INT T0.W, T6.Y, PV.X,
579 ; EG-NEXT: XOR_INT * T3.W, T1.X, PV.X,
580 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
581 ; EG-NEXT: SUB_INT T1.X, PS, T2.X,
582 ; EG-NEXT: SUBB_UINT T6.Y, PV.W, T2.X,
583 ; EG-NEXT: CNDE_INT T2.Z, PV.Z, 0.0, PV.Y,
584 ; EG-NEXT: CNDE_INT T3.W, PV.Z, PV.X, T3.X,
585 ; EG-NEXT: ASHR * T4.W, KC0[3].W, literal.x,
586 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
587 ; EG-NEXT: XOR_INT T3.X, PV.W, PS,
588 ; EG-NEXT: XOR_INT T7.Y, PV.Z, PS,
589 ; EG-NEXT: SUB_INT T1.Z, PV.X, PV.Y,
590 ; EG-NEXT: SETGT_INT T3.W, 0.0, T3.Y,
591 ; EG-NEXT: CNDE_INT * T6.W, T0.X, T0.Z, 0.0,
592 ; EG-NEXT: SETGT_INT T1.X, 0.0, T0.Y,
593 ; EG-NEXT: CNDE_INT T6.Y, PV.W, PV.Z, 0.0,
594 ; EG-NEXT: SUB_INT T0.Z, T1.W, T1.Y, BS:VEC_021/SCL_122
595 ; EG-NEXT: SUB_INT T1.W, PV.Y, T4.W,
596 ; EG-NEXT: SUBB_UINT * T5.W, PV.X, T4.W,
597 ; EG-NEXT: SUB_INT T4.X, PV.W, PS,
598 ; EG-NEXT: SETGT_INT T0.Y, 0.0, T5.Y, BS:VEC_021/SCL_122
599 ; EG-NEXT: CNDE_INT T6.Z, T0.X, PV.Z, 0.0,
600 ; EG-NEXT: SUB_INT T0.W, T0.W, T2.X,
601 ; EG-NEXT: CNDE_INT * T1.W, PV.X, T4.Y, 0.0,
602 ; EG-NEXT: CNDE_INT T6.X, T3.W, PV.W, 0.0,
603 ; EG-NEXT: CNDE_INT T1.Y, PV.Y, PV.X, 0.0,
604 ; EG-NEXT: SUB_INT T0.W, T2.Y, T2.W,
605 ; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
606 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
607 ; EG-NEXT: CNDE_INT T1.Z, T1.X, PV.W, 0.0,
608 ; EG-NEXT: SUB_INT * T0.W, T3.X, T4.W, BS:VEC_120/SCL_212
609 ; EG-NEXT: CNDE_INT T1.X, T0.Y, PV.W, 0.0,
610 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
611 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
612 ; EG-NEXT: LSHR * T2.X, PV.W, literal.x,
613 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
614 %conv = fptoui <4 x float> %x to <4 x i64>
615 store <4 x i64> %conv, ptr addrspace(1) %out
; Narrowest case: fptoui of a float kernel argument to i1.
; No conversion instruction is expected here. All three targets are checked
; for an equality compare of the input against 1.0 whose result becomes the
; stored 0/1 value (v_cmp_eq_f32 + v_cndmask on amdgcn, SETE_DX10 + AND with 1
; on r600). The result is written as a single byte (buffer_store_byte on
; amdgcn, MEM_RAT MSKOR with a 255 mask on r600).
619 define amdgpu_kernel void @fp_to_uint_f32_to_i1(ptr addrspace(1) %out, float %in) #0 {
620 ; SI-LABEL: fp_to_uint_f32_to_i1:
622 ; SI-NEXT: s_load_dword s6, s[4:5], 0xb
623 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
624 ; SI-NEXT: s_mov_b32 s3, 0xf000
625 ; SI-NEXT: s_mov_b32 s2, -1
626 ; SI-NEXT: s_waitcnt lgkmcnt(0)
627 ; SI-NEXT: v_cmp_eq_f32_e64 s[4:5], 1.0, s6
628 ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
629 ; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
632 ; VI-LABEL: fp_to_uint_f32_to_i1:
634 ; VI-NEXT: s_load_dword s6, s[4:5], 0x2c
635 ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
636 ; VI-NEXT: s_mov_b32 s3, 0xf000
637 ; VI-NEXT: s_mov_b32 s2, -1
638 ; VI-NEXT: s_waitcnt lgkmcnt(0)
639 ; VI-NEXT: v_cmp_eq_f32_e64 s[4:5], 1.0, s6
640 ; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
641 ; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0
644 ; EG-LABEL: fp_to_uint_f32_to_i1:
646 ; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[]
647 ; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
650 ; EG-NEXT: ALU clause starting at 4:
651 ; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
652 ; EG-NEXT: SETE_DX10 * T1.W, KC0[2].Z, 1.0,
653 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
654 ; EG-NEXT: AND_INT T1.W, PS, 1,
655 ; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
656 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
657 ; EG-NEXT: LSHL T0.X, PV.W, PS,
658 ; EG-NEXT: LSHL * T0.W, literal.x, PS,
659 ; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
660 ; EG-NEXT: MOV T0.Y, 0.0,
661 ; EG-NEXT: MOV * T0.Z, 0.0,
662 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
663 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
664 %conv = fptoui float %in to i1
665 store i1 %conv, ptr addrspace(1) %out
; Same i1 conversion as @fp_to_uint_f32_to_i1, but the input first passes
; through llvm.fabs.f32. The checks expect no separate absolute-value
; instruction: the fabs appears as an |...| operand modifier on the compare
; against 1.0 (|s6| on amdgcn, |KC0[2].Z| on r600).
669 define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(ptr addrspace(1) %out, float %in) #0 {
670 ; SI-LABEL: fp_to_uint_fabs_f32_to_i1:
672 ; SI-NEXT: s_load_dword s6, s[4:5], 0xb
673 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
674 ; SI-NEXT: s_mov_b32 s3, 0xf000
675 ; SI-NEXT: s_mov_b32 s2, -1
676 ; SI-NEXT: s_waitcnt lgkmcnt(0)
677 ; SI-NEXT: v_cmp_eq_f32_e64 s[4:5], 1.0, |s6|
678 ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
679 ; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
682 ; VI-LABEL: fp_to_uint_fabs_f32_to_i1:
684 ; VI-NEXT: s_load_dword s6, s[4:5], 0x2c
685 ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
686 ; VI-NEXT: s_mov_b32 s3, 0xf000
687 ; VI-NEXT: s_mov_b32 s2, -1
688 ; VI-NEXT: s_waitcnt lgkmcnt(0)
689 ; VI-NEXT: v_cmp_eq_f32_e64 s[4:5], 1.0, |s6|
690 ; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
691 ; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0
694 ; EG-LABEL: fp_to_uint_fabs_f32_to_i1:
696 ; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[]
697 ; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
700 ; EG-NEXT: ALU clause starting at 4:
701 ; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
702 ; EG-NEXT: SETE_DX10 * T1.W, |KC0[2].Z|, 1.0,
703 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
704 ; EG-NEXT: AND_INT T1.W, PS, 1,
705 ; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
706 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
707 ; EG-NEXT: LSHL T0.X, PV.W, PS,
708 ; EG-NEXT: LSHL * T0.W, literal.x, PS,
709 ; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
710 ; EG-NEXT: MOV T0.Y, 0.0,
711 ; EG-NEXT: MOV * T0.Z, 0.0,
712 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
713 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
714 %in.fabs = call float @llvm.fabs.f32(float %in)
715 %conv = fptoui float %in.fabs to i1
716 store i1 %conv, ptr addrspace(1) %out
; Sub-dword result: fptoui of a float kernel argument to i16.
; The amdgcn checks expect the same 32-bit v_cvt_u32_f32 as the i32 test, with
; the narrowing visible only in the store (buffer_store_short). The r600
; checks expect TRUNC + FLT_TO_UINT followed by a MEM_RAT MSKOR store using a
; 65535 mask.
768 define amdgpu_kernel void @fp_to_uint_f32_to_i16(ptr addrspace(1) %out, float %in) #0 {
769 ; SI-LABEL: fp_to_uint_f32_to_i16:
771 ; SI-NEXT: s_load_dword s6, s[4:5], 0xb
772 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
773 ; SI-NEXT: s_mov_b32 s3, 0xf000
774 ; SI-NEXT: s_mov_b32 s2, -1
775 ; SI-NEXT: s_waitcnt lgkmcnt(0)
776 ; SI-NEXT: v_cvt_u32_f32_e32 v0, s6
777 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
780 ; VI-LABEL: fp_to_uint_f32_to_i16:
782 ; VI-NEXT: s_load_dword s2, s[4:5], 0x2c
783 ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
784 ; VI-NEXT: s_mov_b32 s3, 0xf000
785 ; VI-NEXT: s_waitcnt lgkmcnt(0)
786 ; VI-NEXT: v_cvt_u32_f32_e32 v0, s2
787 ; VI-NEXT: s_mov_b32 s2, -1
788 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
791 ; EG-LABEL: fp_to_uint_f32_to_i16:
793 ; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[]
794 ; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
797 ; EG-NEXT: ALU clause starting at 4:
798 ; EG-NEXT: TRUNC T0.W, KC0[2].Z,
799 ; EG-NEXT: AND_INT * T1.W, KC0[2].Y, literal.x,
800 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
801 ; EG-NEXT: LSHL T1.W, PS, literal.x,
802 ; EG-NEXT: FLT_TO_UINT * T0.X, PV.W,
803 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
804 ; EG-NEXT: LSHL T0.X, PS, PV.W,
805 ; EG-NEXT: LSHL * T0.W, literal.x, PV.W,
806 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
807 ; EG-NEXT: MOV T0.Y, 0.0,
808 ; EG-NEXT: MOV * T0.Z, 0.0,
809 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
810 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
811 %uint = fptoui float %in to i16
812 store i16 %uint, ptr addrspace(1) %out
; Attribute groups: #0 is attached to the i1 and i16 kernels (the i32/i64
; kernels carry no attribute group); #1 is attached to the llvm.fabs.f32
; declaration.
768 attributes #0 = { nounwind }
769 attributes #1 = { nounwind readnone }