; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -verify-machineinstrs < %s | FileCheck -check-prefix=SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -global-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -global-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL %s
; fcmp olt (ordered, less than) on two inreg float arguments. The i1 result is
; sign-extended to i32 and stored to %out; the checks for both selectors expect
; s_cmp_lt_f32.
define amdgpu_vs void @f32_olt(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
; SDAG-LABEL: f32_olt:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_lt_f32 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f32_olt:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_lt_f32 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp olt float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp oeq (ordered, equal) on two inreg float arguments. The i1 result is
; sign-extended to i32 and stored to %out; the checks for both selectors expect
; s_cmp_eq_f32.
define amdgpu_vs void @f32_oeq(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
; SDAG-LABEL: f32_oeq:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_eq_f32 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f32_oeq:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_eq_f32 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp oeq float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp ole (ordered, less than or equal) on two inreg float arguments. The i1
; result is sign-extended to i32 and stored to %out; the checks for both
; selectors expect s_cmp_le_f32.
define amdgpu_vs void @f32_ole(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
; SDAG-LABEL: f32_ole:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_le_f32 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f32_ole:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_le_f32 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp ole float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp ogt (ordered, greater than) on two inreg float arguments. The i1 result
; is sign-extended to i32 and stored to %out; the checks for both selectors
; expect s_cmp_gt_f32.
define amdgpu_vs void @f32_ogt(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
; SDAG-LABEL: f32_ogt:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_gt_f32 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f32_ogt:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_gt_f32 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp ogt float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp one (ordered, not equal) on two inreg float arguments. The i1 result is
; sign-extended to i32 and stored to %out; the checks for both selectors expect
; s_cmp_lg_f32.
define amdgpu_vs void @f32_one(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
; SDAG-LABEL: f32_one:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_lg_f32 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f32_one:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_lg_f32 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp one float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp oge (ordered, greater than or equal) on two inreg float arguments. The
; i1 result is sign-extended to i32 and stored to %out; the checks for both
; selectors expect s_cmp_ge_f32.
define amdgpu_vs void @f32_oge(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
; SDAG-LABEL: f32_oge:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_ge_f32 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f32_oge:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_ge_f32 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp oge float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp ord (ordered: neither operand is a NaN) on two inreg float arguments.
; The i1 result is sign-extended to i32 and stored to %out; the checks for both
; selectors expect s_cmp_o_f32.
define amdgpu_vs void @f32_ord(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
; SDAG-LABEL: f32_ord:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_o_f32 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f32_ord:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_o_f32 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp ord float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp uno (unordered: either operand is a NaN) on two inreg float arguments.
; The i1 result is sign-extended to i32 and stored to %out; the checks for both
; selectors expect s_cmp_u_f32.
define amdgpu_vs void @f32_uno(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
; SDAG-LABEL: f32_uno:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_u_f32 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f32_uno:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_u_f32 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp uno float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp ult (unordered or less than) on two inreg float arguments. The i1 result
; is sign-extended to i32 and stored to %out; the checks for both selectors
; expect the negated compare s_cmp_nge_f32.
define amdgpu_vs void @f32_ult(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
; SDAG-LABEL: f32_ult:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_nge_f32 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f32_ult:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_nge_f32 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp ult float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp ueq (unordered or equal) on two inreg float arguments. The i1 result is
; sign-extended to i32 and stored to %out; the checks for both selectors expect
; the negated compare s_cmp_nlg_f32.
define amdgpu_vs void @f32_ueq(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
; SDAG-LABEL: f32_ueq:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_nlg_f32 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f32_ueq:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_nlg_f32 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp ueq float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp ule (unordered or less than or equal) on two inreg float arguments. The
; i1 result is sign-extended to i32 and stored to %out; the checks for both
; selectors expect the negated compare s_cmp_ngt_f32.
define amdgpu_vs void @f32_ule(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
; SDAG-LABEL: f32_ule:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_ngt_f32 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f32_ule:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_ngt_f32 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp ule float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp ugt (unordered or greater than) on two inreg float arguments. The i1
; result is sign-extended to i32 and stored to %out; the checks for both
; selectors expect the negated compare s_cmp_nle_f32.
define amdgpu_vs void @f32_ugt(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
; SDAG-LABEL: f32_ugt:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_nle_f32 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f32_ugt:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_nle_f32 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp ugt float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp une (unordered or not equal) on two inreg float arguments. The i1 result
; is sign-extended to i32 and stored to %out; the checks for both selectors
; expect s_cmp_neq_f32.
define amdgpu_vs void @f32_une(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
; SDAG-LABEL: f32_une:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_neq_f32 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f32_une:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_neq_f32 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp une float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp uge (unordered or greater than or equal) on two inreg float arguments.
; The i1 result is sign-extended to i32 and stored to %out; the checks for both
; selectors expect the negated compare s_cmp_nlt_f32.
define amdgpu_vs void @f32_uge(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
; SDAG-LABEL: f32_uge:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_nlt_f32 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f32_uge:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_nlt_f32 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp uge float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp olt (ordered, less than) on two inreg half arguments. The i1 result is
; sign-extended to i32 and stored to %out; the checks for both selectors expect
; s_cmp_lt_f16.
define amdgpu_vs void @f16_olt(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
; SDAG-LABEL: f16_olt:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_lt_f16 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f16_olt:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_lt_f16 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp olt half %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp oeq (ordered, equal) on two inreg half arguments. The i1 result is
; sign-extended to i32 and stored to %out; the checks for both selectors expect
; s_cmp_eq_f16.
define amdgpu_vs void @f16_oeq(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
; SDAG-LABEL: f16_oeq:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_eq_f16 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f16_oeq:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_eq_f16 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp oeq half %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp ole (ordered, less than or equal) on two inreg half arguments. The i1
; result is sign-extended to i32 and stored to %out; the checks for both
; selectors expect s_cmp_le_f16.
define amdgpu_vs void @f16_ole(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
; SDAG-LABEL: f16_ole:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_le_f16 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f16_ole:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_le_f16 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp ole half %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp ogt (ordered, greater than) on two inreg half arguments. The i1 result
; is sign-extended to i32 and stored to %out; the checks for both selectors
; expect s_cmp_gt_f16.
define amdgpu_vs void @f16_ogt(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
; SDAG-LABEL: f16_ogt:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_gt_f16 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f16_ogt:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_gt_f16 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp ogt half %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp one (ordered, not equal) on two inreg half arguments. The i1 result is
; sign-extended to i32 and stored to %out; the checks for both selectors expect
; s_cmp_lg_f16.
define amdgpu_vs void @f16_one(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
; SDAG-LABEL: f16_one:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_lg_f16 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f16_one:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_lg_f16 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp one half %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp oge (ordered, greater than or equal) on two inreg half arguments. The i1
; result is sign-extended to i32 and stored to %out; the checks for both
; selectors expect s_cmp_ge_f16.
define amdgpu_vs void @f16_oge(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
; SDAG-LABEL: f16_oge:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_ge_f16 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f16_oge:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_ge_f16 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp oge half %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp ord (ordered: neither operand is a NaN) on two inreg half arguments. The
; i1 result is sign-extended to i32 and stored to %out; the checks for both
; selectors expect s_cmp_o_f16.
define amdgpu_vs void @f16_ord(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
; SDAG-LABEL: f16_ord:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_o_f16 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f16_ord:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_o_f16 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp ord half %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp uno (unordered: either operand is a NaN) on two inreg half arguments.
; The i1 result is sign-extended to i32 and stored to %out; the checks for both
; selectors expect s_cmp_u_f16.
define amdgpu_vs void @f16_uno(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
; SDAG-LABEL: f16_uno:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_u_f16 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f16_uno:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_u_f16 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp uno half %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp ult (unordered or less than) on two inreg half arguments. The i1 result
; is sign-extended to i32 and stored to %out; the checks for both selectors
; expect the negated compare s_cmp_nge_f16.
define amdgpu_vs void @f16_ult(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
; SDAG-LABEL: f16_ult:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_nge_f16 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f16_ult:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_nge_f16 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp ult half %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp ueq (unordered or equal) on two inreg half arguments. The i1 result is
; sign-extended to i32 and stored to %out; the checks for both selectors expect
; the negated compare s_cmp_nlg_f16.
define amdgpu_vs void @f16_ueq(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
; SDAG-LABEL: f16_ueq:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_nlg_f16 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f16_ueq:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_nlg_f16 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp ueq half %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp ule (unordered or less than or equal) on two inreg half arguments. The
; i1 result is sign-extended to i32 and stored to %out; the checks for both
; selectors expect the negated compare s_cmp_ngt_f16.
define amdgpu_vs void @f16_ule(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
; SDAG-LABEL: f16_ule:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_ngt_f16 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f16_ule:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_ngt_f16 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp ule half %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp ugt (unordered or greater than) on two inreg half arguments. The i1
; result is sign-extended to i32 and stored to %out; the checks for both
; selectors expect the negated compare s_cmp_nle_f16.
define amdgpu_vs void @f16_ugt(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
; SDAG-LABEL: f16_ugt:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_nle_f16 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f16_ugt:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_nle_f16 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp ugt half %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp une (unordered or not equal) on two inreg half arguments. The i1 result
; is sign-extended to i32 and stored to %out; the checks for both selectors
; expect s_cmp_neq_f16.
define amdgpu_vs void @f16_une(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
; SDAG-LABEL: f16_une:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_neq_f16 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f16_une:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_neq_f16 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp une half %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}
; fcmp uge (unordered or greater than or equal) on two inreg half arguments.
; The i1 result is sign-extended to i32 and stored to %out; the checks for both
; selectors expect the negated compare s_cmp_nlt_f16.
define amdgpu_vs void @f16_uge(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
; SDAG-LABEL: f16_uge:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_cmp_nlt_f16 s2, s3
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: s_cselect_b32 s2, -1, 0
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: f16_uge:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_cmp_nlt_f16 s2, s3
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT: s_endpgm
entry:
  %0 = fcmp uge half %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, ptr addrspace(1) %out
  ret void
}