; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c -O3 | FileCheck %s --check-prefixes=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK-64

define i32 @test_f16_oeq_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_oeq_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovnel %ebx, %ebp
; SSE2-NEXT:    cmovpl %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_oeq_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovnel %esi, %eax
; AVX-NEXT:    cmovpl %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_oeq_q:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovnel %eax, %ecx
; CHECK-32-NEXT:    cmovpl %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_oeq_q:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vucomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmovnel %esi, %eax
; CHECK-64-NEXT:    cmovpl %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"oeq",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_ogt_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ogt_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovbel %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ogt_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovbel %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_ogt_q:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmoval %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_ogt_q:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vucomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmovbel %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"ogt",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_oge_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_oge_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovbl %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_oge_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovbl %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_oge_q:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovael %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_oge_q:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vucomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmovbl %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"oge",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_olt_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_olt_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
; SSE2-NEXT:    # xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    ucomiss %xmm0, %xmm1
; SSE2-NEXT:    cmovbel %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_olt_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm1, %ecx
; AVX-NEXT:    vpextrw $0, %xmm0, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovbel %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_olt_q:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmoval %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_olt_q:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vucomish %xmm0, %xmm1
; CHECK-64-NEXT:    cmovbel %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"olt",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_ole_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ole_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
; SSE2-NEXT:    # xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    ucomiss %xmm0, %xmm1
; SSE2-NEXT:    cmovbl %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ole_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm1, %ecx
; AVX-NEXT:    vpextrw $0, %xmm0, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovbl %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_ole_q:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovael %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_ole_q:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vucomish %xmm0, %xmm1
; CHECK-64-NEXT:    cmovbl %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"ole",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_one_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_one_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovel %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_one_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovel %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_one_q:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovnel %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_one_q:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vucomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmovel %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"one",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_ord_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ord_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovpl %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ord_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovpl %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_ord_q:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovnpl %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_ord_q:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vucomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmovpl %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"ord",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_ueq_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ueq_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovnel %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ueq_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovnel %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_ueq_q:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovel %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_ueq_q:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vucomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmovnel %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"ueq",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_ugt_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ugt_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
; SSE2-NEXT:    # xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    ucomiss %xmm0, %xmm1
; SSE2-NEXT:    cmovael %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ugt_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm1, %ecx
; AVX-NEXT:    vpextrw $0, %xmm0, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovael %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_ugt_q:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovbl %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_ugt_q:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vucomish %xmm0, %xmm1
; CHECK-64-NEXT:    cmovael %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"ugt",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_uge_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_uge_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
; SSE2-NEXT:    # xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    ucomiss %xmm0, %xmm1
; SSE2-NEXT:    cmoval %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_uge_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm1, %ecx
; AVX-NEXT:    vpextrw $0, %xmm0, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmoval %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_uge_q:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovbel %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_uge_q:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vucomish %xmm0, %xmm1
; CHECK-64-NEXT:    cmoval %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"uge",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_ult_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ult_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovael %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ult_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovael %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_ult_q:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovbl %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_ult_q:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vucomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmovael %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"ult",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_ule_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ule_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmoval %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ule_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmoval %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_ule_q:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovbel %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_ule_q:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vucomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmoval %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"ule",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_une_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_une_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovnel %ebp, %ebx
; SSE2-NEXT:    cmovpl %ebp, %ebx
; SSE2-NEXT:    movl %ebx, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_une_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %esi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovnel %edi, %eax
; AVX-NEXT:    cmovpl %edi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_une_q:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovnel %eax, %ecx
; CHECK-32-NEXT:    cmovpl %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_une_q:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %esi, %eax
; CHECK-64-NEXT:    vucomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmovnel %edi, %eax
; CHECK-64-NEXT:    cmovpl %edi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"une",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_uno_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_uno_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovnpl %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_uno_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovnpl %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_uno_q:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovpl %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_uno_q:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vucomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmovnpl %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"uno",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_oeq_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_oeq_s:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    comiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovnel %ebx, %ebp
; SSE2-NEXT:    cmovpl %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_oeq_s:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vcomiss %xmm0, %xmm1
; AVX-NEXT:    cmovnel %esi, %eax
; AVX-NEXT:    cmovpl %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_oeq_s:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovnel %eax, %ecx
; CHECK-32-NEXT:    cmovpl %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_oeq_s:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vcomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmovnel %esi, %eax
; CHECK-64-NEXT:    cmovpl %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"oeq",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_ogt_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ogt_s:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    comiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovbel %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ogt_s:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vcomiss %xmm0, %xmm1
; AVX-NEXT:    cmovbel %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_ogt_s:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmoval %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_ogt_s:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vcomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmovbel %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"ogt",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_oge_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_oge_s:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    comiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovbl %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_oge_s:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vcomiss %xmm0, %xmm1
; AVX-NEXT:    cmovbl %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_oge_s:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovael %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_oge_s:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vcomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmovbl %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"oge",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_olt_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_olt_s:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
; SSE2-NEXT:    # xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    comiss %xmm0, %xmm1
; SSE2-NEXT:    cmovbel %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_olt_s:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm1, %ecx
; AVX-NEXT:    vpextrw $0, %xmm0, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vcomiss %xmm0, %xmm1
; AVX-NEXT:    cmovbel %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_olt_s:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmoval %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_olt_s:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vcomish %xmm0, %xmm1
; CHECK-64-NEXT:    cmovbel %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"olt",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_ole_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ole_s:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
; SSE2-NEXT:    # xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    comiss %xmm0, %xmm1
; SSE2-NEXT:    cmovbl %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ole_s:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm1, %ecx
; AVX-NEXT:    vpextrw $0, %xmm0, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vcomiss %xmm0, %xmm1
; AVX-NEXT:    cmovbl %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_ole_s:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovael %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_ole_s:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vcomish %xmm0, %xmm1
; CHECK-64-NEXT:    cmovbl %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"ole",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_one_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_one_s:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    comiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovel %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_one_s:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vcomiss %xmm0, %xmm1
; AVX-NEXT:    cmovel %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_one_s:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovnel %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_one_s:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vcomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmovel %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"one",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_ord_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ord_s:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    comiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovpl %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ord_s:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vcomiss %xmm0, %xmm1
; AVX-NEXT:    cmovpl %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_ord_s:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovnpl %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_ord_s:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vcomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmovpl %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"ord",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_ueq_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ueq_s:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    comiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovnel %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ueq_s:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vcomiss %xmm0, %xmm1
; AVX-NEXT:    cmovnel %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_ueq_s:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovel %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_ueq_s:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vcomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmovnel %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"ueq",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_ugt_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ugt_s:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
; SSE2-NEXT:    # xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    comiss %xmm0, %xmm1
; SSE2-NEXT:    cmovael %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ugt_s:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm1, %ecx
; AVX-NEXT:    vpextrw $0, %xmm0, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vcomiss %xmm0, %xmm1
; AVX-NEXT:    cmovael %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_ugt_s:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovbl %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_ugt_s:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vcomish %xmm0, %xmm1
; CHECK-64-NEXT:    cmovael %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"ugt",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_uge_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_uge_s:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
; SSE2-NEXT:    # xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    comiss %xmm0, %xmm1
; SSE2-NEXT:    cmoval %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_uge_s:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm1, %ecx
; AVX-NEXT:    vpextrw $0, %xmm0, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vcomiss %xmm0, %xmm1
; AVX-NEXT:    cmoval %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_uge_s:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovbel %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_uge_s:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vcomish %xmm0, %xmm1
; CHECK-64-NEXT:    cmoval %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"uge",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_ult_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ult_s:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    comiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovael %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ult_s:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vcomiss %xmm0, %xmm1
; AVX-NEXT:    cmovael %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_ult_s:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovbl %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_ult_s:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vcomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmovael %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"ult",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_ule_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ule_s:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    comiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmoval %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ule_s:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vcomiss %xmm0, %xmm1
; AVX-NEXT:    cmoval %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_ule_s:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovbel %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_ule_s:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vcomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmoval %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"ule",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_une_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_une_s:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    comiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovnel %ebp, %ebx
; SSE2-NEXT:    cmovpl %ebp, %ebx
; SSE2-NEXT:    movl %ebx, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_une_s:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %esi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vcomiss %xmm0, %xmm1
; AVX-NEXT:    cmovnel %edi, %eax
; AVX-NEXT:    cmovpl %edi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_une_s:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovnel %eax, %ecx
; CHECK-32-NEXT:    cmovpl %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_une_s:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %esi, %eax
; CHECK-64-NEXT:    vcomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmovnel %edi, %eax
; CHECK-64-NEXT:    cmovpl %edi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"une",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_uno_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_uno_s:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    comiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovnpl %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_uno_s:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vcomiss %xmm0, %xmm1
; AVX-NEXT:    cmovnpl %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_uno_s:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovpl %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_uno_s:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vcomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmovnpl %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"uno",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define void @foo(half %0, half %1) #0 {
; SSE2-LABEL: foo:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    jbe .LBB28_1
; SSE2-NEXT:  # %bb.2:
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    jmp bar@PLT # TAILCALL
; SSE2-NEXT:  .LBB28_1:
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: foo:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $0, %xmm0, %eax
; AVX-NEXT:    vpextrw $0, %xmm1, %ecx
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    vmovd %eax, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    ja bar@PLT # TAILCALL
; AVX-NEXT:  # %bb.1:
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: foo:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    ja bar@PLT # TAILCALL
; CHECK-32-NEXT:  # %bb.1:
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: foo:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    vucomish %xmm1, %xmm0
; CHECK-64-NEXT:    ja bar@PLT # TAILCALL
; CHECK-64-NEXT:  # %bb.1:
; CHECK-64-NEXT:    retq
  %3 = call i1 @llvm.experimental.constrained.fcmp.f16( half %0, half %1, metadata !"ogt", metadata !"fpexcept.strict") #0
  br i1 %3, label %4, label %5

4:                                                ; preds = %2
  tail call void @bar() #0
  br label %5

5:                                                ; preds = %4, %2
  ret void
}

declare void @bar()

attributes #0 = { strictfp nounwind }

declare i1 @llvm.experimental.constrained.fcmp.f16(half, half, metadata, metadata)
declare i1 @llvm.experimental.constrained.fcmps.f16(half, half, metadata, metadata)