1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=1 -verify-machineinstrs \
3 ; RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWON
4 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=0 -verify-machineinstrs \
5 ; RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWOFF
6 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c -fixup-byte-word-insts=1 -verify-machineinstrs \
7 ; RUN: | FileCheck %s -check-prefixes=CHECK,BWON,BWON-F16C
8 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -fixup-byte-word-insts=0 -verify-machineinstrs \
9 ; RUN: | FileCheck %s -check-prefixes=CHECK-I686
; Load a half from %in and store it unchanged to %out. No conversion is
; involved, so none of the check prefixes below expect a libcall.
11 define void @test_load_store(ptr %in, ptr %out) #0 {
12 ; CHECK-LIBCALL-LABEL: test_load_store:
13 ; CHECK-LIBCALL: # %bb.0:
14 ; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0
15 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
16 ; CHECK-LIBCALL-NEXT: movw %ax, (%rsi)
17 ; CHECK-LIBCALL-NEXT: retq
19 ; BWON-F16C-LABEL: test_load_store:
21 ; BWON-F16C-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
22 ; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rsi)
23 ; BWON-F16C-NEXT: retq
25 ; CHECK-I686-LABEL: test_load_store:
26 ; CHECK-I686: # %bb.0:
27 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
28 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ecx
29 ; CHECK-I686-NEXT: pinsrw $0, (%ecx), %xmm0
30 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %ecx
31 ; CHECK-I686-NEXT: movw %cx, (%eax)
32 ; CHECK-I686-NEXT: retl
33 %val = load half, ptr %in
34 store half %val, ptr %out
; Load a half from %addr and bitcast it to i16. The BWON and BWOFF prefixes
; expect different load instructions (movzwl vs. movw); they correspond to the
; RUN lines using -fixup-byte-word-insts=1 and =0 respectively.
38 define i16 @test_bitcast_from_half(ptr %addr) #0 {
39 ; BWON-LABEL: test_bitcast_from_half:
41 ; BWON-NEXT: movzwl (%rdi), %eax
44 ; BWOFF-LABEL: test_bitcast_from_half:
46 ; BWOFF-NEXT: movw (%rdi), %ax
49 ; CHECK-I686-LABEL: test_bitcast_from_half:
50 ; CHECK-I686: # %bb.0:
51 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
52 ; CHECK-I686-NEXT: movw (%eax), %ax
53 ; CHECK-I686-NEXT: retl
54 %val = load half, ptr %addr
55 %val_int = bitcast half %val to i16
; Bitcast the i16 argument %in to half and store the result to %addr. The
; checks expect a plain 16-bit integer store with no floating-point code.
59 define void @test_bitcast_to_half(ptr %addr, i16 %in) #0 {
60 ; CHECK-LABEL: test_bitcast_to_half:
62 ; CHECK-NEXT: movw %si, (%rdi)
65 ; CHECK-I686-LABEL: test_bitcast_to_half:
66 ; CHECK-I686: # %bb.0:
67 ; CHECK-I686-NEXT: movw {{[0-9]+}}(%esp), %ax
68 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ecx
69 ; CHECK-I686-NEXT: movw %ax, (%ecx)
70 ; CHECK-I686-NEXT: retl
71 %val_fp = bitcast i16 %in to half
72 store half %val_fp, ptr %addr
; Load a half from %addr and extend it to float (fpext half -> float).
; The CHECK-LIBCALL and CHECK-I686 checks expect a call to __extendhfsf2;
; the BWON-F16C checks expect an inline vcvtph2ps instead.
76 define float @test_extend32(ptr %addr) #0 {
77 ; CHECK-LIBCALL-LABEL: test_extend32:
78 ; CHECK-LIBCALL: # %bb.0:
79 ; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0
80 ; CHECK-LIBCALL-NEXT: jmp __extendhfsf2@PLT # TAILCALL
82 ; BWON-F16C-LABEL: test_extend32:
84 ; BWON-F16C-NEXT: movzwl (%rdi), %eax
85 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
86 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
87 ; BWON-F16C-NEXT: retq
89 ; CHECK-I686-LABEL: test_extend32:
90 ; CHECK-I686: # %bb.0:
91 ; CHECK-I686-NEXT: subl $12, %esp
92 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
93 ; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0
94 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
95 ; CHECK-I686-NEXT: movw %ax, (%esp)
96 ; CHECK-I686-NEXT: calll __extendhfsf2
97 ; CHECK-I686-NEXT: addl $12, %esp
98 ; CHECK-I686-NEXT: retl
99 %val16 = load half, ptr %addr
100 %val32 = fpext half %val16 to float
; Load a half from %addr and extend it to double (fpext half -> double).
; In the x86-64 checks the extension goes through float first: half -> float
; via __extendhfsf2 (CHECK-LIBCALL) or vcvtph2ps (BWON-F16C), followed by a
; float -> double conversion (cvtss2sd / vcvtss2sd).
; Note: the result is named %val32 in the IR even though its type is double.
104 define double @test_extend64(ptr %addr) #0 {
105 ; CHECK-LIBCALL-LABEL: test_extend64:
106 ; CHECK-LIBCALL: # %bb.0:
107 ; CHECK-LIBCALL-NEXT: pushq %rax
108 ; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0
109 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
110 ; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0
111 ; CHECK-LIBCALL-NEXT: popq %rax
112 ; CHECK-LIBCALL-NEXT: retq
114 ; BWON-F16C-LABEL: test_extend64:
115 ; BWON-F16C: # %bb.0:
116 ; BWON-F16C-NEXT: movzwl (%rdi), %eax
117 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
118 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
119 ; BWON-F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
120 ; BWON-F16C-NEXT: retq
122 ; CHECK-I686-LABEL: test_extend64:
123 ; CHECK-I686: # %bb.0:
124 ; CHECK-I686-NEXT: subl $12, %esp
125 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
126 ; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0
127 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
128 ; CHECK-I686-NEXT: movw %ax, (%esp)
129 ; CHECK-I686-NEXT: calll __extendhfsf2
130 ; CHECK-I686-NEXT: addl $12, %esp
131 ; CHECK-I686-NEXT: retl
132 %val16 = load half, ptr %addr
133 %val32 = fpext half %val16 to double
; Truncate the float argument %in to half (fptrunc) and store it to %addr.
; The CHECK-LIBCALL and CHECK-I686 checks expect a call to __truncsfhf2;
; the BWON-F16C checks expect an inline vcvtps2ph instead.
137 define void @test_trunc32(float %in, ptr %addr) #0 {
138 ; CHECK-LIBCALL-LABEL: test_trunc32:
139 ; CHECK-LIBCALL: # %bb.0:
140 ; CHECK-LIBCALL-NEXT: pushq %rbx
141 ; CHECK-LIBCALL-NEXT: movq %rdi, %rbx
142 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
143 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
144 ; CHECK-LIBCALL-NEXT: movw %ax, (%rbx)
145 ; CHECK-LIBCALL-NEXT: popq %rbx
146 ; CHECK-LIBCALL-NEXT: retq
148 ; BWON-F16C-LABEL: test_trunc32:
149 ; BWON-F16C: # %bb.0:
150 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
151 ; BWON-F16C-NEXT: vmovd %xmm0, %eax
152 ; BWON-F16C-NEXT: movw %ax, (%rdi)
153 ; BWON-F16C-NEXT: retq
155 ; CHECK-I686-LABEL: test_trunc32:
156 ; CHECK-I686: # %bb.0:
157 ; CHECK-I686-NEXT: pushl %esi
158 ; CHECK-I686-NEXT: subl $8, %esp
159 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
160 ; CHECK-I686-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
161 ; CHECK-I686-NEXT: movd %xmm0, (%esp)
162 ; CHECK-I686-NEXT: calll __truncsfhf2
163 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
164 ; CHECK-I686-NEXT: movw %ax, (%esi)
165 ; CHECK-I686-NEXT: addl $8, %esp
166 ; CHECK-I686-NEXT: popl %esi
167 ; CHECK-I686-NEXT: retl
168 %val16 = fptrunc float %in to half
169 store half %val16, ptr %addr
; Truncate the double argument %in to half (fptrunc) and store it to %addr.
; All three check groups, including BWON-F16C, expect a call to __truncdfhf2.
173 define void @test_trunc64(double %in, ptr %addr) #0 {
174 ; CHECK-LIBCALL-LABEL: test_trunc64:
175 ; CHECK-LIBCALL: # %bb.0:
176 ; CHECK-LIBCALL-NEXT: pushq %rbx
177 ; CHECK-LIBCALL-NEXT: movq %rdi, %rbx
178 ; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT
179 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
180 ; CHECK-LIBCALL-NEXT: movw %ax, (%rbx)
181 ; CHECK-LIBCALL-NEXT: popq %rbx
182 ; CHECK-LIBCALL-NEXT: retq
184 ; BWON-F16C-LABEL: test_trunc64:
185 ; BWON-F16C: # %bb.0:
186 ; BWON-F16C-NEXT: pushq %rbx
187 ; BWON-F16C-NEXT: movq %rdi, %rbx
188 ; BWON-F16C-NEXT: callq __truncdfhf2@PLT
189 ; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
190 ; BWON-F16C-NEXT: popq %rbx
191 ; BWON-F16C-NEXT: retq
193 ; CHECK-I686-LABEL: test_trunc64:
194 ; CHECK-I686: # %bb.0:
195 ; CHECK-I686-NEXT: pushl %esi
196 ; CHECK-I686-NEXT: subl $8, %esp
197 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
198 ; CHECK-I686-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
199 ; CHECK-I686-NEXT: movq %xmm0, (%esp)
200 ; CHECK-I686-NEXT: calll __truncdfhf2
201 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
202 ; CHECK-I686-NEXT: movw %ax, (%esi)
203 ; CHECK-I686-NEXT: addl $8, %esp
204 ; CHECK-I686-NEXT: popl %esi
205 ; CHECK-I686-NEXT: retl
206 %val16 = fptrunc double %in to half
207 store half %val16, ptr %addr
; Load a half from %p (align 2) and convert it to a signed i64 (fptosi).
; The x86-64 checks expect the half to be extended to float first
; (__extendhfsf2 or vcvtph2ps) and then converted with cvttss2si/vcvttss2si.
; The i686 checks expect the conversion to go through the x87 unit
; (fnstcw/fldcw around fistpll).
211 define i64 @test_fptosi_i64(ptr %p) #0 {
212 ; CHECK-LIBCALL-LABEL: test_fptosi_i64:
213 ; CHECK-LIBCALL: # %bb.0:
214 ; CHECK-LIBCALL-NEXT: pushq %rax
215 ; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0
216 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
217 ; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax
218 ; CHECK-LIBCALL-NEXT: popq %rcx
219 ; CHECK-LIBCALL-NEXT: retq
221 ; BWON-F16C-LABEL: test_fptosi_i64:
222 ; BWON-F16C: # %bb.0:
223 ; BWON-F16C-NEXT: movzwl (%rdi), %eax
224 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
225 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
226 ; BWON-F16C-NEXT: vcvttss2si %xmm0, %rax
227 ; BWON-F16C-NEXT: retq
229 ; CHECK-I686-LABEL: test_fptosi_i64:
230 ; CHECK-I686: # %bb.0:
231 ; CHECK-I686-NEXT: subl $28, %esp
232 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
233 ; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0
234 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
235 ; CHECK-I686-NEXT: movw %ax, (%esp)
236 ; CHECK-I686-NEXT: calll __extendhfsf2
237 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
238 ; CHECK-I686-NEXT: flds {{[0-9]+}}(%esp)
239 ; CHECK-I686-NEXT: fnstcw {{[0-9]+}}(%esp)
240 ; CHECK-I686-NEXT: movzwl {{[0-9]+}}(%esp), %eax
241 ; CHECK-I686-NEXT: orl $3072, %eax # imm = 0xC00
242 ; CHECK-I686-NEXT: movw %ax, {{[0-9]+}}(%esp)
243 ; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp)
244 ; CHECK-I686-NEXT: fistpll {{[0-9]+}}(%esp)
245 ; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp)
246 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
247 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %edx
248 ; CHECK-I686-NEXT: addl $28, %esp
249 ; CHECK-I686-NEXT: retl
250 %a = load half, ptr %p, align 2
251 %r = fptosi half %a to i64
; Convert the signed i64 argument %a to half (sitofp) and store it to %p.
; The x86-64 checks expect an i64 -> float conversion (cvtsi2ss/vcvtsi2ss)
; followed by float -> half via __truncsfhf2 (CHECK-LIBCALL) or vcvtps2ph
; (BWON-F16C). The i686 checks expect fildll followed by __truncsfhf2.
255 define void @test_sitofp_i64(i64 %a, ptr %p) #0 {
256 ; CHECK-LIBCALL-LABEL: test_sitofp_i64:
257 ; CHECK-LIBCALL: # %bb.0:
258 ; CHECK-LIBCALL-NEXT: pushq %rbx
259 ; CHECK-LIBCALL-NEXT: movq %rsi, %rbx
260 ; CHECK-LIBCALL-NEXT: cvtsi2ss %rdi, %xmm0
261 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
262 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
263 ; CHECK-LIBCALL-NEXT: movw %ax, (%rbx)
264 ; CHECK-LIBCALL-NEXT: popq %rbx
265 ; CHECK-LIBCALL-NEXT: retq
267 ; BWON-F16C-LABEL: test_sitofp_i64:
268 ; BWON-F16C: # %bb.0:
269 ; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
270 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
271 ; BWON-F16C-NEXT: vmovd %xmm0, %eax
272 ; BWON-F16C-NEXT: movw %ax, (%rsi)
273 ; BWON-F16C-NEXT: retq
275 ; CHECK-I686-LABEL: test_sitofp_i64:
276 ; CHECK-I686: # %bb.0:
277 ; CHECK-I686-NEXT: pushl %esi
278 ; CHECK-I686-NEXT: subl $24, %esp
279 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
280 ; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
281 ; CHECK-I686-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
282 ; CHECK-I686-NEXT: fildll {{[0-9]+}}(%esp)
283 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
284 ; CHECK-I686-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
285 ; CHECK-I686-NEXT: movd %xmm0, (%esp)
286 ; CHECK-I686-NEXT: calll __truncsfhf2
287 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
288 ; CHECK-I686-NEXT: movw %ax, (%esi)
289 ; CHECK-I686-NEXT: addl $24, %esp
290 ; CHECK-I686-NEXT: popl %esi
291 ; CHECK-I686-NEXT: retl
292 %r = sitofp i64 %a to half
293 store half %r, ptr %p
; Load a half from %p (align 2) and convert it to an unsigned i64 (fptoui).
; After extending to float, the x86-64 checks expect two signed conversions
; (cvttss2si of the value and of the value minus a constant-pool operand)
; combined with sarq/andq/orq. The i686 checks expect a compare against
; 9.22337203E+18, a conditional subtract, an x87 fistpll, and a final XOR
; into the high word of the result.
297 define i64 @test_fptoui_i64(ptr %p) #0 {
298 ; CHECK-LIBCALL-LABEL: test_fptoui_i64:
299 ; CHECK-LIBCALL: # %bb.0:
300 ; CHECK-LIBCALL-NEXT: pushq %rax
301 ; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0
302 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
303 ; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rcx
304 ; CHECK-LIBCALL-NEXT: movq %rcx, %rdx
305 ; CHECK-LIBCALL-NEXT: sarq $63, %rdx
306 ; CHECK-LIBCALL-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
307 ; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax
308 ; CHECK-LIBCALL-NEXT: andq %rdx, %rax
309 ; CHECK-LIBCALL-NEXT: orq %rcx, %rax
310 ; CHECK-LIBCALL-NEXT: popq %rcx
311 ; CHECK-LIBCALL-NEXT: retq
313 ; BWON-F16C-LABEL: test_fptoui_i64:
314 ; BWON-F16C: # %bb.0:
315 ; BWON-F16C-NEXT: movzwl (%rdi), %eax
316 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
317 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
318 ; BWON-F16C-NEXT: vcvttss2si %xmm0, %rcx
319 ; BWON-F16C-NEXT: movq %rcx, %rdx
320 ; BWON-F16C-NEXT: sarq $63, %rdx
321 ; BWON-F16C-NEXT: vsubss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
322 ; BWON-F16C-NEXT: vcvttss2si %xmm0, %rax
323 ; BWON-F16C-NEXT: andq %rdx, %rax
324 ; BWON-F16C-NEXT: orq %rcx, %rax
325 ; BWON-F16C-NEXT: retq
327 ; CHECK-I686-LABEL: test_fptoui_i64:
328 ; CHECK-I686: # %bb.0:
329 ; CHECK-I686-NEXT: subl $28, %esp
330 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
331 ; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0
332 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
333 ; CHECK-I686-NEXT: movw %ax, (%esp)
334 ; CHECK-I686-NEXT: calll __extendhfsf2
335 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
336 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
337 ; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
338 ; CHECK-I686-NEXT: ucomiss %xmm1, %xmm0
339 ; CHECK-I686-NEXT: jae .LBB9_2
340 ; CHECK-I686-NEXT: # %bb.1:
341 ; CHECK-I686-NEXT: xorps %xmm1, %xmm1
342 ; CHECK-I686-NEXT: .LBB9_2:
343 ; CHECK-I686-NEXT: subss %xmm1, %xmm0
344 ; CHECK-I686-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
345 ; CHECK-I686-NEXT: setae %al
346 ; CHECK-I686-NEXT: flds {{[0-9]+}}(%esp)
347 ; CHECK-I686-NEXT: fnstcw {{[0-9]+}}(%esp)
348 ; CHECK-I686-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
349 ; CHECK-I686-NEXT: orl $3072, %ecx # imm = 0xC00
350 ; CHECK-I686-NEXT: movw %cx, {{[0-9]+}}(%esp)
351 ; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp)
352 ; CHECK-I686-NEXT: fistpll {{[0-9]+}}(%esp)
353 ; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp)
354 ; CHECK-I686-NEXT: movzbl %al, %edx
355 ; CHECK-I686-NEXT: shll $31, %edx
356 ; CHECK-I686-NEXT: xorl {{[0-9]+}}(%esp), %edx
357 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
358 ; CHECK-I686-NEXT: addl $28, %esp
359 ; CHECK-I686-NEXT: retl
360 %a = load half, ptr %p, align 2
361 %r = fptoui half %a to i64
; Convert the unsigned i64 argument %a to half (uitofp) and store it to %p.
; The x86-64 checks expect a sign test on the input: the non-negative path
; converts directly, while the other path halves the value (keeping the low
; bit via andl/orq), converts, and doubles the result. The float is then
; narrowed with __truncsfhf2 (CHECK-LIBCALL) or vcvtps2ph (BWON-F16C).
; The i686 checks expect fildll plus an fadds from a constant-pool table
; indexed by the input's top bit, followed by __truncsfhf2.
365 define void @test_uitofp_i64(i64 %a, ptr %p) #0 {
366 ; CHECK-LIBCALL-LABEL: test_uitofp_i64:
367 ; CHECK-LIBCALL: # %bb.0:
368 ; CHECK-LIBCALL-NEXT: pushq %rbx
369 ; CHECK-LIBCALL-NEXT: movq %rsi, %rbx
370 ; CHECK-LIBCALL-NEXT: testq %rdi, %rdi
371 ; CHECK-LIBCALL-NEXT: js .LBB10_1
372 ; CHECK-LIBCALL-NEXT: # %bb.2:
373 ; CHECK-LIBCALL-NEXT: cvtsi2ss %rdi, %xmm0
374 ; CHECK-LIBCALL-NEXT: jmp .LBB10_3
375 ; CHECK-LIBCALL-NEXT: .LBB10_1:
376 ; CHECK-LIBCALL-NEXT: movq %rdi, %rax
377 ; CHECK-LIBCALL-NEXT: shrq %rax
378 ; CHECK-LIBCALL-NEXT: andl $1, %edi
379 ; CHECK-LIBCALL-NEXT: orq %rax, %rdi
380 ; CHECK-LIBCALL-NEXT: cvtsi2ss %rdi, %xmm0
381 ; CHECK-LIBCALL-NEXT: addss %xmm0, %xmm0
382 ; CHECK-LIBCALL-NEXT: .LBB10_3:
383 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
384 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
385 ; CHECK-LIBCALL-NEXT: movw %ax, (%rbx)
386 ; CHECK-LIBCALL-NEXT: popq %rbx
387 ; CHECK-LIBCALL-NEXT: retq
389 ; BWON-F16C-LABEL: test_uitofp_i64:
390 ; BWON-F16C: # %bb.0:
391 ; BWON-F16C-NEXT: testq %rdi, %rdi
392 ; BWON-F16C-NEXT: js .LBB10_1
393 ; BWON-F16C-NEXT: # %bb.2:
394 ; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
395 ; BWON-F16C-NEXT: jmp .LBB10_3
396 ; BWON-F16C-NEXT: .LBB10_1:
397 ; BWON-F16C-NEXT: movq %rdi, %rax
398 ; BWON-F16C-NEXT: shrq %rax
399 ; BWON-F16C-NEXT: andl $1, %edi
400 ; BWON-F16C-NEXT: orq %rax, %rdi
401 ; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
402 ; BWON-F16C-NEXT: vaddss %xmm0, %xmm0, %xmm0
403 ; BWON-F16C-NEXT: .LBB10_3:
404 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
405 ; BWON-F16C-NEXT: vmovd %xmm0, %eax
406 ; BWON-F16C-NEXT: movw %ax, (%rsi)
407 ; BWON-F16C-NEXT: retq
409 ; CHECK-I686-LABEL: test_uitofp_i64:
410 ; CHECK-I686: # %bb.0:
411 ; CHECK-I686-NEXT: pushl %esi
412 ; CHECK-I686-NEXT: subl $24, %esp
413 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
414 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
415 ; CHECK-I686-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
416 ; CHECK-I686-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
417 ; CHECK-I686-NEXT: shrl $31, %eax
418 ; CHECK-I686-NEXT: fildll {{[0-9]+}}(%esp)
419 ; CHECK-I686-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
420 ; CHECK-I686-NEXT: fstps (%esp)
421 ; CHECK-I686-NEXT: calll __truncsfhf2
422 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
423 ; CHECK-I686-NEXT: movw %ax, (%esi)
424 ; CHECK-I686-NEXT: addl $24, %esp
425 ; CHECK-I686-NEXT: popl %esi
426 ; CHECK-I686-NEXT: retl
427 %r = uitofp i64 %a to half
428 store half %r, ptr %p
; Load a <4 x half> vector from %p (align 8) and extend it to <4 x float>.
; The CHECK-LIBCALL and CHECK-I686 checks expect the vector to be scalarized
; into four __extendhfsf2 calls whose results are reassembled with unpack
; instructions; the BWON-F16C checks expect a single vcvtph2ps from memory.
432 define <4 x float> @test_extend32_vec4(ptr %p) #0 {
433 ; CHECK-LIBCALL-LABEL: test_extend32_vec4:
434 ; CHECK-LIBCALL: # %bb.0:
435 ; CHECK-LIBCALL-NEXT: subq $72, %rsp
436 ; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0
437 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
438 ; CHECK-LIBCALL-NEXT: pinsrw $0, 2(%rdi), %xmm0
439 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
440 ; CHECK-LIBCALL-NEXT: pinsrw $0, 4(%rdi), %xmm0
441 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
442 ; CHECK-LIBCALL-NEXT: pinsrw $0, 6(%rdi), %xmm0
443 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
444 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
445 ; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
446 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
447 ; CHECK-LIBCALL-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
448 ; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
449 ; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
450 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
451 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
452 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
453 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
454 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
455 ; CHECK-LIBCALL-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
456 ; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
457 ; CHECK-LIBCALL-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
458 ; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0]
459 ; CHECK-LIBCALL-NEXT: addq $72, %rsp
460 ; CHECK-LIBCALL-NEXT: retq
462 ; BWON-F16C-LABEL: test_extend32_vec4:
463 ; BWON-F16C: # %bb.0:
464 ; BWON-F16C-NEXT: vcvtph2ps (%rdi), %xmm0
465 ; BWON-F16C-NEXT: retq
467 ; CHECK-I686-LABEL: test_extend32_vec4:
468 ; CHECK-I686: # %bb.0:
469 ; CHECK-I686-NEXT: pushl %esi
470 ; CHECK-I686-NEXT: subl $88, %esp
471 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
472 ; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0
473 ; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
474 ; CHECK-I686-NEXT: pinsrw $0, 6(%eax), %xmm0
475 ; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
476 ; CHECK-I686-NEXT: pinsrw $0, 4(%eax), %xmm0
477 ; CHECK-I686-NEXT: pinsrw $0, 2(%eax), %xmm1
478 ; CHECK-I686-NEXT: pextrw $0, %xmm1, %eax
479 ; CHECK-I686-NEXT: movw %ax, (%esp)
480 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi
481 ; CHECK-I686-NEXT: calll __extendhfsf2
482 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
483 ; CHECK-I686-NEXT: movw %si, (%esp)
484 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
485 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi
486 ; CHECK-I686-NEXT: calll __extendhfsf2
487 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
488 ; CHECK-I686-NEXT: movw %si, (%esp)
489 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
490 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi
491 ; CHECK-I686-NEXT: calll __extendhfsf2
492 ; CHECK-I686-NEXT: movw %si, (%esp)
493 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
494 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
495 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
496 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
497 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
498 ; CHECK-I686-NEXT: calll __extendhfsf2
499 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
500 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
501 ; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
502 ; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
503 ; CHECK-I686-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
504 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
505 ; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
506 ; CHECK-I686-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
507 ; CHECK-I686-NEXT: addl $88, %esp
508 ; CHECK-I686-NEXT: popl %esi
509 ; CHECK-I686-NEXT: retl
510 %a = load <4 x half>, ptr %p, align 8
511 %b = fpext <4 x half> %a to <4 x float>
; Load a <4 x half> vector from %p (align 8) and extend it to <4 x double>.
; The CHECK-LIBCALL checks expect four __extendhfsf2 calls, each followed by
; cvtss2sd, with the result returned in xmm0/xmm1. The BWON-F16C checks expect
; vcvtph2ps from memory followed by vcvtps2pd into ymm0. The CHECK-I686 checks
; expect four __extendhfsf2 calls with results stored as doubles via fstpl.
515 define <4 x double> @test_extend64_vec4(ptr %p) #0 {
516 ; CHECK-LIBCALL-LABEL: test_extend64_vec4:
517 ; CHECK-LIBCALL: # %bb.0:
518 ; CHECK-LIBCALL-NEXT: subq $72, %rsp
519 ; CHECK-LIBCALL-NEXT: pinsrw $0, 4(%rdi), %xmm0
520 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
521 ; CHECK-LIBCALL-NEXT: pinsrw $0, 6(%rdi), %xmm0
522 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
523 ; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0
524 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
525 ; CHECK-LIBCALL-NEXT: pinsrw $0, 2(%rdi), %xmm0
526 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
527 ; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0
528 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
529 ; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
530 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
531 ; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0
532 ; CHECK-LIBCALL-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
533 ; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0]
534 ; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
535 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
536 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
537 ; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0
538 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
539 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
540 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
541 ; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm1
542 ; CHECK-LIBCALL-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
543 ; CHECK-LIBCALL-NEXT: # xmm1 = xmm1[0],mem[0]
544 ; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
545 ; CHECK-LIBCALL-NEXT: addq $72, %rsp
546 ; CHECK-LIBCALL-NEXT: retq
548 ; BWON-F16C-LABEL: test_extend64_vec4:
549 ; BWON-F16C: # %bb.0:
550 ; BWON-F16C-NEXT: vcvtph2ps (%rdi), %xmm0
551 ; BWON-F16C-NEXT: vcvtps2pd %xmm0, %ymm0
552 ; BWON-F16C-NEXT: retq
554 ; CHECK-I686-LABEL: test_extend64_vec4:
555 ; CHECK-I686: # %bb.0:
556 ; CHECK-I686-NEXT: pushl %esi
557 ; CHECK-I686-NEXT: subl $104, %esp
558 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
559 ; CHECK-I686-NEXT: pinsrw $0, 6(%eax), %xmm0
560 ; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
561 ; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0
562 ; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
563 ; CHECK-I686-NEXT: pinsrw $0, 2(%eax), %xmm0
564 ; CHECK-I686-NEXT: pinsrw $0, 4(%eax), %xmm1
565 ; CHECK-I686-NEXT: pextrw $0, %xmm1, %eax
566 ; CHECK-I686-NEXT: movw %ax, (%esp)
567 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi
568 ; CHECK-I686-NEXT: calll __extendhfsf2
569 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
570 ; CHECK-I686-NEXT: movw %si, (%esp)
571 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
572 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi
573 ; CHECK-I686-NEXT: calll __extendhfsf2
574 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
575 ; CHECK-I686-NEXT: movw %si, (%esp)
576 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
577 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi
578 ; CHECK-I686-NEXT: calll __extendhfsf2
579 ; CHECK-I686-NEXT: movw %si, (%esp)
580 ; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp)
581 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
582 ; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp)
583 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
584 ; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp)
585 ; CHECK-I686-NEXT: calll __extendhfsf2
586 ; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp)
587 ; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
588 ; CHECK-I686-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
589 ; CHECK-I686-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
590 ; CHECK-I686-NEXT: movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
591 ; CHECK-I686-NEXT: addl $104, %esp
592 ; CHECK-I686-NEXT: popl %esi
593 ; CHECK-I686-NEXT: retl
594 %a = load <4 x half>, ptr %p, align 8
595 %b = fpext <4 x half> %a to <4 x double>
; Truncate the <4 x float> argument %a to <4 x half> and store it to %p.
; The CHECK-LIBCALL and CHECK-I686 checks expect the vector to be scalarized
; into four __truncsfhf2 calls with four separate 16-bit stores; the
; BWON-F16C checks expect a single vcvtps2ph that stores directly to memory.
599 define void @test_trunc32_vec4(<4 x float> %a, ptr %p) #0 {
600 ; CHECK-LIBCALL-LABEL: test_trunc32_vec4:
601 ; CHECK-LIBCALL: # %bb.0:
602 ; CHECK-LIBCALL-NEXT: pushq %rbx
603 ; CHECK-LIBCALL-NEXT: subq $64, %rsp
604 ; CHECK-LIBCALL-NEXT: movq %rdi, %rbx
605 ; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
606 ; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
607 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
608 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
609 ; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
610 ; CHECK-LIBCALL-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
611 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
612 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
613 ; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
614 ; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
615 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
616 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
617 ; CHECK-LIBCALL-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
618 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
619 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
620 ; CHECK-LIBCALL-NEXT: movw %ax, (%rbx)
621 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
622 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
623 ; CHECK-LIBCALL-NEXT: movw %ax, 6(%rbx)
624 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
625 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
626 ; CHECK-LIBCALL-NEXT: movw %ax, 4(%rbx)
627 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
628 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
629 ; CHECK-LIBCALL-NEXT: movw %ax, 2(%rbx)
630 ; CHECK-LIBCALL-NEXT: addq $64, %rsp
631 ; CHECK-LIBCALL-NEXT: popq %rbx
632 ; CHECK-LIBCALL-NEXT: retq
634 ; BWON-F16C-LABEL: test_trunc32_vec4:
635 ; BWON-F16C: # %bb.0:
636 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, (%rdi)
637 ; BWON-F16C-NEXT: retq
639 ; CHECK-I686-LABEL: test_trunc32_vec4:
640 ; CHECK-I686: # %bb.0:
641 ; CHECK-I686-NEXT: pushl %esi
642 ; CHECK-I686-NEXT: subl $88, %esp
643 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
644 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
645 ; CHECK-I686-NEXT: movaps %xmm0, %xmm1
646 ; CHECK-I686-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
647 ; CHECK-I686-NEXT: movss %xmm1, (%esp)
648 ; CHECK-I686-NEXT: calll __truncsfhf2
649 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
650 ; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
651 ; CHECK-I686-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
652 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
653 ; CHECK-I686-NEXT: calll __truncsfhf2
654 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
655 ; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
656 ; CHECK-I686-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
657 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
658 ; CHECK-I686-NEXT: calll __truncsfhf2
659 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
660 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
661 ; CHECK-I686-NEXT: movd %xmm0, (%esp)
662 ; CHECK-I686-NEXT: calll __truncsfhf2
663 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
664 ; CHECK-I686-NEXT: movw %ax, (%esi)
665 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
666 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
667 ; CHECK-I686-NEXT: movw %ax, 6(%esi)
668 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
669 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
670 ; CHECK-I686-NEXT: movw %ax, 4(%esi)
671 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
672 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
673 ; CHECK-I686-NEXT: movw %ax, 2(%esi)
674 ; CHECK-I686-NEXT: addl $88, %esp
675 ; CHECK-I686-NEXT: popl %esi
676 ; CHECK-I686-NEXT: retl
677 %v = fptrunc <4 x float> %a to <4 x half>
678 store <4 x half> %v, ptr %p
; Truncate the <4 x double> argument %a to <4 x half> and store it to %p.
; All three check groups, including BWON-F16C, expect four calls to
; __truncdfhf2. The CHECK-LIBCALL and CHECK-I686 checks store each result
; with its own 16-bit store, while the BWON-F16C checks pack the results with
; vpunpcklwd/vpunpckldq and store them with a single vmovq.
682 define void @test_trunc64_vec4(<4 x double> %a, ptr %p) #0 {
683 ; CHECK-LIBCALL-LABEL: test_trunc64_vec4:
684 ; CHECK-LIBCALL: # %bb.0:
685 ; CHECK-LIBCALL-NEXT: pushq %rbx
686 ; CHECK-LIBCALL-NEXT: subq $64, %rsp
687 ; CHECK-LIBCALL-NEXT: movq %rdi, %rbx
688 ; CHECK-LIBCALL-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
689 ; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
690 ; CHECK-LIBCALL-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
691 ; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT
692 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
693 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
694 ; CHECK-LIBCALL-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
695 ; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT
696 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
697 ; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
698 ; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT
699 ; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
700 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
701 ; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT
702 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
703 ; CHECK-LIBCALL-NEXT: movw %ax, 4(%rbx)
704 ; CHECK-LIBCALL-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
705 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
706 ; CHECK-LIBCALL-NEXT: movw %ax, (%rbx)
707 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
708 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
709 ; CHECK-LIBCALL-NEXT: movw %ax, 6(%rbx)
710 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
711 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
712 ; CHECK-LIBCALL-NEXT: movw %ax, 2(%rbx)
713 ; CHECK-LIBCALL-NEXT: addq $64, %rsp
714 ; CHECK-LIBCALL-NEXT: popq %rbx
715 ; CHECK-LIBCALL-NEXT: retq
717 ; BWON-F16C-LABEL: test_trunc64_vec4:
718 ; BWON-F16C: # %bb.0:
719 ; BWON-F16C-NEXT: pushq %rbx
720 ; BWON-F16C-NEXT: subq $64, %rsp
721 ; BWON-F16C-NEXT: movq %rdi, %rbx
722 ; BWON-F16C-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
723 ; BWON-F16C-NEXT: vextractf128 $1, %ymm0, %xmm0
724 ; BWON-F16C-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
725 ; BWON-F16C-NEXT: vzeroupper
726 ; BWON-F16C-NEXT: callq __truncdfhf2@PLT
727 ; BWON-F16C-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
728 ; BWON-F16C-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
729 ; BWON-F16C-NEXT: # xmm0 = mem[1,0]
730 ; BWON-F16C-NEXT: callq __truncdfhf2@PLT
731 ; BWON-F16C-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
732 ; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
733 ; BWON-F16C-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
734 ; BWON-F16C-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
735 ; BWON-F16C-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
736 ; BWON-F16C-NEXT: vzeroupper
737 ; BWON-F16C-NEXT: callq __truncdfhf2@PLT
738 ; BWON-F16C-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
739 ; BWON-F16C-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
740 ; BWON-F16C-NEXT: # xmm0 = mem[1,0]
741 ; BWON-F16C-NEXT: callq __truncdfhf2@PLT
742 ; BWON-F16C-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
743 ; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
744 ; BWON-F16C-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
745 ; BWON-F16C-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
746 ; BWON-F16C-NEXT: vmovq %xmm0, (%rbx)
747 ; BWON-F16C-NEXT: addq $64, %rsp
748 ; BWON-F16C-NEXT: popq %rbx
749 ; BWON-F16C-NEXT: retq
751 ; CHECK-I686-LABEL: test_trunc64_vec4:
752 ; CHECK-I686: # %bb.0:
753 ; CHECK-I686-NEXT: pushl %esi
754 ; CHECK-I686-NEXT: subl $88, %esp
755 ; CHECK-I686-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
756 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
757 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
758 ; CHECK-I686-NEXT: movlps %xmm0, (%esp)
759 ; CHECK-I686-NEXT: calll __truncdfhf2
760 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
761 ; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
762 ; CHECK-I686-NEXT: movhps %xmm0, (%esp)
763 ; CHECK-I686-NEXT: calll __truncdfhf2
764 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
765 ; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
766 ; CHECK-I686-NEXT: movlps %xmm0, (%esp)
767 ; CHECK-I686-NEXT: calll __truncdfhf2
768 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
769 ; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
770 ; CHECK-I686-NEXT: movhps %xmm0, (%esp)
771 ; CHECK-I686-NEXT: calll __truncdfhf2
772 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
773 ; CHECK-I686-NEXT: movw %ax, 6(%esi)
774 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
775 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
776 ; CHECK-I686-NEXT: movw %ax, 4(%esi)
777 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
778 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
779 ; CHECK-I686-NEXT: movw %ax, 2(%esi)
780 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
781 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
782 ; CHECK-I686-NEXT: movw %ax, (%esi)
783 ; CHECK-I686-NEXT: addl $88, %esp
784 ; CHECK-I686-NEXT: popl %esi
785 ; CHECK-I686-NEXT: retl
786 %v = fptrunc <4 x double> %a to <4 x half>
787 store <4 x half> %v, ptr %p
791 declare float @test_floatret();
793 ; On i686, if SSE2 is available, the return value from test_floatret is loaded
794 ; to f80 and then rounded to f32. The DAG combiner should not combine this
795 ; fp_round and the subsequent fptrunc from float to half.
; Truncates the float returned by @test_floatret to half. The i686 checks
; expect the returned value to be stored with fstps before __truncsfhf2 is
; called; the x86-64 checks expect a plain __truncsfhf2 call or vcvtps2ph.
796 define half @test_f80trunc_nodagcombine() #0 {
797 ; CHECK-LIBCALL-LABEL: test_f80trunc_nodagcombine:
798 ; CHECK-LIBCALL: # %bb.0:
799 ; CHECK-LIBCALL-NEXT: pushq %rax
800 ; CHECK-LIBCALL-NEXT: callq test_floatret@PLT
801 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
802 ; CHECK-LIBCALL-NEXT: popq %rax
803 ; CHECK-LIBCALL-NEXT: retq
805 ; BWON-F16C-LABEL: test_f80trunc_nodagcombine:
806 ; BWON-F16C: # %bb.0:
807 ; BWON-F16C-NEXT: pushq %rax
808 ; BWON-F16C-NEXT: callq test_floatret@PLT
809 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
810 ; BWON-F16C-NEXT: vmovd %xmm0, %eax
811 ; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
812 ; BWON-F16C-NEXT: popq %rax
813 ; BWON-F16C-NEXT: retq
815 ; CHECK-I686-LABEL: test_f80trunc_nodagcombine:
816 ; CHECK-I686: # %bb.0:
817 ; CHECK-I686-NEXT: subl $12, %esp
818 ; CHECK-I686-NEXT: calll test_floatret@PLT
819 ; CHECK-I686-NEXT: fstps (%esp)
820 ; CHECK-I686-NEXT: calll __truncsfhf2
821 ; CHECK-I686-NEXT: addl $12, %esp
822 ; CHECK-I686-NEXT: retl
823 %1 = call float @test_floatret()
824 %2 = fptrunc float %1 to half
; Loads a half from %b, converts the i32 %a to half, adds the two values as
; half, and extends the sum to float. Without F16C the expected code performs
; every half<->float conversion through __truncsfhf2 / __extendhfsf2; with
; F16C it uses vcvtps2ph / vcvtph2ps around a single vaddss.
831 define float @test_sitofp_fadd_i32(i32 %a, ptr %b) #0 {
832 ; CHECK-LIBCALL-LABEL: test_sitofp_fadd_i32:
833 ; CHECK-LIBCALL: # %bb.0:
834 ; CHECK-LIBCALL-NEXT: subq $40, %rsp
835 ; CHECK-LIBCALL-NEXT: pinsrw $0, (%rsi), %xmm0
836 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
837 ; CHECK-LIBCALL-NEXT: xorps %xmm0, %xmm0
838 ; CHECK-LIBCALL-NEXT: cvtsi2ss %edi, %xmm0
839 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
840 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
841 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
842 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
843 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
844 ; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
845 ; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero
846 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
847 ; CHECK-LIBCALL-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
848 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
849 ; CHECK-LIBCALL-NEXT: addq $40, %rsp
850 ; CHECK-LIBCALL-NEXT: jmp __extendhfsf2@PLT # TAILCALL
852 ; BWON-F16C-LABEL: test_sitofp_fadd_i32:
853 ; BWON-F16C: # %bb.0:
854 ; BWON-F16C-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
855 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
856 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
857 ; BWON-F16C-NEXT: movzwl (%rsi), %eax
858 ; BWON-F16C-NEXT: vmovd %eax, %xmm1
859 ; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
860 ; BWON-F16C-NEXT: vaddss %xmm0, %xmm1, %xmm0
861 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
862 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
863 ; BWON-F16C-NEXT: retq
865 ; CHECK-I686-LABEL: test_sitofp_fadd_i32:
866 ; CHECK-I686: # %bb.0:
867 ; CHECK-I686-NEXT: subl $60, %esp
868 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
869 ; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0
870 ; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
871 ; CHECK-I686-NEXT: xorps %xmm0, %xmm0
872 ; CHECK-I686-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
873 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
874 ; CHECK-I686-NEXT: calll __truncsfhf2
875 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
876 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
877 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
878 ; CHECK-I686-NEXT: movw %ax, (%esp)
879 ; CHECK-I686-NEXT: calll __extendhfsf2
880 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
881 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
882 ; CHECK-I686-NEXT: movw %ax, (%esp)
883 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
884 ; CHECK-I686-NEXT: calll __extendhfsf2
885 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
886 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
887 ; CHECK-I686-NEXT: addss {{[0-9]+}}(%esp), %xmm0
888 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
889 ; CHECK-I686-NEXT: calll __truncsfhf2
890 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
891 ; CHECK-I686-NEXT: movw %ax, (%esp)
892 ; CHECK-I686-NEXT: calll __extendhfsf2
893 ; CHECK-I686-NEXT: addl $60, %esp
894 ; CHECK-I686-NEXT: retl
895 %tmp0 = load half, ptr %b
896 %tmp1 = sitofp i32 %a to half
897 %tmp2 = fadd half %tmp0, %tmp1
898 %tmp3 = fpext half %tmp2 to float
; Compares the half argument against zero with 'fcmp une' and converts the
; i1 result to half with uitofp. The expected code extends the input to f32,
; compares with ucomiss, picks either 0 or 0x3C00 with cmovne/cmovp, and
; inserts the chosen 16-bit pattern into the result register with pinsrw.
902 define half @PR40273(half) #0 {
903 ; CHECK-LIBCALL-LABEL: PR40273:
904 ; CHECK-LIBCALL: # %bb.0:
905 ; CHECK-LIBCALL-NEXT: pushq %rax
906 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
907 ; CHECK-LIBCALL-NEXT: xorl %eax, %eax
908 ; CHECK-LIBCALL-NEXT: xorps %xmm1, %xmm1
909 ; CHECK-LIBCALL-NEXT: ucomiss %xmm1, %xmm0
910 ; CHECK-LIBCALL-NEXT: movl $15360, %ecx # imm = 0x3C00
911 ; CHECK-LIBCALL-NEXT: cmovnel %ecx, %eax
912 ; CHECK-LIBCALL-NEXT: cmovpl %ecx, %eax
913 ; CHECK-LIBCALL-NEXT: pinsrw $0, %eax, %xmm0
914 ; CHECK-LIBCALL-NEXT: popq %rax
915 ; CHECK-LIBCALL-NEXT: retq
917 ; BWON-F16C-LABEL: PR40273:
918 ; BWON-F16C: # %bb.0:
919 ; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax
920 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
921 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
922 ; BWON-F16C-NEXT: xorl %eax, %eax
923 ; BWON-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
924 ; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0
925 ; BWON-F16C-NEXT: movl $15360, %ecx # imm = 0x3C00
926 ; BWON-F16C-NEXT: cmovnel %ecx, %eax
927 ; BWON-F16C-NEXT: cmovpl %ecx, %eax
928 ; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
929 ; BWON-F16C-NEXT: retq
931 ; CHECK-I686-LABEL: PR40273:
932 ; CHECK-I686: # %bb.0:
933 ; CHECK-I686-NEXT: subl $12, %esp
934 ; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
935 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
936 ; CHECK-I686-NEXT: movw %ax, (%esp)
937 ; CHECK-I686-NEXT: calll __extendhfsf2
938 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
939 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
940 ; CHECK-I686-NEXT: xorl %eax, %eax
941 ; CHECK-I686-NEXT: xorps %xmm1, %xmm1
942 ; CHECK-I686-NEXT: ucomiss %xmm1, %xmm0
943 ; CHECK-I686-NEXT: movl $15360, %ecx # imm = 0x3C00
944 ; CHECK-I686-NEXT: cmovnel %ecx, %eax
945 ; CHECK-I686-NEXT: cmovpl %ecx, %eax
946 ; CHECK-I686-NEXT: pinsrw $0, %eax, %xmm0
947 ; CHECK-I686-NEXT: addl $12, %esp
948 ; CHECK-I686-NEXT: retl
949 %2 = fcmp une half %0, 0xH0000
950 %3 = uitofp i1 %2 to half
; Branches on 'fcmp oeq' of zero against the half argument. The expected code
; extends the input to f32, compares with ucomiss, and ORs setp with setne so
; that the jump to the %if.end block is taken when the operands are unordered
; or not equal; otherwise execution falls through to %if.then.
954 define void @brcond(half %0) #0 {
955 ; CHECK-LIBCALL-LABEL: brcond:
956 ; CHECK-LIBCALL: # %bb.0: # %entry
957 ; CHECK-LIBCALL-NEXT: pushq %rax
958 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
959 ; CHECK-LIBCALL-NEXT: xorps %xmm1, %xmm1
960 ; CHECK-LIBCALL-NEXT: ucomiss %xmm1, %xmm0
961 ; CHECK-LIBCALL-NEXT: setp %al
962 ; CHECK-LIBCALL-NEXT: setne %cl
963 ; CHECK-LIBCALL-NEXT: orb %al, %cl
964 ; CHECK-LIBCALL-NEXT: jne .LBB18_2
965 ; CHECK-LIBCALL-NEXT: # %bb.1: # %if.then
966 ; CHECK-LIBCALL-NEXT: popq %rax
967 ; CHECK-LIBCALL-NEXT: retq
968 ; CHECK-LIBCALL-NEXT: .LBB18_2: # %if.end
970 ; BWON-F16C-LABEL: brcond:
971 ; BWON-F16C: # %bb.0: # %entry
972 ; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax
973 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
974 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
975 ; BWON-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
976 ; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0
977 ; BWON-F16C-NEXT: setp %al
978 ; BWON-F16C-NEXT: setne %cl
979 ; BWON-F16C-NEXT: orb %al, %cl
980 ; BWON-F16C-NEXT: jne .LBB18_2
981 ; BWON-F16C-NEXT: # %bb.1: # %if.then
982 ; BWON-F16C-NEXT: retq
983 ; BWON-F16C-NEXT: .LBB18_2: # %if.end
985 ; CHECK-I686-LABEL: brcond:
986 ; CHECK-I686: # %bb.0: # %entry
987 ; CHECK-I686-NEXT: subl $12, %esp
988 ; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
989 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
990 ; CHECK-I686-NEXT: movw %ax, (%esp)
991 ; CHECK-I686-NEXT: calll __extendhfsf2
992 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
993 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
994 ; CHECK-I686-NEXT: xorps %xmm1, %xmm1
995 ; CHECK-I686-NEXT: ucomiss %xmm1, %xmm0
996 ; CHECK-I686-NEXT: setp %al
997 ; CHECK-I686-NEXT: setne %cl
998 ; CHECK-I686-NEXT: orb %al, %cl
999 ; CHECK-I686-NEXT: jne .LBB18_2
1000 ; CHECK-I686-NEXT: # %bb.1: # %if.then
1001 ; CHECK-I686-NEXT: addl $12, %esp
1002 ; CHECK-I686-NEXT: retl
1003 ; CHECK-I686-NEXT: .LBB18_2: # %if.end
1005 %cmp = fcmp oeq half 0xH0000, %0
1006 br i1 %cmp, label %if.then, label %if.end
1008 if.then: ; preds = %entry
1011 if.end: ; preds = %entry
; Calls llvm.sqrt.f16 on the half argument. The expected lowering extends the
; input to f32 (libcall or vcvtph2ps), computes the root with sqrtss/vsqrtss,
; and truncates the result back to half (libcall or vcvtps2ph).
1015 define half @test_sqrt(half %0) #0 {
1016 ; CHECK-LIBCALL-LABEL: test_sqrt:
1017 ; CHECK-LIBCALL: # %bb.0: # %entry
1018 ; CHECK-LIBCALL-NEXT: pushq %rax
1019 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1020 ; CHECK-LIBCALL-NEXT: sqrtss %xmm0, %xmm0
1021 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
1022 ; CHECK-LIBCALL-NEXT: popq %rax
1023 ; CHECK-LIBCALL-NEXT: retq
1025 ; BWON-F16C-LABEL: test_sqrt:
1026 ; BWON-F16C: # %bb.0: # %entry
1027 ; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax
1028 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
1029 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
1030 ; BWON-F16C-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
1031 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
1032 ; BWON-F16C-NEXT: vmovd %xmm0, %eax
1033 ; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
1034 ; BWON-F16C-NEXT: retq
1036 ; CHECK-I686-LABEL: test_sqrt:
1037 ; CHECK-I686: # %bb.0: # %entry
1038 ; CHECK-I686-NEXT: subl $12, %esp
1039 ; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
1040 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
1041 ; CHECK-I686-NEXT: movw %ax, (%esp)
1042 ; CHECK-I686-NEXT: calll __extendhfsf2
1043 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1044 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1045 ; CHECK-I686-NEXT: sqrtss %xmm0, %xmm0
1046 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
1047 ; CHECK-I686-NEXT: calll __truncsfhf2
1048 ; CHECK-I686-NEXT: addl $12, %esp
1049 ; CHECK-I686-NEXT: retl
1051 %1 = call half @llvm.sqrt.f16(half %0)
1055 declare half @llvm.sqrt.f16(half)
; Built largely from undef operands. Takes fabs of an undef half, compares it
; with 'fcmp ole' against 0xH4800, selects 0.0 or a float product accordingly,
; truncates the selected float to half, and stores it through an undef pointer.
; The checks show the comparison done in f32 against the constant 8.0 and the
; not-taken select arm materialized as NaN.
1057 define void @main.158() #0 {
1058 ; CHECK-LIBCALL-LABEL: main.158:
1059 ; CHECK-LIBCALL: # %bb.0: # %entry
1060 ; CHECK-LIBCALL-NEXT: pushq %rax
1061 ; CHECK-LIBCALL-NEXT: xorps %xmm0, %xmm0
1062 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
1063 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1064 ; CHECK-LIBCALL-NEXT: movss {{.*#+}} xmm1 = [8.0E+0,0.0E+0,0.0E+0,0.0E+0]
1065 ; CHECK-LIBCALL-NEXT: ucomiss %xmm0, %xmm1
1066 ; CHECK-LIBCALL-NEXT: xorps %xmm0, %xmm0
1067 ; CHECK-LIBCALL-NEXT: jae .LBB20_2
1068 ; CHECK-LIBCALL-NEXT: # %bb.1: # %entry
1069 ; CHECK-LIBCALL-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
1070 ; CHECK-LIBCALL-NEXT: .LBB20_2: # %entry
1071 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
1072 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
1073 ; CHECK-LIBCALL-NEXT: movw %ax, (%rax)
1074 ; CHECK-LIBCALL-NEXT: popq %rax
1075 ; CHECK-LIBCALL-NEXT: retq
1077 ; BWON-F16C-LABEL: main.158:
1078 ; BWON-F16C: # %bb.0: # %entry
1079 ; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0
1080 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm1
1081 ; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
1082 ; BWON-F16C-NEXT: vmovss {{.*#+}} xmm2 = [8.0E+0,0.0E+0,0.0E+0,0.0E+0]
1083 ; BWON-F16C-NEXT: vucomiss %xmm1, %xmm2
1084 ; BWON-F16C-NEXT: jae .LBB20_2
1085 ; BWON-F16C-NEXT: # %bb.1: # %entry
1086 ; BWON-F16C-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
1087 ; BWON-F16C-NEXT: .LBB20_2: # %entry
1088 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
1089 ; BWON-F16C-NEXT: vmovd %xmm0, %eax
1090 ; BWON-F16C-NEXT: movw %ax, (%rax)
1091 ; BWON-F16C-NEXT: retq
1093 ; CHECK-I686-LABEL: main.158:
1094 ; CHECK-I686: # %bb.0: # %entry
1095 ; CHECK-I686-NEXT: subl $12, %esp
1096 ; CHECK-I686-NEXT: movl $0, (%esp)
1097 ; CHECK-I686-NEXT: calll __truncsfhf2
1098 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
1099 ; CHECK-I686-NEXT: movw %ax, (%esp)
1100 ; CHECK-I686-NEXT: calll __extendhfsf2
1101 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1102 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = [8.0E+0,0.0E+0,0.0E+0,0.0E+0]
1103 ; CHECK-I686-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0
1104 ; CHECK-I686-NEXT: xorps %xmm0, %xmm0
1105 ; CHECK-I686-NEXT: jae .LBB20_2
1106 ; CHECK-I686-NEXT: # %bb.1: # %entry
1107 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
1108 ; CHECK-I686-NEXT: .LBB20_2: # %entry
1109 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
1110 ; CHECK-I686-NEXT: calll __truncsfhf2
1111 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
1112 ; CHECK-I686-NEXT: movw %ax, (%eax)
1113 ; CHECK-I686-NEXT: addl $12, %esp
1114 ; CHECK-I686-NEXT: retl
1116 %0 = tail call half @llvm.fabs.f16(half undef)
1117 %1 = fpext half %0 to float
1118 %compare.2 = fcmp ole half %0, 0xH4800
1119 %multiply.95 = fmul float %1, 5.000000e-01
1120 %add.82 = fadd float %multiply.95, -2.000000e+00
1121 %multiply.68 = fmul float %add.82, 0.000000e+00
1122 %subtract.65 = fsub float %multiply.68, 0.000000e+00
1123 %multiply.57 = fmul float undef, 0.000000e+00
1124 %2 = select i1 %compare.2, float 0.000000e+00, float %multiply.57
1125 %3 = fptrunc float %2 to half
1126 store half %3, ptr undef, align 2
; Loads a half through an undef pointer, splats it both as <4 x half> and (via
; bitcast) as <4 x i16>, and stores a <4 x half> that holds 0xH7E00 in lanes
; where the splat compares unordered ('fcmp uno') with zero and the original
; bits elsewhere. Without F16C the checks expect a scalar ucomiss followed by
; cmovp of the constant 0x7E00 per lane; with F16C they expect vcmpunordps
; feeding a vpblendvb against a constant-pool operand.
1130 define void @main.45() #0 {
1131 ; CHECK-LIBCALL-LABEL: main.45:
1132 ; CHECK-LIBCALL: # %bb.0: # %entry
1133 ; CHECK-LIBCALL-NEXT: pushq %rbp
1134 ; CHECK-LIBCALL-NEXT: pushq %r15
1135 ; CHECK-LIBCALL-NEXT: pushq %r14
1136 ; CHECK-LIBCALL-NEXT: pushq %rbx
1137 ; CHECK-LIBCALL-NEXT: pushq %rax
1138 ; CHECK-LIBCALL-NEXT: pinsrw $0, (%rax), %xmm0
1139 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
1140 ; CHECK-LIBCALL-NEXT: movd %eax, %xmm1
1141 ; CHECK-LIBCALL-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1142 ; CHECK-LIBCALL-NEXT: movq %xmm1, %rbx
1143 ; CHECK-LIBCALL-NEXT: movq %rbx, %r14
1144 ; CHECK-LIBCALL-NEXT: shrq $48, %r14
1145 ; CHECK-LIBCALL-NEXT: movq %rbx, %r15
1146 ; CHECK-LIBCALL-NEXT: shrq $32, %r15
1147 ; CHECK-LIBCALL-NEXT: movl %ebx, %ebp
1148 ; CHECK-LIBCALL-NEXT: shrl $16, %ebp
1149 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1150 ; CHECK-LIBCALL-NEXT: ucomiss %xmm0, %xmm0
1151 ; CHECK-LIBCALL-NEXT: movl $32256, %eax # imm = 0x7E00
1152 ; CHECK-LIBCALL-NEXT: cmovpl %eax, %ebp
1153 ; CHECK-LIBCALL-NEXT: cmovpl %eax, %r15d
1154 ; CHECK-LIBCALL-NEXT: cmovpl %eax, %r14d
1155 ; CHECK-LIBCALL-NEXT: cmovpl %eax, %ebx
1156 ; CHECK-LIBCALL-NEXT: movw %bx, (%rax)
1157 ; CHECK-LIBCALL-NEXT: movw %r14w, (%rax)
1158 ; CHECK-LIBCALL-NEXT: movw %r15w, (%rax)
1159 ; CHECK-LIBCALL-NEXT: movw %bp, (%rax)
1160 ; CHECK-LIBCALL-NEXT: addq $8, %rsp
1161 ; CHECK-LIBCALL-NEXT: popq %rbx
1162 ; CHECK-LIBCALL-NEXT: popq %r14
1163 ; CHECK-LIBCALL-NEXT: popq %r15
1164 ; CHECK-LIBCALL-NEXT: popq %rbp
1165 ; CHECK-LIBCALL-NEXT: retq
1167 ; BWON-F16C-LABEL: main.45:
1168 ; BWON-F16C: # %bb.0: # %entry
1169 ; BWON-F16C-NEXT: vpinsrw $0, (%rax), %xmm0, %xmm0
1170 ; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax
1171 ; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1172 ; BWON-F16C-NEXT: vmovd %eax, %xmm1
1173 ; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1174 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
1175 ; BWON-F16C-NEXT: vxorps %xmm2, %xmm2, %xmm2
1176 ; BWON-F16C-NEXT: vcmpunordps %xmm2, %xmm0, %xmm0
1177 ; BWON-F16C-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
1178 ; BWON-F16C-NEXT: vpblendvb %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
1179 ; BWON-F16C-NEXT: vmovq %xmm0, (%rax)
1180 ; BWON-F16C-NEXT: retq
1182 ; CHECK-I686-LABEL: main.45:
1183 ; CHECK-I686: # %bb.0: # %entry
1184 ; CHECK-I686-NEXT: pushl %edi
1185 ; CHECK-I686-NEXT: pushl %esi
1186 ; CHECK-I686-NEXT: subl $20, %esp
1187 ; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0
1188 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
1189 ; CHECK-I686-NEXT: movd %eax, %xmm0
1190 ; CHECK-I686-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1191 ; CHECK-I686-NEXT: movd %xmm0, %esi
1192 ; CHECK-I686-NEXT: movl %esi, %edi
1193 ; CHECK-I686-NEXT: shrl $16, %edi
1194 ; CHECK-I686-NEXT: movw %ax, (%esp)
1195 ; CHECK-I686-NEXT: calll __extendhfsf2
1196 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1197 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1198 ; CHECK-I686-NEXT: ucomiss %xmm0, %xmm0
1199 ; CHECK-I686-NEXT: movl $32256, %eax # imm = 0x7E00
1200 ; CHECK-I686-NEXT: cmovpl %eax, %esi
1201 ; CHECK-I686-NEXT: cmovpl %eax, %edi
1202 ; CHECK-I686-NEXT: movw %di, (%eax)
1203 ; CHECK-I686-NEXT: movw %si, (%eax)
1204 ; CHECK-I686-NEXT: addl $20, %esp
1205 ; CHECK-I686-NEXT: popl %esi
1206 ; CHECK-I686-NEXT: popl %edi
1207 ; CHECK-I686-NEXT: retl
1209 %0 = load half, ptr undef, align 8
1210 %1 = bitcast half %0 to i16
1211 %broadcast.splatinsert = insertelement <4 x half> poison, half %0, i64 0
1212 %broadcast.splat = shufflevector <4 x half> %broadcast.splatinsert, <4 x half> poison, <4 x i32> zeroinitializer
1213 %broadcast.splatinsert13 = insertelement <4 x i16> poison, i16 %1, i64 0
1214 %broadcast.splat14 = shufflevector <4 x i16> %broadcast.splatinsert13, <4 x i16> poison, <4 x i32> zeroinitializer
1215 %2 = fcmp uno <4 x half> %broadcast.splat, zeroinitializer
1216 %3 = add <4 x i16> zeroinitializer, %broadcast.splat14
1217 %4 = select i1 undef, <4 x i16> undef, <4 x i16> %3
1218 %5 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> %4
1219 %6 = bitcast <4 x i16> %5 to <4 x half>
1220 %7 = select <4 x i1> %2, <4 x half> <half 0xH7E00, half 0xH7E00, half 0xH7E00, half 0xH7E00>, <4 x half> %6
1221 store <4 x half> %7, ptr undef, align 16
; Calls llvm.copysign.f16 on two half arguments. The expected code is pure
; integer bit manipulation with no conversion libcalls. It masks one operand
; with 0x7FFF and the other with 0x8000, ORs the two, and inserts the 16-bit
; result with pinsrw.
1225 define half @fcopysign(half %x, half %y) {
1226 ; CHECK-LIBCALL-LABEL: fcopysign:
1227 ; CHECK-LIBCALL: # %bb.0:
1228 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm1, %eax
1229 ; CHECK-LIBCALL-NEXT: andl $-32768, %eax # imm = 0x8000
1230 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %ecx
1231 ; CHECK-LIBCALL-NEXT: andl $32767, %ecx # imm = 0x7FFF
1232 ; CHECK-LIBCALL-NEXT: orl %eax, %ecx
1233 ; CHECK-LIBCALL-NEXT: pinsrw $0, %ecx, %xmm0
1234 ; CHECK-LIBCALL-NEXT: retq
1236 ; BWON-F16C-LABEL: fcopysign:
1237 ; BWON-F16C: # %bb.0:
1238 ; BWON-F16C-NEXT: vpextrw $0, %xmm1, %eax
1239 ; BWON-F16C-NEXT: andl $-32768, %eax # imm = 0x8000
1240 ; BWON-F16C-NEXT: vpextrw $0, %xmm0, %ecx
1241 ; BWON-F16C-NEXT: andl $32767, %ecx # imm = 0x7FFF
1242 ; BWON-F16C-NEXT: orl %eax, %ecx
1243 ; BWON-F16C-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0
1244 ; BWON-F16C-NEXT: retq
1246 ; CHECK-I686-LABEL: fcopysign:
1247 ; CHECK-I686: # %bb.0:
1248 ; CHECK-I686-NEXT: movl $-32768, %eax # imm = 0x8000
1249 ; CHECK-I686-NEXT: andl {{[0-9]+}}(%esp), %eax
1250 ; CHECK-I686-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
1251 ; CHECK-I686-NEXT: andl $32767, %ecx # imm = 0x7FFF
1252 ; CHECK-I686-NEXT: orl %eax, %ecx
1253 ; CHECK-I686-NEXT: pinsrw $0, %ecx, %xmm0
1254 ; CHECK-I686-NEXT: retl
1255 %a = call half @llvm.copysign.f16(half %x, half %y)
1259 declare half @llvm.fabs.f16(half)
1260 declare half @llvm.copysign.f16(half, half)
; Selects between two <8 x half> vectors on a scalar i1 condition. The
; expected code tests bit 0 of the condition and, when it is clear, copies
; %xmm1 over %xmm0 with a whole-register move; no per-element half handling
; appears in the checks.
1262 define <8 x half> @select(i1 %c, <8 x half> %x, <8 x half> %y) {
1263 ; CHECK-LIBCALL-LABEL: select:
1264 ; CHECK-LIBCALL: # %bb.0:
1265 ; CHECK-LIBCALL-NEXT: testb $1, %dil
1266 ; CHECK-LIBCALL-NEXT: jne .LBB23_2
1267 ; CHECK-LIBCALL-NEXT: # %bb.1:
1268 ; CHECK-LIBCALL-NEXT: movaps %xmm1, %xmm0
1269 ; CHECK-LIBCALL-NEXT: .LBB23_2:
1270 ; CHECK-LIBCALL-NEXT: retq
1272 ; BWON-F16C-LABEL: select:
1273 ; BWON-F16C: # %bb.0:
1274 ; BWON-F16C-NEXT: testb $1, %dil
1275 ; BWON-F16C-NEXT: jne .LBB23_2
1276 ; BWON-F16C-NEXT: # %bb.1:
1277 ; BWON-F16C-NEXT: vmovaps %xmm1, %xmm0
1278 ; BWON-F16C-NEXT: .LBB23_2:
1279 ; BWON-F16C-NEXT: retq
1281 ; CHECK-I686-LABEL: select:
1282 ; CHECK-I686: # %bb.0:
1283 ; CHECK-I686-NEXT: testb $1, {{[0-9]+}}(%esp)
1284 ; CHECK-I686-NEXT: jne .LBB23_2
1285 ; CHECK-I686-NEXT: # %bb.1:
1286 ; CHECK-I686-NEXT: movaps %xmm1, %xmm0
1287 ; CHECK-I686-NEXT: .LBB23_2:
1288 ; CHECK-I686-NEXT: retl
1289 %s = select i1 %c, <8 x half> %x, <8 x half> %y
; Loads an <8 x half> from %p and shuffles it so that result lanes 0 and 1 both
; take source element 4; the remaining mask entries are undef. The expected
; code is a pshufhw followed by a pshufd, with the load folded into vpshufhw in
; the F16C run.
1293 define <8 x half> @shuffle(ptr %p) {
1294 ; CHECK-LIBCALL-LABEL: shuffle:
1295 ; CHECK-LIBCALL: # %bb.0:
1296 ; CHECK-LIBCALL-NEXT: movdqu (%rdi), %xmm0
1297 ; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1298 ; CHECK-LIBCALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
1299 ; CHECK-LIBCALL-NEXT: retq
1301 ; BWON-F16C-LABEL: shuffle:
1302 ; BWON-F16C: # %bb.0:
1303 ; BWON-F16C-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,4,4,4,4]
1304 ; BWON-F16C-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
1305 ; BWON-F16C-NEXT: retq
1307 ; CHECK-I686-LABEL: shuffle:
1308 ; CHECK-I686: # %bb.0:
1309 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
1310 ; CHECK-I686-NEXT: movdqu (%eax), %xmm0
1311 ; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1312 ; CHECK-I686-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
1313 ; CHECK-I686-NEXT: retl
1314 %1 = load <8 x half>, ptr %p, align 8
1315 %2 = shufflevector <8 x half> %1, <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1319 declare half @llvm.minnum.f16(half, half)
; Calls llvm.minnum.f16 with the 'fast' flag on two half arguments. The
; expected code extends both operands to f32, computes the minimum with a
; single minss/vminss, and truncates the result back to half.
1321 define half @pr61271(half %0, half %1) #0 {
1322 ; CHECK-LIBCALL-LABEL: pr61271:
1323 ; CHECK-LIBCALL: # %bb.0:
1324 ; CHECK-LIBCALL-NEXT: pushq %rax
1325 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1326 ; CHECK-LIBCALL-NEXT: movaps %xmm1, %xmm0
1327 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1328 ; CHECK-LIBCALL-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
1329 ; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1330 ; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero
1331 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1332 ; CHECK-LIBCALL-NEXT: minss (%rsp), %xmm0 # 4-byte Folded Reload
1333 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
1334 ; CHECK-LIBCALL-NEXT: popq %rax
1335 ; CHECK-LIBCALL-NEXT: retq
1337 ; BWON-F16C-LABEL: pr61271:
1338 ; BWON-F16C: # %bb.0:
1339 ; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax
1340 ; BWON-F16C-NEXT: vpextrw $0, %xmm1, %ecx
1341 ; BWON-F16C-NEXT: vmovd %ecx, %xmm0
1342 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
1343 ; BWON-F16C-NEXT: vmovd %eax, %xmm1
1344 ; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
1345 ; BWON-F16C-NEXT: vminss %xmm0, %xmm1, %xmm0
1346 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
1347 ; BWON-F16C-NEXT: vmovd %xmm0, %eax
1348 ; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
1349 ; BWON-F16C-NEXT: retq
1351 ; CHECK-I686-LABEL: pr61271:
1352 ; CHECK-I686: # %bb.0:
1353 ; CHECK-I686-NEXT: subl $44, %esp
1354 ; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
1355 ; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
1356 ; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
1357 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
1358 ; CHECK-I686-NEXT: movw %ax, (%esp)
1359 ; CHECK-I686-NEXT: calll __extendhfsf2
1360 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1361 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
1362 ; CHECK-I686-NEXT: movw %ax, (%esp)
1363 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1364 ; CHECK-I686-NEXT: calll __extendhfsf2
1365 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1366 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1367 ; CHECK-I686-NEXT: minss {{[0-9]+}}(%esp), %xmm0
1368 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
1369 ; CHECK-I686-NEXT: calll __truncsfhf2
1370 ; CHECK-I686-NEXT: addl $44, %esp
1371 ; CHECK-I686-NEXT: retl
1372 %3 = call fast half @llvm.minnum.f16(half %0, half %1)
1376 declare <8 x half> @llvm.maxnum.v8f16(<8 x half>, <8 x half>)
1378 define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 {
1379 ; CHECK-LIBCALL-LABEL: maxnum_v8f16:
1380 ; CHECK-LIBCALL: # %bb.0:
1381 ; CHECK-LIBCALL-NEXT: subq $184, %rsp
1382 ; CHECK-LIBCALL-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1383 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1384 ; CHECK-LIBCALL-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1385 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1386 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1387 ; CHECK-LIBCALL-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1388 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1389 ; CHECK-LIBCALL-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1390 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1391 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1392 ; CHECK-LIBCALL-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1393 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1394 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1395 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1396 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1397 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1398 ; CHECK-LIBCALL-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1399 ; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1400 ; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero
1401 ; CHECK-LIBCALL-NEXT: ja .LBB26_2
1402 ; CHECK-LIBCALL-NEXT: # %bb.1:
1403 ; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1404 ; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero
1405 ; CHECK-LIBCALL-NEXT: .LBB26_2:
1406 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
1407 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1408 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1409 ; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1410 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1411 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1412 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1413 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1414 ; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1415 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1416 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1417 ; CHECK-LIBCALL-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
1418 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1419 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1420 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1421 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1422 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1423 ; CHECK-LIBCALL-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1424 ; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1425 ; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero
1426 ; CHECK-LIBCALL-NEXT: ja .LBB26_4
1427 ; CHECK-LIBCALL-NEXT: # %bb.3:
1428 ; CHECK-LIBCALL-NEXT: movss (%rsp), %xmm0 # 4-byte Reload
1429 ; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero
1430 ; CHECK-LIBCALL-NEXT: .LBB26_4:
1431 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
1432 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1433 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1434 ; CHECK-LIBCALL-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1435 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1436 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1437 ; CHECK-LIBCALL-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1438 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1439 ; CHECK-LIBCALL-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1440 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
1441 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1442 ; CHECK-LIBCALL-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1443 ; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
1444 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1445 ; CHECK-LIBCALL-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
1446 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1447 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1448 ; CHECK-LIBCALL-NEXT: ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
1449 ; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1450 ; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero
1451 ; CHECK-LIBCALL-NEXT: ja .LBB26_6
1452 ; CHECK-LIBCALL-NEXT: # %bb.5:
1453 ; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1454 ; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero
1455 ; CHECK-LIBCALL-NEXT: .LBB26_6:
1456 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
1457 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1458 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1459 ; CHECK-LIBCALL-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
1460 ; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1461 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1462 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1463 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1464 ; CHECK-LIBCALL-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
1465 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1466 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1467 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1468 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1469 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1470 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1471 ; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
1472 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1473 ; CHECK-LIBCALL-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1474 ; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1475 ; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero
1476 ; CHECK-LIBCALL-NEXT: ja .LBB26_8
1477 ; CHECK-LIBCALL-NEXT: # %bb.7:
1478 ; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1479 ; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero
1480 ; CHECK-LIBCALL-NEXT: .LBB26_8:
1481 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
1482 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1483 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1484 ; CHECK-LIBCALL-NEXT: psrlq $48, %xmm0
1485 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1486 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1487 ; CHECK-LIBCALL-NEXT: movd %xmm0, (%rsp) # 4-byte Folded Spill
1488 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1489 ; CHECK-LIBCALL-NEXT: psrlq $48, %xmm0
1490 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1491 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1492 ; CHECK-LIBCALL-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1493 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1494 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1495 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1496 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1497 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1498 ; CHECK-LIBCALL-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1499 ; CHECK-LIBCALL-NEXT: movss (%rsp), %xmm0 # 4-byte Reload
1500 ; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero
1501 ; CHECK-LIBCALL-NEXT: ja .LBB26_10
1502 ; CHECK-LIBCALL-NEXT: # %bb.9:
1503 ; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1504 ; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero
1505 ; CHECK-LIBCALL-NEXT: .LBB26_10:
1506 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
1507 ; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1508 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1509 ; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1510 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1511 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1512 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1513 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1514 ; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1515 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1516 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1517 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1518 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1519 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1520 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1521 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1522 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1523 ; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1524 ; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero
1525 ; CHECK-LIBCALL-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1526 ; CHECK-LIBCALL-NEXT: ja .LBB26_12
1527 ; CHECK-LIBCALL-NEXT: # %bb.11:
1528 ; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1529 ; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero
1530 ; CHECK-LIBCALL-NEXT: .LBB26_12:
1531 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1532 ; CHECK-LIBCALL-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1533 ; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
1534 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1535 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1536 ; CHECK-LIBCALL-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1537 ; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
1538 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1539 ; CHECK-LIBCALL-NEXT: movaps %xmm1, %xmm0
1540 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
1541 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1542 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1543 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1544 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1545 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1546 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1547 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1548 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1549 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1550 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1551 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1552 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1553 ; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1554 ; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero
1555 ; CHECK-LIBCALL-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1556 ; CHECK-LIBCALL-NEXT: ja .LBB26_14
1557 ; CHECK-LIBCALL-NEXT: # %bb.13:
1558 ; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1559 ; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero
1560 ; CHECK-LIBCALL-NEXT: .LBB26_14:
1561 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1562 ; CHECK-LIBCALL-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1563 ; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
1564 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1565 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1566 ; CHECK-LIBCALL-NEXT: punpcklwd (%rsp), %xmm0 # 16-byte Folded Reload
1567 ; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
1568 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1569 ; CHECK-LIBCALL-NEXT: movaps %xmm1, %xmm0
1570 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
1571 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1572 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1573 ; CHECK-LIBCALL-NEXT: psrld $16, %xmm0
1574 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1575 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1576 ; CHECK-LIBCALL-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1577 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1578 ; CHECK-LIBCALL-NEXT: psrld $16, %xmm0
1579 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1580 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1581 ; CHECK-LIBCALL-NEXT: movd %xmm0, (%rsp) # 4-byte Folded Spill
1582 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1583 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1584 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1585 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1586 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1587 ; CHECK-LIBCALL-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1588 ; CHECK-LIBCALL-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1589 ; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero
1590 ; CHECK-LIBCALL-NEXT: ja .LBB26_16
1591 ; CHECK-LIBCALL-NEXT: # %bb.15:
1592 ; CHECK-LIBCALL-NEXT: movd (%rsp), %xmm0 # 4-byte Folded Reload
1593 ; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero
1594 ; CHECK-LIBCALL-NEXT: .LBB26_16:
1595 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
1596 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1597 ; CHECK-LIBCALL-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1598 ; CHECK-LIBCALL-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1599 ; CHECK-LIBCALL-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
1600 ; CHECK-LIBCALL-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1601 ; CHECK-LIBCALL-NEXT: # xmm1 = xmm1[0],mem[0]
1602 ; CHECK-LIBCALL-NEXT: movdqa %xmm1, %xmm0
1603 ; CHECK-LIBCALL-NEXT: addq $184, %rsp
1604 ; CHECK-LIBCALL-NEXT: retq
1606 ; BWON-F16C-LABEL: maxnum_v8f16:
1607 ; BWON-F16C: # %bb.0:
1608 ; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1609 ; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2
1610 ; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1611 ; BWON-F16C-NEXT: vcvtph2ps %xmm3, %xmm3
1612 ; BWON-F16C-NEXT: vucomiss %xmm2, %xmm3
1613 ; BWON-F16C-NEXT: ja .LBB26_2
1614 ; BWON-F16C-NEXT: # %bb.1:
1615 ; BWON-F16C-NEXT: vmovaps %xmm2, %xmm3
1616 ; BWON-F16C-NEXT: .LBB26_2:
1617 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm3, %xmm2
1618 ; BWON-F16C-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[3,3,3,3]
1619 ; BWON-F16C-NEXT: vcvtph2ps %xmm3, %xmm3
1620 ; BWON-F16C-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[3,3,3,3]
1621 ; BWON-F16C-NEXT: vcvtph2ps %xmm4, %xmm4
1622 ; BWON-F16C-NEXT: vucomiss %xmm3, %xmm4
1623 ; BWON-F16C-NEXT: ja .LBB26_4
1624 ; BWON-F16C-NEXT: # %bb.3:
1625 ; BWON-F16C-NEXT: vmovaps %xmm3, %xmm4
1626 ; BWON-F16C-NEXT: .LBB26_4:
1627 ; BWON-F16C-NEXT: vmovd %xmm2, %eax
1628 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm4, %xmm2
1629 ; BWON-F16C-NEXT: vmovd %xmm2, %ecx
1630 ; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1631 ; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm3
1632 ; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1633 ; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2
1634 ; BWON-F16C-NEXT: vucomiss %xmm3, %xmm2
1635 ; BWON-F16C-NEXT: ja .LBB26_6
1636 ; BWON-F16C-NEXT: # %bb.5:
1637 ; BWON-F16C-NEXT: vmovaps %xmm3, %xmm2
1638 ; BWON-F16C-NEXT: .LBB26_6:
1639 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm2, %xmm2
1640 ; BWON-F16C-NEXT: vmovd %xmm2, %edx
1641 ; BWON-F16C-NEXT: vshufpd {{.*#+}} xmm2 = xmm1[1,0]
1642 ; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm3
1643 ; BWON-F16C-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
1644 ; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2
1645 ; BWON-F16C-NEXT: vucomiss %xmm3, %xmm2
1646 ; BWON-F16C-NEXT: ja .LBB26_8
1647 ; BWON-F16C-NEXT: # %bb.7:
1648 ; BWON-F16C-NEXT: vmovaps %xmm3, %xmm2
1649 ; BWON-F16C-NEXT: .LBB26_8:
1650 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm2, %xmm2
1651 ; BWON-F16C-NEXT: vmovd %xmm2, %esi
1652 ; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[3,3,3,3,4,5,6,7]
1653 ; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2
1654 ; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[3,3,3,3,4,5,6,7]
1655 ; BWON-F16C-NEXT: vcvtph2ps %xmm3, %xmm6
1656 ; BWON-F16C-NEXT: vucomiss %xmm2, %xmm6
1657 ; BWON-F16C-NEXT: ja .LBB26_10
1658 ; BWON-F16C-NEXT: # %bb.9:
1659 ; BWON-F16C-NEXT: vmovaps %xmm2, %xmm6
1660 ; BWON-F16C-NEXT: .LBB26_10:
1661 ; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm2
1662 ; BWON-F16C-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm3
1663 ; BWON-F16C-NEXT: vpinsrw $0, %edx, %xmm0, %xmm4
1664 ; BWON-F16C-NEXT: vpinsrw $0, %esi, %xmm0, %xmm5
1665 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm6, %xmm6
1666 ; BWON-F16C-NEXT: vmovd %xmm6, %eax
1667 ; BWON-F16C-NEXT: vmovshdup {{.*#+}} xmm6 = xmm1[1,1,3,3]
1668 ; BWON-F16C-NEXT: vcvtph2ps %xmm6, %xmm7
1669 ; BWON-F16C-NEXT: vmovshdup {{.*#+}} xmm6 = xmm0[1,1,3,3]
1670 ; BWON-F16C-NEXT: vcvtph2ps %xmm6, %xmm6
1671 ; BWON-F16C-NEXT: vucomiss %xmm7, %xmm6
1672 ; BWON-F16C-NEXT: ja .LBB26_12
1673 ; BWON-F16C-NEXT: # %bb.11:
1674 ; BWON-F16C-NEXT: vmovaps %xmm7, %xmm6
1675 ; BWON-F16C-NEXT: .LBB26_12:
1676 ; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
1677 ; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
1678 ; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4
1679 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm6, %xmm5
1680 ; BWON-F16C-NEXT: vmovd %xmm5, %eax
1681 ; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
1682 ; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm7
1683 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm6
1684 ; BWON-F16C-NEXT: vucomiss %xmm7, %xmm6
1685 ; BWON-F16C-NEXT: ja .LBB26_14
1686 ; BWON-F16C-NEXT: # %bb.13:
1687 ; BWON-F16C-NEXT: vmovaps %xmm7, %xmm6
1688 ; BWON-F16C-NEXT: .LBB26_14:
1689 ; BWON-F16C-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
1690 ; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
1691 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm6, %xmm4
1692 ; BWON-F16C-NEXT: vmovd %xmm4, %eax
1693 ; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4
1694 ; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
1695 ; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
1696 ; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
1697 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
1698 ; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0
1699 ; BWON-F16C-NEXT: ja .LBB26_16
1700 ; BWON-F16C-NEXT: # %bb.15:
1701 ; BWON-F16C-NEXT: vmovaps %xmm1, %xmm0
1702 ; BWON-F16C-NEXT: .LBB26_16:
1703 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
1704 ; BWON-F16C-NEXT: vmovd %xmm0, %eax
1705 ; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
1706 ; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
1707 ; BWON-F16C-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
1708 ; BWON-F16C-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
1709 ; BWON-F16C-NEXT: retq
1711 ; CHECK-I686-LABEL: maxnum_v8f16:
1712 ; CHECK-I686: # %bb.0:
1713 ; CHECK-I686-NEXT: pushl %ebx
1714 ; CHECK-I686-NEXT: pushl %edi
1715 ; CHECK-I686-NEXT: pushl %esi
1716 ; CHECK-I686-NEXT: subl $336, %esp # imm = 0x150
1717 ; CHECK-I686-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
1718 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
1719 ; CHECK-I686-NEXT: movaps %xmm1, %xmm0
1720 ; CHECK-I686-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[1,1]
1721 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %edi
1722 ; CHECK-I686-NEXT: movw %di, (%esp)
1723 ; CHECK-I686-NEXT: calll __extendhfsf2
1724 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1725 ; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1726 ; CHECK-I686-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1727 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi
1728 ; CHECK-I686-NEXT: movw %si, (%esp)
1729 ; CHECK-I686-NEXT: calll __extendhfsf2
1730 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1731 ; CHECK-I686-NEXT: movw %di, (%esp)
1732 ; CHECK-I686-NEXT: calll __extendhfsf2
1733 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1734 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1735 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %edi
1736 ; CHECK-I686-NEXT: movw %di, (%esp)
1737 ; CHECK-I686-NEXT: calll __extendhfsf2
1738 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1739 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1740 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %ebx
1741 ; CHECK-I686-NEXT: movw %bx, (%esp)
1742 ; CHECK-I686-NEXT: calll __extendhfsf2
1743 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1744 ; CHECK-I686-NEXT: movw %di, (%esp)
1745 ; CHECK-I686-NEXT: calll __extendhfsf2
1746 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1747 ; CHECK-I686-NEXT: movw %bx, (%esp)
1748 ; CHECK-I686-NEXT: calll __extendhfsf2
1749 ; CHECK-I686-NEXT: movw %si, (%esp)
1750 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1751 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1752 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1753 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1754 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1755 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1756 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1757 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1758 ; CHECK-I686-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0
1759 ; CHECK-I686-NEXT: ja .LBB26_1
1760 ; CHECK-I686-NEXT: # %bb.2:
1761 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1762 ; CHECK-I686-NEXT: jmp .LBB26_3
1763 ; CHECK-I686-NEXT: .LBB26_1:
1764 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1765 ; CHECK-I686-NEXT: .LBB26_3:
1766 ; CHECK-I686-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1767 ; CHECK-I686-NEXT: calll __extendhfsf2
1768 ; CHECK-I686-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
1769 ; CHECK-I686-NEXT: # xmm0 = mem[0],zero,zero,zero
1770 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
1771 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1772 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1773 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1774 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1775 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1776 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1777 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1778 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1779 ; CHECK-I686-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0
1780 ; CHECK-I686-NEXT: ja .LBB26_4
1781 ; CHECK-I686-NEXT: # %bb.5:
1782 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1783 ; CHECK-I686-NEXT: jmp .LBB26_6
1784 ; CHECK-I686-NEXT: .LBB26_4:
1785 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1786 ; CHECK-I686-NEXT: .LBB26_6:
1787 ; CHECK-I686-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1788 ; CHECK-I686-NEXT: calll __truncsfhf2
1789 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
1790 ; CHECK-I686-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
1791 ; CHECK-I686-NEXT: # xmm0 = mem[0],zero,zero,zero
1792 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
1793 ; CHECK-I686-NEXT: calll __truncsfhf2
1794 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
1795 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1796 ; CHECK-I686-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1]
1797 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %edi
1798 ; CHECK-I686-NEXT: movw %di, (%esp)
1799 ; CHECK-I686-NEXT: calll __extendhfsf2
1800 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1801 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1802 ; CHECK-I686-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1]
1803 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi
1804 ; CHECK-I686-NEXT: movw %si, (%esp)
1805 ; CHECK-I686-NEXT: calll __extendhfsf2
1806 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1807 ; CHECK-I686-NEXT: movw %di, (%esp)
1808 ; CHECK-I686-NEXT: calll __extendhfsf2
1809 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1810 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1811 ; CHECK-I686-NEXT: psrlq $48, %xmm0
1812 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %edi
1813 ; CHECK-I686-NEXT: movw %di, (%esp)
1814 ; CHECK-I686-NEXT: calll __extendhfsf2
1815 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1816 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1817 ; CHECK-I686-NEXT: psrlq $48, %xmm0
1818 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %ebx
1819 ; CHECK-I686-NEXT: movw %bx, (%esp)
1820 ; CHECK-I686-NEXT: calll __extendhfsf2
1821 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1822 ; CHECK-I686-NEXT: movw %di, (%esp)
1823 ; CHECK-I686-NEXT: calll __extendhfsf2
1824 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1825 ; CHECK-I686-NEXT: movw %bx, (%esp)
1826 ; CHECK-I686-NEXT: calll __extendhfsf2
1827 ; CHECK-I686-NEXT: movw %si, (%esp)
1828 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1829 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1830 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1831 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1832 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1833 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1834 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1835 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1836 ; CHECK-I686-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0
1837 ; CHECK-I686-NEXT: ja .LBB26_7
1838 ; CHECK-I686-NEXT: # %bb.8:
1839 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1840 ; CHECK-I686-NEXT: jmp .LBB26_9
1841 ; CHECK-I686-NEXT: .LBB26_7:
1842 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1843 ; CHECK-I686-NEXT: .LBB26_9:
1844 ; CHECK-I686-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1845 ; CHECK-I686-NEXT: calll __extendhfsf2
1846 ; CHECK-I686-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
1847 ; CHECK-I686-NEXT: # xmm0 = mem[0],zero,zero,zero
1848 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
1849 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1850 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1851 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1852 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1853 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1854 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1855 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1856 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1857 ; CHECK-I686-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0
1858 ; CHECK-I686-NEXT: ja .LBB26_10
1859 ; CHECK-I686-NEXT: # %bb.11:
1860 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1861 ; CHECK-I686-NEXT: jmp .LBB26_12
1862 ; CHECK-I686-NEXT: .LBB26_10:
1863 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1864 ; CHECK-I686-NEXT: .LBB26_12:
1865 ; CHECK-I686-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1866 ; CHECK-I686-NEXT: calll __truncsfhf2
1867 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
1868 ; CHECK-I686-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
1869 ; CHECK-I686-NEXT: # xmm0 = mem[0],zero,zero,zero
1870 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
1871 ; CHECK-I686-NEXT: calll __truncsfhf2
1872 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
1873 ; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1874 ; CHECK-I686-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1875 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %edi
1876 ; CHECK-I686-NEXT: movw %di, (%esp)
1877 ; CHECK-I686-NEXT: calll __extendhfsf2
1878 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1879 ; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1880 ; CHECK-I686-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1881 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi
1882 ; CHECK-I686-NEXT: movw %si, (%esp)
1883 ; CHECK-I686-NEXT: calll __extendhfsf2
1884 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1885 ; CHECK-I686-NEXT: movw %di, (%esp)
1886 ; CHECK-I686-NEXT: calll __extendhfsf2
1887 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1888 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1889 ; CHECK-I686-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1890 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %edi
1891 ; CHECK-I686-NEXT: movw %di, (%esp)
1892 ; CHECK-I686-NEXT: calll __extendhfsf2
1893 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1894 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1895 ; CHECK-I686-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1896 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %ebx
1897 ; CHECK-I686-NEXT: movw %bx, (%esp)
1898 ; CHECK-I686-NEXT: calll __extendhfsf2
1899 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1900 ; CHECK-I686-NEXT: movw %di, (%esp)
1901 ; CHECK-I686-NEXT: calll __extendhfsf2
1902 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1903 ; CHECK-I686-NEXT: movw %bx, (%esp)
1904 ; CHECK-I686-NEXT: calll __extendhfsf2
1905 ; CHECK-I686-NEXT: movw %si, (%esp)
1906 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1907 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1908 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1909 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1910 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1911 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1912 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1913 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1914 ; CHECK-I686-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0
1915 ; CHECK-I686-NEXT: ja .LBB26_13
1916 ; CHECK-I686-NEXT: # %bb.14:
1917 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1918 ; CHECK-I686-NEXT: jmp .LBB26_15
1919 ; CHECK-I686-NEXT: .LBB26_13:
1920 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1921 ; CHECK-I686-NEXT: .LBB26_15:
1922 ; CHECK-I686-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1923 ; CHECK-I686-NEXT: calll __extendhfsf2
1924 ; CHECK-I686-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
1925 ; CHECK-I686-NEXT: # xmm0 = mem[0],zero,zero,zero
1926 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
1927 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1928 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1929 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1930 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1931 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1932 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1933 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1934 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1935 ; CHECK-I686-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0
1936 ; CHECK-I686-NEXT: ja .LBB26_16
1937 ; CHECK-I686-NEXT: # %bb.17:
1938 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1939 ; CHECK-I686-NEXT: jmp .LBB26_18
1940 ; CHECK-I686-NEXT: .LBB26_16:
1941 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1942 ; CHECK-I686-NEXT: .LBB26_18:
1943 ; CHECK-I686-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1944 ; CHECK-I686-NEXT: calll __truncsfhf2
1945 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
1946 ; CHECK-I686-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
1947 ; CHECK-I686-NEXT: # xmm0 = mem[0],zero,zero,zero
1948 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
1949 ; CHECK-I686-NEXT: calll __truncsfhf2
1950 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
1951 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1952 ; CHECK-I686-NEXT: psrld $16, %xmm0
1953 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %edi
1954 ; CHECK-I686-NEXT: movw %di, (%esp)
1955 ; CHECK-I686-NEXT: calll __extendhfsf2
1956 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1957 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1958 ; CHECK-I686-NEXT: psrld $16, %xmm0
1959 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi
1960 ; CHECK-I686-NEXT: movw %si, (%esp)
1961 ; CHECK-I686-NEXT: calll __extendhfsf2
1962 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1963 ; CHECK-I686-NEXT: movw %di, (%esp)
1964 ; CHECK-I686-NEXT: calll __extendhfsf2
1965 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1966 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1967 ; CHECK-I686-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1968 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %edi
1969 ; CHECK-I686-NEXT: movw %di, (%esp)
1970 ; CHECK-I686-NEXT: calll __extendhfsf2
1971 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1972 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1973 ; CHECK-I686-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1974 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %ebx
1975 ; CHECK-I686-NEXT: movw %bx, (%esp)
1976 ; CHECK-I686-NEXT: calll __extendhfsf2
1977 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1978 ; CHECK-I686-NEXT: movw %di, (%esp)
1979 ; CHECK-I686-NEXT: calll __extendhfsf2
1980 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1981 ; CHECK-I686-NEXT: movw %bx, (%esp)
1982 ; CHECK-I686-NEXT: calll __extendhfsf2
1983 ; CHECK-I686-NEXT: movw %si, (%esp)
1984 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1985 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1986 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1987 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1988 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1989 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1990 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1991 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1992 ; CHECK-I686-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0
1993 ; CHECK-I686-NEXT: ja .LBB26_19
1994 ; CHECK-I686-NEXT: # %bb.20:
1995 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1996 ; CHECK-I686-NEXT: jmp .LBB26_21
1997 ; CHECK-I686-NEXT: .LBB26_19:
1998 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1999 ; CHECK-I686-NEXT: .LBB26_21:
2000 ; CHECK-I686-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2001 ; CHECK-I686-NEXT: calll __extendhfsf2
2002 ; CHECK-I686-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
2003 ; CHECK-I686-NEXT: # xmm0 = mem[0],zero,zero,zero
2004 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
2005 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
2006 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
2007 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
2008 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
2009 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
2010 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
2011 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
2012 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2013 ; CHECK-I686-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0
2014 ; CHECK-I686-NEXT: ja .LBB26_22
2015 ; CHECK-I686-NEXT: # %bb.23:
2016 ; CHECK-I686-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2017 ; CHECK-I686-NEXT: jmp .LBB26_24
2018 ; CHECK-I686-NEXT: .LBB26_22:
2019 ; CHECK-I686-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2020 ; CHECK-I686-NEXT: .LBB26_24:
2021 ; CHECK-I686-NEXT: movd %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
2022 ; CHECK-I686-NEXT: calll __truncsfhf2
2023 ; CHECK-I686-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 4-byte Reload
2024 ; CHECK-I686-NEXT: # xmm1 = mem[0],zero,zero,zero
2025 ; CHECK-I686-NEXT: movss %xmm1, (%esp)
2026 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
2027 ; CHECK-I686-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2028 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
2029 ; CHECK-I686-NEXT: punpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2030 ; CHECK-I686-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
2031 ; CHECK-I686-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2032 ; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
2033 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
2034 ; CHECK-I686-NEXT: punpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2035 ; CHECK-I686-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
2036 ; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
2037 ; CHECK-I686-NEXT: calll __truncsfhf2
2038 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
2039 ; CHECK-I686-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2040 ; CHECK-I686-NEXT: punpckldq {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2041 ; CHECK-I686-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
2042 ; CHECK-I686-NEXT: punpcklqdq {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2043 ; CHECK-I686-NEXT: # xmm1 = xmm1[0],mem[0]
2044 ; CHECK-I686-NEXT: movdqa %xmm1, %xmm0
2045 ; CHECK-I686-NEXT: addl $336, %esp # imm = 0x150
2046 ; CHECK-I686-NEXT: popl %esi
2047 ; CHECK-I686-NEXT: popl %edi
2048 ; CHECK-I686-NEXT: popl %ebx
2049 ; CHECK-I686-NEXT: retl
2050 %3 = call fast <8 x half> @llvm.maxnum.v8f16(<8 x half> %0, <8 x half> %1)
; pr63114: codegen test for a chain of <N x half> shuffles. Presumably a
; regression test for LLVM issue 63114 (inferred from the function name only --
; confirm against the tracker).
;
; The function takes no arguments: the load address is poison and the store
; address is null, so the test pins only the instruction sequence llc emits
; for each RUN configuration at the top of the file.
;
; The assertion lines below are autogenerated by
; utils/update_llc_test_checks.py (see the note on the first line of the file);
; regenerate them with that script instead of editing them by hand.
2054 define void @pr63114() {
2055 ; CHECK-LIBCALL-LABEL: pr63114:
2056 ; CHECK-LIBCALL: # %bb.0:
2057 ; CHECK-LIBCALL-NEXT: movdqu (%rax), %xmm4
2058 ; CHECK-LIBCALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm4[0,1,3,3,4,5,6,7]
2059 ; CHECK-LIBCALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,1]
2060 ; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,65535,65535,65535,65535]
2061 ; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm0
2062 ; CHECK-LIBCALL-NEXT: movq {{.*#+}} xmm2 = [0,0,0,15360,0,0,0,0]
2063 ; CHECK-LIBCALL-NEXT: por %xmm2, %xmm0
2064 ; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm3 = [65535,65535,65535,65535,65535,65535,65535,0]
2065 ; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm0
2066 ; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm5 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60]
2067 ; CHECK-LIBCALL-NEXT: por %xmm5, %xmm0
2068 ; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm6 = xmm4[0,1,2,3,4,5,7,7]
2069 ; CHECK-LIBCALL-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3]
2070 ; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm6
2071 ; CHECK-LIBCALL-NEXT: por %xmm2, %xmm6
2072 ; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm6
2073 ; CHECK-LIBCALL-NEXT: por %xmm5, %xmm6
2074 ; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm7 = xmm4[0,1,2,3,5,5,5,5]
2075 ; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,3,0,3]
2076 ; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,5,5,5,5]
2077 ; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm4
2078 ; CHECK-LIBCALL-NEXT: por %xmm2, %xmm4
2079 ; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm4
2080 ; CHECK-LIBCALL-NEXT: por %xmm5, %xmm4
2081 ; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm7
2082 ; CHECK-LIBCALL-NEXT: por %xmm2, %xmm7
2083 ; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm7
2084 ; CHECK-LIBCALL-NEXT: por %xmm5, %xmm7
2085 ; CHECK-LIBCALL-NEXT: movdqu %xmm7, 0
2086 ; CHECK-LIBCALL-NEXT: movdqu %xmm4, 32
2087 ; CHECK-LIBCALL-NEXT: movdqu %xmm6, 48
2088 ; CHECK-LIBCALL-NEXT: movdqu %xmm0, 16
2089 ; CHECK-LIBCALL-NEXT: retq
2091 ; BWON-F16C-LABEL: pr63114:
2092 ; BWON-F16C: # %bb.0:
2093 ; BWON-F16C-NEXT: vmovdqu (%rax), %xmm0
2094 ; BWON-F16C-NEXT: vpsrld $16, %xmm0, %xmm1
2095 ; BWON-F16C-NEXT: vbroadcastss (%rax), %xmm2
2096 ; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2097 ; BWON-F16C-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,0],xmm3[0,0]
2098 ; BWON-F16C-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
2099 ; BWON-F16C-NEXT: vpsllq $48, %xmm3, %xmm4
2100 ; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[3],xmm2[4,5,6,7]
2101 ; BWON-F16C-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
2102 ; BWON-F16C-NEXT: vpor %xmm3, %xmm2, %xmm2
2103 ; BWON-F16C-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,3],xmm1[2,0]
2104 ; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[3],xmm1[4,5,6,7]
2105 ; BWON-F16C-NEXT: vpor %xmm3, %xmm1, %xmm1
2106 ; BWON-F16C-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
2107 ; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,1,3,3,4,5,6,7]
2108 ; BWON-F16C-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,1]
2109 ; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[3],xmm2[4,5,6,7]
2110 ; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm3[7]
2111 ; BWON-F16C-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
2112 ; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[3],xmm0[4,5,6,7]
2113 ; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm3[7]
2114 ; BWON-F16C-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2115 ; BWON-F16C-NEXT: vmovups %ymm0, 0
2116 ; BWON-F16C-NEXT: vmovups %ymm1, 32
2117 ; BWON-F16C-NEXT: vzeroupper
2118 ; BWON-F16C-NEXT: retq
2120 ; CHECK-I686-LABEL: pr63114:
2121 ; CHECK-I686: # %bb.0:
2122 ; CHECK-I686-NEXT: movdqu (%eax), %xmm6
2123 ; CHECK-I686-NEXT: pshuflw {{.*#+}} xmm0 = xmm6[0,1,3,3,4,5,6,7]
2124 ; CHECK-I686-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,1]
2125 ; CHECK-I686-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,65535,65535,65535,65535]
2126 ; CHECK-I686-NEXT: pand %xmm1, %xmm0
2127 ; CHECK-I686-NEXT: movq {{.*#+}} xmm2 = [0,0,0,15360,0,0,0,0]
2128 ; CHECK-I686-NEXT: por %xmm2, %xmm0
2129 ; CHECK-I686-NEXT: movdqa {{.*#+}} xmm3 = [65535,65535,65535,65535,65535,65535,65535,0]
2130 ; CHECK-I686-NEXT: pand %xmm3, %xmm0
2131 ; CHECK-I686-NEXT: movdqa {{.*#+}} xmm4 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60]
2132 ; CHECK-I686-NEXT: por %xmm4, %xmm0
2133 ; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm5 = xmm6[0,1,2,3,4,5,7,7]
2134 ; CHECK-I686-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3]
2135 ; CHECK-I686-NEXT: pand %xmm1, %xmm5
2136 ; CHECK-I686-NEXT: por %xmm2, %xmm5
2137 ; CHECK-I686-NEXT: pand %xmm3, %xmm5
2138 ; CHECK-I686-NEXT: por %xmm4, %xmm5
2139 ; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm7 = xmm6[0,1,2,3,5,5,5,5]
2140 ; CHECK-I686-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,3,0,3]
2141 ; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,5,5,5,5]
2142 ; CHECK-I686-NEXT: pand %xmm1, %xmm6
2143 ; CHECK-I686-NEXT: por %xmm2, %xmm6
2144 ; CHECK-I686-NEXT: pand %xmm3, %xmm6
2145 ; CHECK-I686-NEXT: por %xmm4, %xmm6
2146 ; CHECK-I686-NEXT: pand %xmm1, %xmm7
2147 ; CHECK-I686-NEXT: por %xmm2, %xmm7
2148 ; CHECK-I686-NEXT: pand %xmm3, %xmm7
2149 ; CHECK-I686-NEXT: por %xmm4, %xmm7
2150 ; CHECK-I686-NEXT: movdqu %xmm7, 0
2151 ; CHECK-I686-NEXT: movdqu %xmm6, 32
2152 ; CHECK-I686-NEXT: movdqu %xmm5, 48
2153 ; CHECK-I686-NEXT: movdqu %xmm0, 16
2154 ; CHECK-I686-NEXT: retl
; Load 24 half values from a poison address (2-byte alignment).
2155 %1 = load <24 x half>, ptr poison, align 2
; Keep every third element starting at index 2 (2, 5, ..., 23) -> 8 halves.
2156 %2 = shufflevector <24 x half> %1, <24 x half> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
; Concatenate the 8 selected halves with 8 copies of 0xH3C00 (bit pattern
; 15360, the constant that appears in the expected constant-pool vectors).
2157 %3 = shufflevector <8 x half> %2, <8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Interleave in groups of four lanes: lanes 4i and 4i+1 come from the poison
; operand, lane 4i+2 is element i of %2, and lane 4i+3 is the 0xH3C00 constant.
2158 %4 = shufflevector <16 x half> poison, <16 x half> %3, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
; Store the 32-lane result to address null (2-byte alignment); the expected
; output above writes it as 16- or 32-byte chunks at absolute offsets 0..48.
2159 store <32 x half> %4, ptr null, align 2
; Attribute group #0: applied to the functions in this file that are defined
; with a trailing "#0" (e.g. test_load_store); it marks them nounwind.
2163 attributes #0 = { nounwind }