1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=1 \
3 ; RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWON
4 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=0 \
5 ; RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWOFF
6 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c -fixup-byte-word-insts=1 \
7 ; RUN: | FileCheck %s -check-prefixes=CHECK,BWON,BWON-F16C
8 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -fixup-byte-word-insts=0 \
9 ; RUN: | FileCheck %s -check-prefixes=CHECK-I686
; test_load_store - copy one half value from %in to %out.
; Expected lowering, per the assertions below
;   x86-64 without F16C - pinsrw load, pextrw into a GPR, 16-bit movw store
;   x86-64 with F16C    - vpinsrw load, vpextrw stores straight to memory
;   i686 with SSE2      - both pointers are read from the stack first, then
;                         the same pinsrw / pextrw / movw sequence
; Attribute group #0 is defined outside this view.
11 define void @test_load_store(ptr %in, ptr %out) #0 {
12 ; CHECK-LIBCALL-LABEL: test_load_store:
13 ; CHECK-LIBCALL: # %bb.0:
14 ; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0
15 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
16 ; CHECK-LIBCALL-NEXT: movw %ax, (%rsi)
17 ; CHECK-LIBCALL-NEXT: retq
19 ; BWON-F16C-LABEL: test_load_store:
21 ; BWON-F16C-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
22 ; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rsi)
23 ; BWON-F16C-NEXT: retq
25 ; CHECK-I686-LABEL: test_load_store:
26 ; CHECK-I686: # %bb.0:
27 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
28 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ecx
29 ; CHECK-I686-NEXT: pinsrw $0, (%ecx), %xmm0
30 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %ecx
31 ; CHECK-I686-NEXT: movw %cx, (%eax)
32 ; CHECK-I686-NEXT: retl
; IR under test - a plain half load followed by a half store.
33 %val = load half, ptr %in
34 store half %val, ptr %out
; test_bitcast_from_half - load a half from %addr and reinterpret its bits
; as an i16.
; The assertions expect a single 16-bit integer load with no vector register
; involved. With -fixup-byte-word-insts=1 the load is the zero-extending
; movzwl form, with -fixup-byte-word-insts=0 (and on i686) it is a plain movw.
38 define i16 @test_bitcast_from_half(ptr %addr) #0 {
39 ; BWON-LABEL: test_bitcast_from_half:
41 ; BWON-NEXT: movzwl (%rdi), %eax
44 ; BWOFF-LABEL: test_bitcast_from_half:
46 ; BWOFF-NEXT: movw (%rdi), %ax
49 ; CHECK-I686-LABEL: test_bitcast_from_half:
50 ; CHECK-I686: # %bb.0:
51 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
52 ; CHECK-I686-NEXT: movw (%eax), %ax
53 ; CHECK-I686-NEXT: retl
; IR under test - half load, then a same-width bitcast to i16.
54 %val = load half, ptr %addr
55 %val_int = bitcast half %val to i16
; test_bitcast_to_half - reinterpret the i16 argument %in as a half and store
; it to %addr.
; The assertions expect a plain 16-bit movw store of the integer argument,
; on x86-64 directly from %si and on i686 after loading both arguments from
; the stack.
59 define void @test_bitcast_to_half(ptr %addr, i16 %in) #0 {
60 ; CHECK-LABEL: test_bitcast_to_half:
62 ; CHECK-NEXT: movw %si, (%rdi)
65 ; CHECK-I686-LABEL: test_bitcast_to_half:
66 ; CHECK-I686: # %bb.0:
67 ; CHECK-I686-NEXT: movw {{[0-9]+}}(%esp), %ax
68 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ecx
69 ; CHECK-I686-NEXT: movw %ax, (%ecx)
70 ; CHECK-I686-NEXT: retl
; IR under test - same-width bitcast from i16 to half, then a half store.
71 %val_fp = bitcast i16 %in to half
72 store half %val_fp, ptr %addr
; test_extend32 - load a half from %addr and extend it to float.
; Expected lowering, per the assertions below
;   x86-64 without F16C - pinsrw load, then a tail call (jmp) to __extendhfsf2
;   x86-64 with F16C    - movzwl load, vmovd into xmm0, vcvtph2ps
;   i686 with SSE2      - the half is written to the outgoing stack slot and
;                         __extendhfsf2 is called with a normal calll
76 define float @test_extend32(ptr %addr) #0 {
77 ; CHECK-LIBCALL-LABEL: test_extend32:
78 ; CHECK-LIBCALL: # %bb.0:
79 ; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0
80 ; CHECK-LIBCALL-NEXT: jmp __extendhfsf2@PLT # TAILCALL
82 ; BWON-F16C-LABEL: test_extend32:
84 ; BWON-F16C-NEXT: movzwl (%rdi), %eax
85 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
86 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
87 ; BWON-F16C-NEXT: retq
89 ; CHECK-I686-LABEL: test_extend32:
90 ; CHECK-I686: # %bb.0:
91 ; CHECK-I686-NEXT: subl $12, %esp
92 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
93 ; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0
94 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
95 ; CHECK-I686-NEXT: movw %ax, (%esp)
96 ; CHECK-I686-NEXT: calll __extendhfsf2
97 ; CHECK-I686-NEXT: addl $12, %esp
98 ; CHECK-I686-NEXT: retl
; IR under test - half load followed by fpext to float.
99 %val16 = load half, ptr %addr
100 %val32 = fpext half %val16 to float
; test_extend64 - load a half from %addr and extend it to double.
; Expected lowering, per the assertions below
;   x86-64 without F16C - call __extendhfsf2 (half to float), then cvtss2sd
;   x86-64 with F16C    - vcvtph2ps followed by vcvtss2sd, no call
;   i686 with SSE2      - only the __extendhfsf2 call appears, no separate
;                         float to double instruction is expected
; NOTE(review) the fpext result below is named %val32 although its type is
; double, the name looks carried over from test_extend32.
104 define double @test_extend64(ptr %addr) #0 {
105 ; CHECK-LIBCALL-LABEL: test_extend64:
106 ; CHECK-LIBCALL: # %bb.0:
107 ; CHECK-LIBCALL-NEXT: pushq %rax
108 ; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0
109 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
110 ; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0
111 ; CHECK-LIBCALL-NEXT: popq %rax
112 ; CHECK-LIBCALL-NEXT: retq
114 ; BWON-F16C-LABEL: test_extend64:
115 ; BWON-F16C: # %bb.0:
116 ; BWON-F16C-NEXT: movzwl (%rdi), %eax
117 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
118 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
119 ; BWON-F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
120 ; BWON-F16C-NEXT: retq
122 ; CHECK-I686-LABEL: test_extend64:
123 ; CHECK-I686: # %bb.0:
124 ; CHECK-I686-NEXT: subl $12, %esp
125 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
126 ; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0
127 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
128 ; CHECK-I686-NEXT: movw %ax, (%esp)
129 ; CHECK-I686-NEXT: calll __extendhfsf2
130 ; CHECK-I686-NEXT: addl $12, %esp
131 ; CHECK-I686-NEXT: retl
; IR under test - half load followed by fpext to double.
132 %val16 = load half, ptr %addr
133 %val32 = fpext half %val16 to double
; test_trunc32 - truncate the float argument %in to half and store it to %addr.
; Expected lowering, per the assertions below
;   x86-64 without F16C - %rdi is kept in callee-saved %rbx across a call to
;                         __truncsfhf2, the result is stored via pextrw / movw
;   x86-64 with F16C    - vcvtps2ph $4, vmovd to a GPR, movw store, no call
;   i686 with SSE2      - the float is reloaded from the stack, passed to
;                         __truncsfhf2 on the stack, result stored through %esi
137 define void @test_trunc32(float %in, ptr %addr) #0 {
138 ; CHECK-LIBCALL-LABEL: test_trunc32:
139 ; CHECK-LIBCALL: # %bb.0:
140 ; CHECK-LIBCALL-NEXT: pushq %rbx
141 ; CHECK-LIBCALL-NEXT: movq %rdi, %rbx
142 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
143 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
144 ; CHECK-LIBCALL-NEXT: movw %ax, (%rbx)
145 ; CHECK-LIBCALL-NEXT: popq %rbx
146 ; CHECK-LIBCALL-NEXT: retq
148 ; BWON-F16C-LABEL: test_trunc32:
149 ; BWON-F16C: # %bb.0:
150 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
151 ; BWON-F16C-NEXT: vmovd %xmm0, %eax
152 ; BWON-F16C-NEXT: movw %ax, (%rdi)
153 ; BWON-F16C-NEXT: retq
155 ; CHECK-I686-LABEL: test_trunc32:
156 ; CHECK-I686: # %bb.0:
157 ; CHECK-I686-NEXT: pushl %esi
158 ; CHECK-I686-NEXT: subl $8, %esp
159 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
160 ; CHECK-I686-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
161 ; CHECK-I686-NEXT: movd %xmm0, (%esp)
162 ; CHECK-I686-NEXT: calll __truncsfhf2
163 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
164 ; CHECK-I686-NEXT: movw %ax, (%esi)
165 ; CHECK-I686-NEXT: addl $8, %esp
166 ; CHECK-I686-NEXT: popl %esi
167 ; CHECK-I686-NEXT: retl
; IR under test - fptrunc from float to half, then a half store.
168 %val16 = fptrunc float %in to half
169 store half %val16, ptr %addr
; test_trunc64 - truncate the double argument %in to half and store it to %addr.
; Every configuration asserted below calls __truncdfhf2, including the F16C
; run, which only differs in storing the result with a single vpextrw to
; memory instead of pextrw plus movw.
173 define void @test_trunc64(double %in, ptr %addr) #0 {
174 ; CHECK-LIBCALL-LABEL: test_trunc64:
175 ; CHECK-LIBCALL: # %bb.0:
176 ; CHECK-LIBCALL-NEXT: pushq %rbx
177 ; CHECK-LIBCALL-NEXT: movq %rdi, %rbx
178 ; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT
179 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
180 ; CHECK-LIBCALL-NEXT: movw %ax, (%rbx)
181 ; CHECK-LIBCALL-NEXT: popq %rbx
182 ; CHECK-LIBCALL-NEXT: retq
184 ; BWON-F16C-LABEL: test_trunc64:
185 ; BWON-F16C: # %bb.0:
186 ; BWON-F16C-NEXT: pushq %rbx
187 ; BWON-F16C-NEXT: movq %rdi, %rbx
188 ; BWON-F16C-NEXT: callq __truncdfhf2@PLT
189 ; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
190 ; BWON-F16C-NEXT: popq %rbx
191 ; BWON-F16C-NEXT: retq
193 ; CHECK-I686-LABEL: test_trunc64:
194 ; CHECK-I686: # %bb.0:
195 ; CHECK-I686-NEXT: pushl %esi
196 ; CHECK-I686-NEXT: subl $8, %esp
197 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
198 ; CHECK-I686-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
199 ; CHECK-I686-NEXT: movq %xmm0, (%esp)
200 ; CHECK-I686-NEXT: calll __truncdfhf2
201 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
202 ; CHECK-I686-NEXT: movw %ax, (%esi)
203 ; CHECK-I686-NEXT: addl $8, %esp
204 ; CHECK-I686-NEXT: popl %esi
205 ; CHECK-I686-NEXT: retl
; IR under test - fptrunc from double to half, then a half store.
206 %val16 = fptrunc double %in to half
207 store half %val16, ptr %addr
; test_fptosi_i64 - load a half from %p and convert it to a signed i64.
; Expected lowering, per the assertions below
;   x86-64 without F16C - __extendhfsf2 call, then cvttss2si into %rax
;   x86-64 with F16C    - vcvtph2ps, then vcvttss2si into %rax
;   i686 with SSE2      - __extendhfsf2 call, then an x87 sequence that saves
;                         the control word (fnstcw), ORs in 0xC00, converts
;                         with fistpll and reloads the saved control word,
;                         the result is returned in %eax and %edx
211 define i64 @test_fptosi_i64(ptr %p) #0 {
212 ; CHECK-LIBCALL-LABEL: test_fptosi_i64:
213 ; CHECK-LIBCALL: # %bb.0:
214 ; CHECK-LIBCALL-NEXT: pushq %rax
215 ; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0
216 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
217 ; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax
218 ; CHECK-LIBCALL-NEXT: popq %rcx
219 ; CHECK-LIBCALL-NEXT: retq
221 ; BWON-F16C-LABEL: test_fptosi_i64:
222 ; BWON-F16C: # %bb.0:
223 ; BWON-F16C-NEXT: movzwl (%rdi), %eax
224 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
225 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
226 ; BWON-F16C-NEXT: vcvttss2si %xmm0, %rax
227 ; BWON-F16C-NEXT: retq
229 ; CHECK-I686-LABEL: test_fptosi_i64:
230 ; CHECK-I686: # %bb.0:
231 ; CHECK-I686-NEXT: subl $28, %esp
232 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
233 ; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0
234 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
235 ; CHECK-I686-NEXT: movw %ax, (%esp)
236 ; CHECK-I686-NEXT: calll __extendhfsf2
237 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
238 ; CHECK-I686-NEXT: flds {{[0-9]+}}(%esp)
239 ; CHECK-I686-NEXT: fnstcw {{[0-9]+}}(%esp)
240 ; CHECK-I686-NEXT: movzwl {{[0-9]+}}(%esp), %eax
241 ; CHECK-I686-NEXT: orl $3072, %eax # imm = 0xC00
242 ; CHECK-I686-NEXT: movw %ax, {{[0-9]+}}(%esp)
243 ; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp)
244 ; CHECK-I686-NEXT: fistpll {{[0-9]+}}(%esp)
245 ; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp)
246 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
247 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %edx
248 ; CHECK-I686-NEXT: addl $28, %esp
249 ; CHECK-I686-NEXT: retl
; IR under test - 2-byte aligned half load, then fptosi to i64.
250 %a = load half, ptr %p, align 2
251 %r = fptosi half %a to i64
; test_sitofp_i64 - convert the signed i64 argument %a to half and store it
; to %p.
; Expected lowering, per the assertions below
;   x86-64 without F16C - cvtsi2ss to float, then a call to __truncsfhf2
;   x86-64 with F16C    - vcvtsi2ss followed by vcvtps2ph $4, no call
;   i686 with SSE2      - the 64-bit integer is converted through x87
;                         (fildll / fstps), then passed to __truncsfhf2
255 define void @test_sitofp_i64(i64 %a, ptr %p) #0 {
256 ; CHECK-LIBCALL-LABEL: test_sitofp_i64:
257 ; CHECK-LIBCALL: # %bb.0:
258 ; CHECK-LIBCALL-NEXT: pushq %rbx
259 ; CHECK-LIBCALL-NEXT: movq %rsi, %rbx
260 ; CHECK-LIBCALL-NEXT: cvtsi2ss %rdi, %xmm0
261 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
262 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
263 ; CHECK-LIBCALL-NEXT: movw %ax, (%rbx)
264 ; CHECK-LIBCALL-NEXT: popq %rbx
265 ; CHECK-LIBCALL-NEXT: retq
267 ; BWON-F16C-LABEL: test_sitofp_i64:
268 ; BWON-F16C: # %bb.0:
269 ; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
270 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
271 ; BWON-F16C-NEXT: vmovd %xmm0, %eax
272 ; BWON-F16C-NEXT: movw %ax, (%rsi)
273 ; BWON-F16C-NEXT: retq
275 ; CHECK-I686-LABEL: test_sitofp_i64:
276 ; CHECK-I686: # %bb.0:
277 ; CHECK-I686-NEXT: pushl %esi
278 ; CHECK-I686-NEXT: subl $24, %esp
279 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
280 ; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
281 ; CHECK-I686-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
282 ; CHECK-I686-NEXT: fildll {{[0-9]+}}(%esp)
283 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
284 ; CHECK-I686-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
285 ; CHECK-I686-NEXT: movd %xmm0, (%esp)
286 ; CHECK-I686-NEXT: calll __truncsfhf2
287 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
288 ; CHECK-I686-NEXT: movw %ax, (%esi)
289 ; CHECK-I686-NEXT: addl $24, %esp
290 ; CHECK-I686-NEXT: popl %esi
291 ; CHECK-I686-NEXT: retl
; IR under test - sitofp from i64 to half, then a half store.
292 %r = sitofp i64 %a to half
293 store half %r, ptr %p
; test_fptoui_i64 - load a half from %p and convert it to an unsigned i64.
; Expected lowering, per the assertions below
;   x86-64 (both runs) - extend to float (libcall or vcvtph2ps), then a
;                        branch-free sequence. One signed convert goes into
;                        %rcx, a second convert of the value minus a
;                        constant-pool operand goes into %rax, and the two are
;                        merged with sarq $63 / andq / orq.
;   i686 with SSE2     - __extendhfsf2 call, ucomiss against a constant with a
;                        conditional xorps of the subtrahend, x87 fistpll
;                        under a modified control word, and finally the setae
;                        result shifted left by 31 is xor-ed into the high
;                        word returned in %edx.
; The value of the constant-pool operand is not visible in this file.
297 define i64 @test_fptoui_i64(ptr %p) #0 {
298 ; CHECK-LIBCALL-LABEL: test_fptoui_i64:
299 ; CHECK-LIBCALL: # %bb.0:
300 ; CHECK-LIBCALL-NEXT: pushq %rax
301 ; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0
302 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
303 ; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rcx
304 ; CHECK-LIBCALL-NEXT: movq %rcx, %rdx
305 ; CHECK-LIBCALL-NEXT: sarq $63, %rdx
306 ; CHECK-LIBCALL-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
307 ; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax
308 ; CHECK-LIBCALL-NEXT: andq %rdx, %rax
309 ; CHECK-LIBCALL-NEXT: orq %rcx, %rax
310 ; CHECK-LIBCALL-NEXT: popq %rcx
311 ; CHECK-LIBCALL-NEXT: retq
313 ; BWON-F16C-LABEL: test_fptoui_i64:
314 ; BWON-F16C: # %bb.0:
315 ; BWON-F16C-NEXT: movzwl (%rdi), %eax
316 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
317 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
318 ; BWON-F16C-NEXT: vcvttss2si %xmm0, %rcx
319 ; BWON-F16C-NEXT: movq %rcx, %rdx
320 ; BWON-F16C-NEXT: sarq $63, %rdx
321 ; BWON-F16C-NEXT: vsubss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
322 ; BWON-F16C-NEXT: vcvttss2si %xmm0, %rax
323 ; BWON-F16C-NEXT: andq %rdx, %rax
324 ; BWON-F16C-NEXT: orq %rcx, %rax
325 ; BWON-F16C-NEXT: retq
327 ; CHECK-I686-LABEL: test_fptoui_i64:
328 ; CHECK-I686: # %bb.0:
329 ; CHECK-I686-NEXT: subl $28, %esp
330 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
331 ; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0
332 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
333 ; CHECK-I686-NEXT: movw %ax, (%esp)
334 ; CHECK-I686-NEXT: calll __extendhfsf2
335 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
336 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
337 ; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
338 ; CHECK-I686-NEXT: ucomiss %xmm1, %xmm0
339 ; CHECK-I686-NEXT: jae .LBB9_2
340 ; CHECK-I686-NEXT: # %bb.1:
341 ; CHECK-I686-NEXT: xorps %xmm1, %xmm1
342 ; CHECK-I686-NEXT: .LBB9_2:
343 ; CHECK-I686-NEXT: subss %xmm1, %xmm0
344 ; CHECK-I686-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
345 ; CHECK-I686-NEXT: setae %al
346 ; CHECK-I686-NEXT: flds {{[0-9]+}}(%esp)
347 ; CHECK-I686-NEXT: fnstcw {{[0-9]+}}(%esp)
348 ; CHECK-I686-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
349 ; CHECK-I686-NEXT: orl $3072, %ecx # imm = 0xC00
350 ; CHECK-I686-NEXT: movw %cx, {{[0-9]+}}(%esp)
351 ; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp)
352 ; CHECK-I686-NEXT: fistpll {{[0-9]+}}(%esp)
353 ; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp)
354 ; CHECK-I686-NEXT: movzbl %al, %edx
355 ; CHECK-I686-NEXT: shll $31, %edx
356 ; CHECK-I686-NEXT: xorl {{[0-9]+}}(%esp), %edx
357 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
358 ; CHECK-I686-NEXT: addl $28, %esp
359 ; CHECK-I686-NEXT: retl
; IR under test - 2-byte aligned half load, then fptoui to i64.
360 %a = load half, ptr %p, align 2
361 %r = fptoui half %a to i64
; test_uitofp_i64 - convert the unsigned i64 argument %a to half and store it
; to %p.
; Expected lowering, per the assertions below
;   x86-64 (both runs) - testq / js splits on the sign bit. When it is clear
;                        the value is converted directly with cvtsi2ss. When
;                        it is set the value is shifted right by one with the
;                        low bit OR-ed back in, converted, and doubled with an
;                        add of the result to itself. The float is then
;                        narrowed by __truncsfhf2 or by vcvtps2ph $4.
;   i686 with SSE2     - fildll of the 64-bit value, fadds of a constant-pool
;                        entry indexed by the sign bit (shrl $31), then a call
;                        to __truncsfhf2.
365 define void @test_uitofp_i64(i64 %a, ptr %p) #0 {
366 ; CHECK-LIBCALL-LABEL: test_uitofp_i64:
367 ; CHECK-LIBCALL: # %bb.0:
368 ; CHECK-LIBCALL-NEXT: pushq %rbx
369 ; CHECK-LIBCALL-NEXT: movq %rsi, %rbx
370 ; CHECK-LIBCALL-NEXT: testq %rdi, %rdi
371 ; CHECK-LIBCALL-NEXT: js .LBB10_1
372 ; CHECK-LIBCALL-NEXT: # %bb.2:
373 ; CHECK-LIBCALL-NEXT: cvtsi2ss %rdi, %xmm0
374 ; CHECK-LIBCALL-NEXT: jmp .LBB10_3
375 ; CHECK-LIBCALL-NEXT: .LBB10_1:
376 ; CHECK-LIBCALL-NEXT: movq %rdi, %rax
377 ; CHECK-LIBCALL-NEXT: shrq %rax
378 ; CHECK-LIBCALL-NEXT: andl $1, %edi
379 ; CHECK-LIBCALL-NEXT: orq %rax, %rdi
380 ; CHECK-LIBCALL-NEXT: cvtsi2ss %rdi, %xmm0
381 ; CHECK-LIBCALL-NEXT: addss %xmm0, %xmm0
382 ; CHECK-LIBCALL-NEXT: .LBB10_3:
383 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
384 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
385 ; CHECK-LIBCALL-NEXT: movw %ax, (%rbx)
386 ; CHECK-LIBCALL-NEXT: popq %rbx
387 ; CHECK-LIBCALL-NEXT: retq
389 ; BWON-F16C-LABEL: test_uitofp_i64:
390 ; BWON-F16C: # %bb.0:
391 ; BWON-F16C-NEXT: testq %rdi, %rdi
392 ; BWON-F16C-NEXT: js .LBB10_1
393 ; BWON-F16C-NEXT: # %bb.2:
394 ; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
395 ; BWON-F16C-NEXT: jmp .LBB10_3
396 ; BWON-F16C-NEXT: .LBB10_1:
397 ; BWON-F16C-NEXT: movq %rdi, %rax
398 ; BWON-F16C-NEXT: shrq %rax
399 ; BWON-F16C-NEXT: andl $1, %edi
400 ; BWON-F16C-NEXT: orq %rax, %rdi
401 ; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
402 ; BWON-F16C-NEXT: vaddss %xmm0, %xmm0, %xmm0
403 ; BWON-F16C-NEXT: .LBB10_3:
404 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
405 ; BWON-F16C-NEXT: vmovd %xmm0, %eax
406 ; BWON-F16C-NEXT: movw %ax, (%rsi)
407 ; BWON-F16C-NEXT: retq
409 ; CHECK-I686-LABEL: test_uitofp_i64:
410 ; CHECK-I686: # %bb.0:
411 ; CHECK-I686-NEXT: pushl %esi
412 ; CHECK-I686-NEXT: subl $24, %esp
413 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
414 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
415 ; CHECK-I686-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
416 ; CHECK-I686-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
417 ; CHECK-I686-NEXT: shrl $31, %eax
418 ; CHECK-I686-NEXT: fildll {{[0-9]+}}(%esp)
419 ; CHECK-I686-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
420 ; CHECK-I686-NEXT: fstps (%esp)
421 ; CHECK-I686-NEXT: calll __truncsfhf2
422 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
423 ; CHECK-I686-NEXT: movw %ax, (%esi)
424 ; CHECK-I686-NEXT: addl $24, %esp
425 ; CHECK-I686-NEXT: popl %esi
426 ; CHECK-I686-NEXT: retl
; IR under test - uitofp from i64 to half, then a half store.
427 %r = uitofp i64 %a to half
428 store half %r, ptr %p
; test_extend32_vec4 - load a <4 x half> vector from %p and extend it to
; <4 x float>.
; Expected lowering, per the assertions below
;   x86-64 without F16C - the four elements are loaded one by one with pinsrw
;                         (offsets 0, 2, 4, 6), each goes through its own
;                         __extendhfsf2 call with spills in between, and the
;                         results are reassembled with unpcklps / unpcklpd
;   x86-64 with F16C    - a single vcvtph2ps with a memory operand
;   i686 with SSE2      - four __extendhfsf2 calls with stack-passed
;                         arguments, x87 results spilled with fstpt and
;                         written out with fstps, then rebuilt with movss /
;                         unpcklps / movlhps
432 define <4 x float> @test_extend32_vec4(ptr %p) #0 {
433 ; CHECK-LIBCALL-LABEL: test_extend32_vec4:
434 ; CHECK-LIBCALL: # %bb.0:
435 ; CHECK-LIBCALL-NEXT: subq $72, %rsp
436 ; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0
437 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
438 ; CHECK-LIBCALL-NEXT: pinsrw $0, 2(%rdi), %xmm0
439 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
440 ; CHECK-LIBCALL-NEXT: pinsrw $0, 4(%rdi), %xmm0
441 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
442 ; CHECK-LIBCALL-NEXT: pinsrw $0, 6(%rdi), %xmm0
443 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
444 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
445 ; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
446 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
447 ; CHECK-LIBCALL-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
448 ; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
449 ; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
450 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
451 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
452 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
453 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
454 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
455 ; CHECK-LIBCALL-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
456 ; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
457 ; CHECK-LIBCALL-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
458 ; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0]
459 ; CHECK-LIBCALL-NEXT: addq $72, %rsp
460 ; CHECK-LIBCALL-NEXT: retq
462 ; BWON-F16C-LABEL: test_extend32_vec4:
463 ; BWON-F16C: # %bb.0:
464 ; BWON-F16C-NEXT: vcvtph2ps (%rdi), %xmm0
465 ; BWON-F16C-NEXT: retq
467 ; CHECK-I686-LABEL: test_extend32_vec4:
468 ; CHECK-I686: # %bb.0:
469 ; CHECK-I686-NEXT: pushl %esi
470 ; CHECK-I686-NEXT: subl $88, %esp
471 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
472 ; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0
473 ; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
474 ; CHECK-I686-NEXT: pinsrw $0, 6(%eax), %xmm0
475 ; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
476 ; CHECK-I686-NEXT: pinsrw $0, 4(%eax), %xmm0
477 ; CHECK-I686-NEXT: pinsrw $0, 2(%eax), %xmm1
478 ; CHECK-I686-NEXT: pextrw $0, %xmm1, %eax
479 ; CHECK-I686-NEXT: movw %ax, (%esp)
480 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi
481 ; CHECK-I686-NEXT: calll __extendhfsf2
482 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
483 ; CHECK-I686-NEXT: movw %si, (%esp)
484 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
485 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi
486 ; CHECK-I686-NEXT: calll __extendhfsf2
487 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
488 ; CHECK-I686-NEXT: movw %si, (%esp)
489 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
490 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi
491 ; CHECK-I686-NEXT: calll __extendhfsf2
492 ; CHECK-I686-NEXT: movw %si, (%esp)
493 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
494 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
495 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
496 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
497 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
498 ; CHECK-I686-NEXT: calll __extendhfsf2
499 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
500 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
501 ; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
502 ; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
503 ; CHECK-I686-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
504 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
505 ; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
506 ; CHECK-I686-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
507 ; CHECK-I686-NEXT: addl $88, %esp
508 ; CHECK-I686-NEXT: popl %esi
509 ; CHECK-I686-NEXT: retl
; IR under test - 8-byte aligned <4 x half> load, then a vector fpext to float.
510 %a = load <4 x half>, ptr %p, align 8
511 %b = fpext <4 x half> %a to <4 x float>
; test_extend64_vec4 - load a <4 x half> vector from %p and extend it to
; <4 x double>.
; Expected lowering, per the assertions below
;   x86-64 without F16C - four scalar __extendhfsf2 calls, each followed by
;                         cvtss2sd, the doubles are paired with unpcklpd and
;                         returned in xmm0 and xmm1
;   x86-64 with F16C    - vcvtph2ps from memory, then vcvtps2pd into ymm0
;   i686 with SSE2      - four __extendhfsf2 calls, the x87 results are
;                         written out as doubles with fstpl and the two result
;                         registers are rebuilt with movsd / movhps
515 define <4 x double> @test_extend64_vec4(ptr %p) #0 {
516 ; CHECK-LIBCALL-LABEL: test_extend64_vec4:
517 ; CHECK-LIBCALL: # %bb.0:
518 ; CHECK-LIBCALL-NEXT: subq $72, %rsp
519 ; CHECK-LIBCALL-NEXT: pinsrw $0, 4(%rdi), %xmm0
520 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
521 ; CHECK-LIBCALL-NEXT: pinsrw $0, 6(%rdi), %xmm0
522 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
523 ; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0
524 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
525 ; CHECK-LIBCALL-NEXT: pinsrw $0, 2(%rdi), %xmm0
526 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
527 ; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0
528 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
529 ; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
530 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
531 ; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0
532 ; CHECK-LIBCALL-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
533 ; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0]
534 ; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
535 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
536 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
537 ; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0
538 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
539 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
540 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
541 ; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm1
542 ; CHECK-LIBCALL-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
543 ; CHECK-LIBCALL-NEXT: # xmm1 = xmm1[0],mem[0]
544 ; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
545 ; CHECK-LIBCALL-NEXT: addq $72, %rsp
546 ; CHECK-LIBCALL-NEXT: retq
548 ; BWON-F16C-LABEL: test_extend64_vec4:
549 ; BWON-F16C: # %bb.0:
550 ; BWON-F16C-NEXT: vcvtph2ps (%rdi), %xmm0
551 ; BWON-F16C-NEXT: vcvtps2pd %xmm0, %ymm0
552 ; BWON-F16C-NEXT: retq
554 ; CHECK-I686-LABEL: test_extend64_vec4:
555 ; CHECK-I686: # %bb.0:
556 ; CHECK-I686-NEXT: pushl %esi
557 ; CHECK-I686-NEXT: subl $104, %esp
558 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
559 ; CHECK-I686-NEXT: pinsrw $0, 6(%eax), %xmm0
560 ; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
561 ; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0
562 ; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
563 ; CHECK-I686-NEXT: pinsrw $0, 2(%eax), %xmm0
564 ; CHECK-I686-NEXT: pinsrw $0, 4(%eax), %xmm1
565 ; CHECK-I686-NEXT: pextrw $0, %xmm1, %eax
566 ; CHECK-I686-NEXT: movw %ax, (%esp)
567 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi
568 ; CHECK-I686-NEXT: calll __extendhfsf2
569 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
570 ; CHECK-I686-NEXT: movw %si, (%esp)
571 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
572 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi
573 ; CHECK-I686-NEXT: calll __extendhfsf2
574 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
575 ; CHECK-I686-NEXT: movw %si, (%esp)
576 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
577 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi
578 ; CHECK-I686-NEXT: calll __extendhfsf2
579 ; CHECK-I686-NEXT: movw %si, (%esp)
580 ; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp)
581 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
582 ; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp)
583 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
584 ; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp)
585 ; CHECK-I686-NEXT: calll __extendhfsf2
586 ; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp)
587 ; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
588 ; CHECK-I686-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
589 ; CHECK-I686-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
590 ; CHECK-I686-NEXT: movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
591 ; CHECK-I686-NEXT: addl $104, %esp
592 ; CHECK-I686-NEXT: popl %esi
593 ; CHECK-I686-NEXT: retl
; IR under test - 8-byte aligned <4 x half> load, then a vector fpext to double.
594 %a = load <4 x half>, ptr %p, align 8
595 %b = fpext <4 x half> %a to <4 x double>
; test_trunc32_vec4 - truncate the <4 x float> argument %a to <4 x half> and
; store the result to %p.
; Expected lowering, per the assertions below
;   x86-64 without F16C - the vector is spilled, each lane is moved into
;                         position (shufps / movhlps) and passed to its own
;                         __truncsfhf2 call, and the four halves are stored
;                         individually at offsets 0, 6, 4 and 2
;   x86-64 with F16C    - a single vcvtps2ph $4 with a memory destination
;   i686 with SSE2      - same per-lane scheme as the first case, with each
;                         float passed on the stack
599 define void @test_trunc32_vec4(<4 x float> %a, ptr %p) #0 {
600 ; CHECK-LIBCALL-LABEL: test_trunc32_vec4:
601 ; CHECK-LIBCALL: # %bb.0:
602 ; CHECK-LIBCALL-NEXT: pushq %rbx
603 ; CHECK-LIBCALL-NEXT: subq $64, %rsp
604 ; CHECK-LIBCALL-NEXT: movq %rdi, %rbx
605 ; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
606 ; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
607 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
608 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
609 ; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
610 ; CHECK-LIBCALL-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
611 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
612 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
613 ; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
614 ; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
615 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
616 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
617 ; CHECK-LIBCALL-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
618 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
619 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
620 ; CHECK-LIBCALL-NEXT: movw %ax, (%rbx)
621 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
622 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
623 ; CHECK-LIBCALL-NEXT: movw %ax, 6(%rbx)
624 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
625 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
626 ; CHECK-LIBCALL-NEXT: movw %ax, 4(%rbx)
627 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
628 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
629 ; CHECK-LIBCALL-NEXT: movw %ax, 2(%rbx)
630 ; CHECK-LIBCALL-NEXT: addq $64, %rsp
631 ; CHECK-LIBCALL-NEXT: popq %rbx
632 ; CHECK-LIBCALL-NEXT: retq
634 ; BWON-F16C-LABEL: test_trunc32_vec4:
635 ; BWON-F16C: # %bb.0:
636 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, (%rdi)
637 ; BWON-F16C-NEXT: retq
639 ; CHECK-I686-LABEL: test_trunc32_vec4:
640 ; CHECK-I686: # %bb.0:
641 ; CHECK-I686-NEXT: pushl %esi
642 ; CHECK-I686-NEXT: subl $88, %esp
643 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
644 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
645 ; CHECK-I686-NEXT: movaps %xmm0, %xmm1
646 ; CHECK-I686-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
647 ; CHECK-I686-NEXT: movss %xmm1, (%esp)
648 ; CHECK-I686-NEXT: calll __truncsfhf2
649 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
650 ; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
651 ; CHECK-I686-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
652 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
653 ; CHECK-I686-NEXT: calll __truncsfhf2
654 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
655 ; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
656 ; CHECK-I686-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
657 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
658 ; CHECK-I686-NEXT: calll __truncsfhf2
659 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
660 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
661 ; CHECK-I686-NEXT: movd %xmm0, (%esp)
662 ; CHECK-I686-NEXT: calll __truncsfhf2
663 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
664 ; CHECK-I686-NEXT: movw %ax, (%esi)
665 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
666 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
667 ; CHECK-I686-NEXT: movw %ax, 6(%esi)
668 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
669 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
670 ; CHECK-I686-NEXT: movw %ax, 4(%esi)
671 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
672 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
673 ; CHECK-I686-NEXT: movw %ax, 2(%esi)
674 ; CHECK-I686-NEXT: addl $88, %esp
675 ; CHECK-I686-NEXT: popl %esi
676 ; CHECK-I686-NEXT: retl
; IR under test - vector fptrunc from float to half, then a vector store.
677 %v = fptrunc <4 x float> %a to <4 x half>
678 store <4 x half> %v, ptr %p
; test_trunc64_vec4 - truncate the <4 x double> argument %a to <4 x half> and
; store the result to %p.
; Every configuration asserted below makes four __truncdfhf2 calls, one per
; lane, including the F16C run. They differ in how the result is written
;   x86-64 without F16C - four separate pextrw / movw stores at offsets
;                         4, 0, 6 and 2
;   x86-64 with F16C    - the halves are packed with vpunpcklwd / vpunpckldq
;                         and written with one vmovq, vzeroupper is expected
;                         before the calls that follow a ymm access
;   i686 with SSE2      - lanes are passed on the stack via movlps / movhps,
;                         then four stores at offsets 6, 4, 2 and 0
682 define void @test_trunc64_vec4(<4 x double> %a, ptr %p) #0 {
683 ; CHECK-LIBCALL-LABEL: test_trunc64_vec4:
684 ; CHECK-LIBCALL: # %bb.0:
685 ; CHECK-LIBCALL-NEXT: pushq %rbx
686 ; CHECK-LIBCALL-NEXT: subq $64, %rsp
687 ; CHECK-LIBCALL-NEXT: movq %rdi, %rbx
688 ; CHECK-LIBCALL-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
689 ; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
690 ; CHECK-LIBCALL-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
691 ; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT
692 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
693 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
694 ; CHECK-LIBCALL-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
695 ; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT
696 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
697 ; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
698 ; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT
699 ; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
700 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
701 ; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT
702 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
703 ; CHECK-LIBCALL-NEXT: movw %ax, 4(%rbx)
704 ; CHECK-LIBCALL-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
705 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
706 ; CHECK-LIBCALL-NEXT: movw %ax, (%rbx)
707 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
708 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
709 ; CHECK-LIBCALL-NEXT: movw %ax, 6(%rbx)
710 ; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
711 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
712 ; CHECK-LIBCALL-NEXT: movw %ax, 2(%rbx)
713 ; CHECK-LIBCALL-NEXT: addq $64, %rsp
714 ; CHECK-LIBCALL-NEXT: popq %rbx
715 ; CHECK-LIBCALL-NEXT: retq
717 ; BWON-F16C-LABEL: test_trunc64_vec4:
718 ; BWON-F16C: # %bb.0:
719 ; BWON-F16C-NEXT: pushq %rbx
720 ; BWON-F16C-NEXT: subq $64, %rsp
721 ; BWON-F16C-NEXT: movq %rdi, %rbx
722 ; BWON-F16C-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
723 ; BWON-F16C-NEXT: vextractf128 $1, %ymm0, %xmm0
724 ; BWON-F16C-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
725 ; BWON-F16C-NEXT: vzeroupper
726 ; BWON-F16C-NEXT: callq __truncdfhf2@PLT
727 ; BWON-F16C-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
728 ; BWON-F16C-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
729 ; BWON-F16C-NEXT: # xmm0 = mem[1,0]
730 ; BWON-F16C-NEXT: callq __truncdfhf2@PLT
731 ; BWON-F16C-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
732 ; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
733 ; BWON-F16C-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
734 ; BWON-F16C-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
735 ; BWON-F16C-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
736 ; BWON-F16C-NEXT: vzeroupper
737 ; BWON-F16C-NEXT: callq __truncdfhf2@PLT
738 ; BWON-F16C-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
739 ; BWON-F16C-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
740 ; BWON-F16C-NEXT: # xmm0 = mem[1,0]
741 ; BWON-F16C-NEXT: callq __truncdfhf2@PLT
742 ; BWON-F16C-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
743 ; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
744 ; BWON-F16C-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
745 ; BWON-F16C-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
746 ; BWON-F16C-NEXT: vmovq %xmm0, (%rbx)
747 ; BWON-F16C-NEXT: addq $64, %rsp
748 ; BWON-F16C-NEXT: popq %rbx
749 ; BWON-F16C-NEXT: retq
751 ; CHECK-I686-LABEL: test_trunc64_vec4:
752 ; CHECK-I686: # %bb.0:
753 ; CHECK-I686-NEXT: pushl %esi
754 ; CHECK-I686-NEXT: subl $88, %esp
755 ; CHECK-I686-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
756 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
757 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
758 ; CHECK-I686-NEXT: movlps %xmm0, (%esp)
759 ; CHECK-I686-NEXT: calll __truncdfhf2
760 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
761 ; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
762 ; CHECK-I686-NEXT: movhps %xmm0, (%esp)
763 ; CHECK-I686-NEXT: calll __truncdfhf2
764 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
765 ; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
766 ; CHECK-I686-NEXT: movlps %xmm0, (%esp)
767 ; CHECK-I686-NEXT: calll __truncdfhf2
768 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
769 ; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
770 ; CHECK-I686-NEXT: movhps %xmm0, (%esp)
771 ; CHECK-I686-NEXT: calll __truncdfhf2
772 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
773 ; CHECK-I686-NEXT: movw %ax, 6(%esi)
774 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
775 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
776 ; CHECK-I686-NEXT: movw %ax, 4(%esi)
777 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
778 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
779 ; CHECK-I686-NEXT: movw %ax, 2(%esi)
780 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
781 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
782 ; CHECK-I686-NEXT: movw %ax, (%esi)
783 ; CHECK-I686-NEXT: addl $88, %esp
784 ; CHECK-I686-NEXT: popl %esi
785 ; CHECK-I686-NEXT: retl
; IR under test - vector fptrunc from double to half, then a vector store.
786 %v = fptrunc <4 x double> %a to <4 x half>
787 store <4 x half> %v, ptr %p
791 declare float @test_floatret();
793 ; On i686, if SSE2 is available, the return value from test_floatret is loaded
794 ; to f80 and then rounded to f32. The DAG combiner should not combine this
795 ; fp_round and the subsequent fptrunc from float to half.
; The IR calls @test_floatret and truncates its float result to half. The i686
; expectations store the call result with fstps before a single __truncsfhf2
; call; the F16C expectations use vcvtps2ph in place of the libcall.
796 define half @test_f80trunc_nodagcombine() #0 {
797 ; CHECK-LIBCALL-LABEL: test_f80trunc_nodagcombine:
798 ; CHECK-LIBCALL: # %bb.0:
799 ; CHECK-LIBCALL-NEXT: pushq %rax
800 ; CHECK-LIBCALL-NEXT: callq test_floatret@PLT
801 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
802 ; CHECK-LIBCALL-NEXT: popq %rax
803 ; CHECK-LIBCALL-NEXT: retq
805 ; BWON-F16C-LABEL: test_f80trunc_nodagcombine:
806 ; BWON-F16C: # %bb.0:
807 ; BWON-F16C-NEXT: pushq %rax
808 ; BWON-F16C-NEXT: callq test_floatret@PLT
809 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
810 ; BWON-F16C-NEXT: vmovd %xmm0, %eax
811 ; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
812 ; BWON-F16C-NEXT: popq %rax
813 ; BWON-F16C-NEXT: retq
815 ; CHECK-I686-LABEL: test_f80trunc_nodagcombine:
816 ; CHECK-I686: # %bb.0:
817 ; CHECK-I686-NEXT: subl $12, %esp
818 ; CHECK-I686-NEXT: calll test_floatret@PLT
819 ; CHECK-I686-NEXT: fstps (%esp)
820 ; CHECK-I686-NEXT: calll __truncsfhf2
821 ; CHECK-I686-NEXT: addl $12, %esp
822 ; CHECK-I686-NEXT: retl
823 %1 = call float @test_floatret()
824 %2 = fptrunc float %1 to half
; Loads a half from %b, converts the i32 argument %a to half with sitofp, adds
; the two values as half, and extends the sum to float (%tmp3).
; Without F16C the expectations route every half<->float step through
; __truncsfhf2 / __extendhfsf2; with F16C they use vcvtps2ph / vcvtph2ps and
; contain no calls.
831 define float @test_sitofp_fadd_i32(i32 %a, ptr %b) #0 {
832 ; CHECK-LIBCALL-LABEL: test_sitofp_fadd_i32:
833 ; CHECK-LIBCALL: # %bb.0:
834 ; CHECK-LIBCALL-NEXT: subq $40, %rsp
835 ; CHECK-LIBCALL-NEXT: pinsrw $0, (%rsi), %xmm0
836 ; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
837 ; CHECK-LIBCALL-NEXT: xorps %xmm0, %xmm0
838 ; CHECK-LIBCALL-NEXT: cvtsi2ss %edi, %xmm0
839 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
840 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
841 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
842 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
843 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
844 ; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
845 ; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero
846 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
847 ; CHECK-LIBCALL-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
848 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
849 ; CHECK-LIBCALL-NEXT: addq $40, %rsp
850 ; CHECK-LIBCALL-NEXT: jmp __extendhfsf2@PLT # TAILCALL
852 ; BWON-F16C-LABEL: test_sitofp_fadd_i32:
853 ; BWON-F16C: # %bb.0:
854 ; BWON-F16C-NEXT: movzwl (%rsi), %eax
855 ; BWON-F16C-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
856 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
857 ; BWON-F16C-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
858 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
859 ; BWON-F16C-NEXT: vmovd %eax, %xmm1
860 ; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
861 ; BWON-F16C-NEXT: vaddss %xmm0, %xmm1, %xmm0
862 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
863 ; BWON-F16C-NEXT: vmovd %xmm0, %eax
864 ; BWON-F16C-NEXT: movzwl %ax, %eax
865 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
866 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
867 ; BWON-F16C-NEXT: retq
869 ; CHECK-I686-LABEL: test_sitofp_fadd_i32:
870 ; CHECK-I686: # %bb.0:
871 ; CHECK-I686-NEXT: subl $60, %esp
872 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
873 ; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0
874 ; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
875 ; CHECK-I686-NEXT: xorps %xmm0, %xmm0
876 ; CHECK-I686-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
877 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
878 ; CHECK-I686-NEXT: calll __truncsfhf2
879 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
880 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
881 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
882 ; CHECK-I686-NEXT: movw %ax, (%esp)
883 ; CHECK-I686-NEXT: calll __extendhfsf2
884 ; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
885 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
886 ; CHECK-I686-NEXT: movw %ax, (%esp)
887 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
888 ; CHECK-I686-NEXT: calll __extendhfsf2
889 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
890 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
891 ; CHECK-I686-NEXT: addss {{[0-9]+}}(%esp), %xmm0
892 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
893 ; CHECK-I686-NEXT: calll __truncsfhf2
894 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
895 ; CHECK-I686-NEXT: movw %ax, (%esp)
896 ; CHECK-I686-NEXT: calll __extendhfsf2
897 ; CHECK-I686-NEXT: addl $60, %esp
898 ; CHECK-I686-NEXT: retl
899 %tmp0 = load half, ptr %b
900 %tmp1 = sitofp i32 %a to half
901 %tmp2 = fadd half %tmp0, %tmp1
902 %tmp3 = fpext half %tmp2 to float
; Compares the half argument against 0xH0000 with `fcmp une` and converts the
; resulting i1 to half with uitofp.
; In every run the expectations pick between 0 and the bit pattern 0x3C00
; (1.0 as an IEEE half) with cmovne/cmovp after a (v)ucomiss against zero, then
; insert it with (v)pinsrw; no truncation libcall appears in the sequences.
906 define half @PR40273(half) #0 {
907 ; CHECK-LIBCALL-LABEL: PR40273:
908 ; CHECK-LIBCALL: # %bb.0:
909 ; CHECK-LIBCALL-NEXT: pushq %rax
910 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
911 ; CHECK-LIBCALL-NEXT: xorl %eax, %eax
912 ; CHECK-LIBCALL-NEXT: xorps %xmm1, %xmm1
913 ; CHECK-LIBCALL-NEXT: ucomiss %xmm1, %xmm0
914 ; CHECK-LIBCALL-NEXT: movl $15360, %ecx # imm = 0x3C00
915 ; CHECK-LIBCALL-NEXT: cmovnel %ecx, %eax
916 ; CHECK-LIBCALL-NEXT: cmovpl %ecx, %eax
917 ; CHECK-LIBCALL-NEXT: pinsrw $0, %eax, %xmm0
918 ; CHECK-LIBCALL-NEXT: popq %rax
919 ; CHECK-LIBCALL-NEXT: retq
921 ; BWON-F16C-LABEL: PR40273:
922 ; BWON-F16C: # %bb.0:
923 ; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax
924 ; BWON-F16C-NEXT: movzwl %ax, %eax
925 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
926 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
927 ; BWON-F16C-NEXT: xorl %eax, %eax
928 ; BWON-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
929 ; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0
930 ; BWON-F16C-NEXT: movl $15360, %ecx # imm = 0x3C00
931 ; BWON-F16C-NEXT: cmovnel %ecx, %eax
932 ; BWON-F16C-NEXT: cmovpl %ecx, %eax
933 ; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
934 ; BWON-F16C-NEXT: retq
936 ; CHECK-I686-LABEL: PR40273:
937 ; CHECK-I686: # %bb.0:
938 ; CHECK-I686-NEXT: subl $12, %esp
939 ; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
940 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
941 ; CHECK-I686-NEXT: movw %ax, (%esp)
942 ; CHECK-I686-NEXT: calll __extendhfsf2
943 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
944 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
945 ; CHECK-I686-NEXT: xorl %eax, %eax
946 ; CHECK-I686-NEXT: xorps %xmm1, %xmm1
947 ; CHECK-I686-NEXT: ucomiss %xmm1, %xmm0
948 ; CHECK-I686-NEXT: movl $15360, %ecx # imm = 0x3C00
949 ; CHECK-I686-NEXT: cmovnel %ecx, %eax
950 ; CHECK-I686-NEXT: cmovpl %ecx, %eax
951 ; CHECK-I686-NEXT: pinsrw $0, %eax, %xmm0
952 ; CHECK-I686-NEXT: addl $12, %esp
953 ; CHECK-I686-NEXT: retl
954 %2 = fcmp une half %0, 0xH0000
955 %3 = uitofp i1 %2 to half
; Branches on `fcmp oeq half 0xH0000, %0` to %if.then / %if.end.
; The expectations extend the argument to float, compare it with zero using
; (v)ucomiss, and OR setp with setne before the jne to the %if.end block.
; This definition does not reference attribute group #0, and the expectations
; include .cfi directives where the frame is adjusted.
959 define dso_local void @brcond(half %0) {
960 ; CHECK-LIBCALL-LABEL: brcond:
961 ; CHECK-LIBCALL: # %bb.0: # %entry
962 ; CHECK-LIBCALL-NEXT: pushq %rax
963 ; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 16
964 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
965 ; CHECK-LIBCALL-NEXT: xorps %xmm1, %xmm1
966 ; CHECK-LIBCALL-NEXT: ucomiss %xmm1, %xmm0
967 ; CHECK-LIBCALL-NEXT: setp %al
968 ; CHECK-LIBCALL-NEXT: setne %cl
969 ; CHECK-LIBCALL-NEXT: orb %al, %cl
970 ; CHECK-LIBCALL-NEXT: jne .LBB18_2
971 ; CHECK-LIBCALL-NEXT: # %bb.1: # %if.then
972 ; CHECK-LIBCALL-NEXT: popq %rax
973 ; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 8
974 ; CHECK-LIBCALL-NEXT: retq
975 ; CHECK-LIBCALL-NEXT: .LBB18_2: # %if.end
977 ; BWON-F16C-LABEL: brcond:
978 ; BWON-F16C: # %bb.0: # %entry
979 ; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax
980 ; BWON-F16C-NEXT: movzwl %ax, %eax
981 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
982 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
983 ; BWON-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
984 ; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0
985 ; BWON-F16C-NEXT: setp %al
986 ; BWON-F16C-NEXT: setne %cl
987 ; BWON-F16C-NEXT: orb %al, %cl
988 ; BWON-F16C-NEXT: jne .LBB18_2
989 ; BWON-F16C-NEXT: # %bb.1: # %if.then
990 ; BWON-F16C-NEXT: retq
991 ; BWON-F16C-NEXT: .LBB18_2: # %if.end
993 ; CHECK-I686-LABEL: brcond:
994 ; CHECK-I686: # %bb.0: # %entry
995 ; CHECK-I686-NEXT: subl $12, %esp
996 ; CHECK-I686-NEXT: .cfi_def_cfa_offset 16
997 ; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
998 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
999 ; CHECK-I686-NEXT: movw %ax, (%esp)
1000 ; CHECK-I686-NEXT: calll __extendhfsf2
1001 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1002 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1003 ; CHECK-I686-NEXT: xorps %xmm1, %xmm1
1004 ; CHECK-I686-NEXT: ucomiss %xmm1, %xmm0
1005 ; CHECK-I686-NEXT: setp %al
1006 ; CHECK-I686-NEXT: setne %cl
1007 ; CHECK-I686-NEXT: orb %al, %cl
1008 ; CHECK-I686-NEXT: jne .LBB18_2
1009 ; CHECK-I686-NEXT: # %bb.1: # %if.then
1010 ; CHECK-I686-NEXT: addl $12, %esp
1011 ; CHECK-I686-NEXT: .cfi_def_cfa_offset 4
1012 ; CHECK-I686-NEXT: retl
1013 ; CHECK-I686-NEXT: .LBB18_2: # %if.end
1015 %cmp = fcmp oeq half 0xH0000, %0
1016 br i1 %cmp, label %if.then, label %if.end
1018 if.then: ; preds = %entry
1021 if.end: ; preds = %entry
; Calls llvm.sqrt.f16 on the half argument.
; Expected lowering in all runs: extend to float, take the square root with
; (v)sqrtss, and truncate back to half. The non-F16C runs do the conversions
; via __extendhfsf2 / __truncsfhf2; the F16C run uses vcvtph2ps / vcvtps2ph.
1025 define half @test_sqrt(half %0) {
1026 ; CHECK-LIBCALL-LABEL: test_sqrt:
1027 ; CHECK-LIBCALL: # %bb.0: # %entry
1028 ; CHECK-LIBCALL-NEXT: pushq %rax
1029 ; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 16
1030 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1031 ; CHECK-LIBCALL-NEXT: sqrtss %xmm0, %xmm0
1032 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
1033 ; CHECK-LIBCALL-NEXT: popq %rax
1034 ; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 8
1035 ; CHECK-LIBCALL-NEXT: retq
1037 ; BWON-F16C-LABEL: test_sqrt:
1038 ; BWON-F16C: # %bb.0: # %entry
1039 ; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax
1040 ; BWON-F16C-NEXT: movzwl %ax, %eax
1041 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
1042 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
1043 ; BWON-F16C-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
1044 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
1045 ; BWON-F16C-NEXT: vmovd %xmm0, %eax
1046 ; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
1047 ; BWON-F16C-NEXT: retq
1049 ; CHECK-I686-LABEL: test_sqrt:
1050 ; CHECK-I686: # %bb.0: # %entry
1051 ; CHECK-I686-NEXT: subl $12, %esp
1052 ; CHECK-I686-NEXT: .cfi_def_cfa_offset 16
1053 ; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
1054 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
1055 ; CHECK-I686-NEXT: movw %ax, (%esp)
1056 ; CHECK-I686-NEXT: calll __extendhfsf2
1057 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1058 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1059 ; CHECK-I686-NEXT: sqrtss %xmm0, %xmm0
1060 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
1061 ; CHECK-I686-NEXT: calll __truncsfhf2
1062 ; CHECK-I686-NEXT: addl $12, %esp
1063 ; CHECK-I686-NEXT: .cfi_def_cfa_offset 4
1064 ; CHECK-I686-NEXT: retl
1066 %1 = call half @llvm.sqrt.f16(half %0)
1070 declare half @llvm.sqrt.f16(half)
; Test built on undef operands: llvm.fabs.f16 of an undef half feeds an
; `fcmp ole` against 0xH4800, which selects between 0.0 and
; `fmul float undef, 0.0`; the selected float is truncated to half and stored
; through an undef pointer.
; The %multiply.95 -> %add.82 -> %multiply.68 -> %subtract.65 chain is computed
; from the extended value but %subtract.65 has no use in the lines shown here.
; NOTE(review): looks like a reduced crash/miscompile reproducer - confirm
; against the commit that added it before altering the IR.
1072 define void @main.158() local_unnamed_addr #0 {
1073 ; CHECK-LIBCALL-LABEL: main.158:
1074 ; CHECK-LIBCALL: # %bb.0: # %entry
1075 ; CHECK-LIBCALL-NEXT: pushq %rax
1076 ; CHECK-LIBCALL-NEXT: xorps %xmm0, %xmm0
1077 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
1078 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1079 ; CHECK-LIBCALL-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1080 ; CHECK-LIBCALL-NEXT: ucomiss %xmm0, %xmm1
1081 ; CHECK-LIBCALL-NEXT: xorps %xmm0, %xmm0
1082 ; CHECK-LIBCALL-NEXT: jae .LBB20_2
1083 ; CHECK-LIBCALL-NEXT: # %bb.1: # %entry
1084 ; CHECK-LIBCALL-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1085 ; CHECK-LIBCALL-NEXT: .LBB20_2: # %entry
1086 ; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
1087 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
1088 ; CHECK-LIBCALL-NEXT: movw %ax, (%rax)
1089 ; CHECK-LIBCALL-NEXT: popq %rax
1090 ; CHECK-LIBCALL-NEXT: retq
1092 ; BWON-F16C-LABEL: main.158:
1093 ; BWON-F16C: # %bb.0: # %entry
1094 ; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0
1095 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
1096 ; BWON-F16C-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1097 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
1098 ; BWON-F16C-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1099 ; BWON-F16C-NEXT: vucomiss %xmm0, %xmm1
1100 ; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0
1101 ; BWON-F16C-NEXT: jae .LBB20_2
1102 ; BWON-F16C-NEXT: # %bb.1: # %entry
1103 ; BWON-F16C-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1104 ; BWON-F16C-NEXT: .LBB20_2: # %entry
1105 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
1106 ; BWON-F16C-NEXT: vmovd %xmm0, %eax
1107 ; BWON-F16C-NEXT: movw %ax, (%rax)
1108 ; BWON-F16C-NEXT: retq
1110 ; CHECK-I686-LABEL: main.158:
1111 ; CHECK-I686: # %bb.0: # %entry
1112 ; CHECK-I686-NEXT: subl $12, %esp
1113 ; CHECK-I686-NEXT: pxor %xmm0, %xmm0
1114 ; CHECK-I686-NEXT: movd %xmm0, (%esp)
1115 ; CHECK-I686-NEXT: calll __truncsfhf2
1116 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
1117 ; CHECK-I686-NEXT: movw %ax, (%esp)
1118 ; CHECK-I686-NEXT: calll __extendhfsf2
1119 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1120 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1121 ; CHECK-I686-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0
1122 ; CHECK-I686-NEXT: xorps %xmm0, %xmm0
1123 ; CHECK-I686-NEXT: jae .LBB20_2
1124 ; CHECK-I686-NEXT: # %bb.1: # %entry
1125 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1126 ; CHECK-I686-NEXT: .LBB20_2: # %entry
1127 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
1128 ; CHECK-I686-NEXT: calll __truncsfhf2
1129 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
1130 ; CHECK-I686-NEXT: movw %ax, (%eax)
1131 ; CHECK-I686-NEXT: addl $12, %esp
1132 ; CHECK-I686-NEXT: retl
1134 %0 = tail call half @llvm.fabs.f16(half undef)
1135 %1 = fpext half %0 to float
1136 %compare.2 = fcmp ole half %0, 0xH4800
1137 %multiply.95 = fmul float %1, 5.000000e-01
1138 %add.82 = fadd float %multiply.95, -2.000000e+00
1139 %multiply.68 = fmul float %add.82, 0.000000e+00
1140 %subtract.65 = fsub float %multiply.68, 0.000000e+00
1141 %multiply.57 = fmul float undef, 0.000000e+00
1142 %2 = select i1 %compare.2, float 0.000000e+00, float %multiply.57
1143 %3 = fptrunc float %2 to half
1144 store half %3, ptr undef, align 2
; Loads a half through an undef pointer and splats it two ways: as <4 x half>
; and, via its i16 bitcast, as <4 x i16>. The float splat is tested with
; `fcmp uno` against zero; the integer splat passes through two selects with
; undef conditions and is bitcast back to <4 x half>. The final select stores
; 0xH7E00 (a half NaN bit pattern) in lanes where the unordered compare is
; true, through an undef pointer.
; The non-F16C expectations do one scalar extend + ucomiss and apply cmovp with
; 0x7E00 to each 16-bit lane held in a GPR; the F16C expectations build a mask
; and blend against a constant-pool operand with vpblendvb.
1148 define void @main.45() local_unnamed_addr {
1149 ; CHECK-LIBCALL-LABEL: main.45:
1150 ; CHECK-LIBCALL: # %bb.0: # %entry
1151 ; CHECK-LIBCALL-NEXT: pushq %rbp
1152 ; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 16
1153 ; CHECK-LIBCALL-NEXT: pushq %r15
1154 ; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 24
1155 ; CHECK-LIBCALL-NEXT: pushq %r14
1156 ; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 32
1157 ; CHECK-LIBCALL-NEXT: pushq %rbx
1158 ; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 40
1159 ; CHECK-LIBCALL-NEXT: pushq %rax
1160 ; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 48
1161 ; CHECK-LIBCALL-NEXT: .cfi_offset %rbx, -40
1162 ; CHECK-LIBCALL-NEXT: .cfi_offset %r14, -32
1163 ; CHECK-LIBCALL-NEXT: .cfi_offset %r15, -24
1164 ; CHECK-LIBCALL-NEXT: .cfi_offset %rbp, -16
1165 ; CHECK-LIBCALL-NEXT: pinsrw $0, (%rax), %xmm0
1166 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
1167 ; CHECK-LIBCALL-NEXT: movd %eax, %xmm1
1168 ; CHECK-LIBCALL-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1169 ; CHECK-LIBCALL-NEXT: movq %xmm1, %rbx
1170 ; CHECK-LIBCALL-NEXT: movq %rbx, %r14
1171 ; CHECK-LIBCALL-NEXT: shrq $48, %r14
1172 ; CHECK-LIBCALL-NEXT: movq %rbx, %r15
1173 ; CHECK-LIBCALL-NEXT: shrq $32, %r15
1174 ; CHECK-LIBCALL-NEXT: movl %ebx, %ebp
1175 ; CHECK-LIBCALL-NEXT: shrl $16, %ebp
1176 ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
1177 ; CHECK-LIBCALL-NEXT: ucomiss %xmm0, %xmm0
1178 ; CHECK-LIBCALL-NEXT: movl $32256, %eax # imm = 0x7E00
1179 ; CHECK-LIBCALL-NEXT: cmovpl %eax, %ebp
1180 ; CHECK-LIBCALL-NEXT: cmovpl %eax, %r15d
1181 ; CHECK-LIBCALL-NEXT: cmovpl %eax, %r14d
1182 ; CHECK-LIBCALL-NEXT: cmovpl %eax, %ebx
1183 ; CHECK-LIBCALL-NEXT: movw %bx, (%rax)
1184 ; CHECK-LIBCALL-NEXT: movw %r14w, (%rax)
1185 ; CHECK-LIBCALL-NEXT: movw %r15w, (%rax)
1186 ; CHECK-LIBCALL-NEXT: movw %bp, (%rax)
1187 ; CHECK-LIBCALL-NEXT: addq $8, %rsp
1188 ; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 40
1189 ; CHECK-LIBCALL-NEXT: popq %rbx
1190 ; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 32
1191 ; CHECK-LIBCALL-NEXT: popq %r14
1192 ; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 24
1193 ; CHECK-LIBCALL-NEXT: popq %r15
1194 ; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 16
1195 ; CHECK-LIBCALL-NEXT: popq %rbp
1196 ; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 8
1197 ; CHECK-LIBCALL-NEXT: retq
1199 ; BWON-F16C-LABEL: main.45:
1200 ; BWON-F16C: # %bb.0: # %entry
1201 ; BWON-F16C-NEXT: movzwl (%rax), %eax
1202 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
1203 ; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
1204 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
1205 ; BWON-F16C-NEXT: xorl %eax, %eax
1206 ; BWON-F16C-NEXT: vucomiss %xmm0, %xmm0
1207 ; BWON-F16C-NEXT: movl $65535, %ecx # imm = 0xFFFF
1208 ; BWON-F16C-NEXT: cmovnpl %eax, %ecx
1209 ; BWON-F16C-NEXT: vmovd %ecx, %xmm0
1210 ; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1211 ; BWON-F16C-NEXT: vpblendvb %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
1212 ; BWON-F16C-NEXT: vmovq %xmm0, (%rax)
1213 ; BWON-F16C-NEXT: retq
1215 ; CHECK-I686-LABEL: main.45:
1216 ; CHECK-I686: # %bb.0: # %entry
1217 ; CHECK-I686-NEXT: pushl %edi
1218 ; CHECK-I686-NEXT: .cfi_def_cfa_offset 8
1219 ; CHECK-I686-NEXT: pushl %esi
1220 ; CHECK-I686-NEXT: .cfi_def_cfa_offset 12
1221 ; CHECK-I686-NEXT: subl $20, %esp
1222 ; CHECK-I686-NEXT: .cfi_def_cfa_offset 32
1223 ; CHECK-I686-NEXT: .cfi_offset %esi, -12
1224 ; CHECK-I686-NEXT: .cfi_offset %edi, -8
1225 ; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0
1226 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
1227 ; CHECK-I686-NEXT: movd %eax, %xmm0
1228 ; CHECK-I686-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1229 ; CHECK-I686-NEXT: movd %xmm0, %esi
1230 ; CHECK-I686-NEXT: movl %esi, %edi
1231 ; CHECK-I686-NEXT: shrl $16, %edi
1232 ; CHECK-I686-NEXT: movw %ax, (%esp)
1233 ; CHECK-I686-NEXT: calll __extendhfsf2
1234 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
1235 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1236 ; CHECK-I686-NEXT: ucomiss %xmm0, %xmm0
1237 ; CHECK-I686-NEXT: movl $32256, %eax # imm = 0x7E00
1238 ; CHECK-I686-NEXT: cmovpl %eax, %esi
1239 ; CHECK-I686-NEXT: cmovpl %eax, %edi
1240 ; CHECK-I686-NEXT: movw %di, (%eax)
1241 ; CHECK-I686-NEXT: movw %si, (%eax)
1242 ; CHECK-I686-NEXT: addl $20, %esp
1243 ; CHECK-I686-NEXT: .cfi_def_cfa_offset 12
1244 ; CHECK-I686-NEXT: popl %esi
1245 ; CHECK-I686-NEXT: .cfi_def_cfa_offset 8
1246 ; CHECK-I686-NEXT: popl %edi
1247 ; CHECK-I686-NEXT: .cfi_def_cfa_offset 4
1248 ; CHECK-I686-NEXT: retl
1250 %0 = load half, ptr undef, align 8
1251 %1 = bitcast half %0 to i16
1252 %broadcast.splatinsert = insertelement <4 x half> poison, half %0, i64 0
1253 %broadcast.splat = shufflevector <4 x half> %broadcast.splatinsert, <4 x half> poison, <4 x i32> zeroinitializer
1254 %broadcast.splatinsert13 = insertelement <4 x i16> poison, i16 %1, i64 0
1255 %broadcast.splat14 = shufflevector <4 x i16> %broadcast.splatinsert13, <4 x i16> poison, <4 x i32> zeroinitializer
1256 %2 = fcmp uno <4 x half> %broadcast.splat, zeroinitializer
1257 %3 = add <4 x i16> zeroinitializer, %broadcast.splat14
1258 %4 = select i1 undef, <4 x i16> undef, <4 x i16> %3
1259 %5 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> %4
1260 %6 = bitcast <4 x i16> %5 to <4 x half>
1261 %7 = select <4 x i1> %2, <4 x half> <half 0xH7E00, half 0xH7E00, half 0xH7E00, half 0xH7E00>, <4 x half> %6
1262 store <4 x half> %7, ptr undef, align 16
; Calls llvm.copysign.f16(%x, %y).
; Every run is expected to do this with integer bit operations only: keep the
; sign bit (mask 0x8000) of one operand, keep the low 15 bits (mask 0x7FFF) of
; the other, OR them, and insert the 16-bit result into xmm0. On x86-64 the
; sign comes from xmm1 and the magnitude from xmm0; on i686 both are read from
; the stack. No conversion libcalls or F16C instructions appear.
1266 define half @fcopysign(half %x, half %y) {
1267 ; CHECK-LIBCALL-LABEL: fcopysign:
1268 ; CHECK-LIBCALL: # %bb.0:
1269 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm1, %eax
1270 ; CHECK-LIBCALL-NEXT: andl $-32768, %eax # imm = 0x8000
1271 ; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %ecx
1272 ; CHECK-LIBCALL-NEXT: andl $32767, %ecx # imm = 0x7FFF
1273 ; CHECK-LIBCALL-NEXT: orl %eax, %ecx
1274 ; CHECK-LIBCALL-NEXT: pinsrw $0, %ecx, %xmm0
1275 ; CHECK-LIBCALL-NEXT: retq
1277 ; BWON-F16C-LABEL: fcopysign:
1278 ; BWON-F16C: # %bb.0:
1279 ; BWON-F16C-NEXT: vpextrw $0, %xmm1, %eax
1280 ; BWON-F16C-NEXT: andl $-32768, %eax # imm = 0x8000
1281 ; BWON-F16C-NEXT: vpextrw $0, %xmm0, %ecx
1282 ; BWON-F16C-NEXT: andl $32767, %ecx # imm = 0x7FFF
1283 ; BWON-F16C-NEXT: orl %eax, %ecx
1284 ; BWON-F16C-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0
1285 ; BWON-F16C-NEXT: retq
1287 ; CHECK-I686-LABEL: fcopysign:
1288 ; CHECK-I686: # %bb.0:
1289 ; CHECK-I686-NEXT: movl $-32768, %eax # imm = 0x8000
1290 ; CHECK-I686-NEXT: andl {{[0-9]+}}(%esp), %eax
1291 ; CHECK-I686-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
1292 ; CHECK-I686-NEXT: andl $32767, %ecx # imm = 0x7FFF
1293 ; CHECK-I686-NEXT: orl %eax, %ecx
1294 ; CHECK-I686-NEXT: pinsrw $0, %ecx, %xmm0
1295 ; CHECK-I686-NEXT: retl
1296 %a = call half @llvm.copysign.f16(half %x, half %y)
1300 declare half @llvm.fabs.f16(half)
1301 declare half @llvm.copysign.f16(half, half)
; Selects between two whole <8 x half> vectors on a scalar i1 condition.
; The expectations test bit 0 of the condition, branch, and copy xmm1 into xmm0
; with (v)movaps on the fall-through path; no per-element half operations or
; conversion calls appear in any run.
1303 define <8 x half> @select(i1 %c, <8 x half> %x, <8 x half> %y) {
1304 ; CHECK-LIBCALL-LABEL: select:
1305 ; CHECK-LIBCALL: # %bb.0:
1306 ; CHECK-LIBCALL-NEXT: testb $1, %dil
1307 ; CHECK-LIBCALL-NEXT: jne .LBB23_2
1308 ; CHECK-LIBCALL-NEXT: # %bb.1:
1309 ; CHECK-LIBCALL-NEXT: movaps %xmm1, %xmm0
1310 ; CHECK-LIBCALL-NEXT: .LBB23_2:
1311 ; CHECK-LIBCALL-NEXT: retq
1313 ; BWON-F16C-LABEL: select:
1314 ; BWON-F16C: # %bb.0:
1315 ; BWON-F16C-NEXT: testb $1, %dil
1316 ; BWON-F16C-NEXT: jne .LBB23_2
1317 ; BWON-F16C-NEXT: # %bb.1:
1318 ; BWON-F16C-NEXT: vmovaps %xmm1, %xmm0
1319 ; BWON-F16C-NEXT: .LBB23_2:
1320 ; BWON-F16C-NEXT: retq
1322 ; CHECK-I686-LABEL: select:
1323 ; CHECK-I686: # %bb.0:
1324 ; CHECK-I686-NEXT: testb $1, {{[0-9]+}}(%esp)
1325 ; CHECK-I686-NEXT: jne .LBB23_2
1326 ; CHECK-I686-NEXT: # %bb.1:
1327 ; CHECK-I686-NEXT: movaps %xmm1, %xmm0
1328 ; CHECK-I686-NEXT: .LBB23_2:
1329 ; CHECK-I686-NEXT: retl
1330 %s = select i1 %c, <8 x half> %x, <8 x half> %y
; Loads an <8 x half> from %p (align 8) and shuffles it with a mask whose first
; two lanes are element 4 and whose remaining lanes are undef.
; Expected lowering is a (v)pshufhw followed by a (v)pshufd in every run. The
; non-F16C runs load with movdqu first, while the F16C run folds the load into
; vpshufhw.
1334 define <8 x half> @shuffle(ptr %p) {
1335 ; CHECK-LIBCALL-LABEL: shuffle:
1336 ; CHECK-LIBCALL: # %bb.0:
1337 ; CHECK-LIBCALL-NEXT: movdqu (%rdi), %xmm0
1338 ; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1339 ; CHECK-LIBCALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
1340 ; CHECK-LIBCALL-NEXT: retq
1342 ; BWON-F16C-LABEL: shuffle:
1343 ; BWON-F16C: # %bb.0:
1344 ; BWON-F16C-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,4,4,4,4]
1345 ; BWON-F16C-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
1346 ; BWON-F16C-NEXT: retq
1348 ; CHECK-I686-LABEL: shuffle:
1349 ; CHECK-I686: # %bb.0:
1350 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
1351 ; CHECK-I686-NEXT: movdqu (%eax), %xmm0
1352 ; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1353 ; CHECK-I686-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
1354 ; CHECK-I686-NEXT: retl
1355 %1 = load <8 x half>, ptr %p, align 8
1356 %2 = shufflevector <8 x half> %1, <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1360 attributes #0 = { nounwind }