1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=1 \
3 ; RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWON,BWON-NOF16C
4 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=0 \
5 ; RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWOFF
6 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c -fixup-byte-word-insts=1 \
7 ; RUN: | FileCheck %s -check-prefixes=CHECK,BWON,BWON-F16C
8 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -fixup-byte-word-insts=0 \
9 ; RUN: | FileCheck %s -check-prefixes=CHECK-I686
; Copies one half value from %in to %out with a plain load/store pair.
; Every RUN configuration expects a bare 16-bit integer move and no conversion
; call. The x86-64 runs differ only in the load form: movzwl when
; -fixup-byte-word-insts=1, movw when -fixup-byte-word-insts=0.
11 define void @test_load_store(half* %in, half* %out) #0 {
12 ; BWON-LABEL: test_load_store:
14 ; BWON-NEXT: movzwl (%rdi), %eax
15 ; BWON-NEXT: movw %ax, (%rsi)
18 ; BWOFF-LABEL: test_load_store:
20 ; BWOFF-NEXT: movw (%rdi), %ax
21 ; BWOFF-NEXT: movw %ax, (%rsi)
24 ; CHECK-I686-LABEL: test_load_store:
25 ; CHECK-I686: # %bb.0:
26 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
27 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ecx
28 ; CHECK-I686-NEXT: movw (%ecx), %cx
29 ; CHECK-I686-NEXT: movw %cx, (%eax)
30 ; CHECK-I686-NEXT: retl
31 %val = load half, half* %in
32 store half %val, half* %out
; Loads a half from %addr and reinterprets its bits as an i16 via bitcast.
; The visible checks expect nothing beyond a 16-bit load into ax/eax: movzwl
; when -fixup-byte-word-insts=1, movw otherwise. No conversion call appears.
36 define i16 @test_bitcast_from_half(half* %addr) #0 {
37 ; BWON-LABEL: test_bitcast_from_half:
39 ; BWON-NEXT: movzwl (%rdi), %eax
42 ; BWOFF-LABEL: test_bitcast_from_half:
44 ; BWOFF-NEXT: movw (%rdi), %ax
47 ; CHECK-I686-LABEL: test_bitcast_from_half:
48 ; CHECK-I686: # %bb.0:
49 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
50 ; CHECK-I686-NEXT: movw (%eax), %ax
51 ; CHECK-I686-NEXT: retl
52 %val = load half, half* %addr
53 %val_int = bitcast half %val to i16
; Reinterprets the i16 argument %in as a half via bitcast and stores it to
; %addr. All three x86-64 runs share one expectation: a single movw of the
; incoming %si to (%rdi). The i686 run first loads both arguments from the
; stack, then performs the same 16-bit store.
57 define void @test_bitcast_to_half(half* %addr, i16 %in) #0 {
58 ; CHECK-LABEL: test_bitcast_to_half:
60 ; CHECK-NEXT: movw %si, (%rdi)
63 ; CHECK-I686-LABEL: test_bitcast_to_half:
64 ; CHECK-I686: # %bb.0:
65 ; CHECK-I686-NEXT: movw {{[0-9]+}}(%esp), %ax
66 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ecx
67 ; CHECK-I686-NEXT: movw %ax, (%ecx)
68 ; CHECK-I686-NEXT: retl
69 %val_fp = bitcast i16 %in to half
70 store half %val_fp, half* %addr
; Loads a half from %addr and widens it to float with fpext.
; Runs without F16C expect the halfword to be zero-extended and handed to the
; __gnu_h2f_ieee helper (as a tail call on x86-64, as an ordinary call with a
; stack argument on i686). The +f16c run expects an inline vcvtph2ps instead.
74 define float @test_extend32(half* %addr) #0 {
75 ; CHECK-LIBCALL-LABEL: test_extend32:
76 ; CHECK-LIBCALL: # %bb.0:
77 ; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
78 ; CHECK-LIBCALL-NEXT: jmp __gnu_h2f_ieee # TAILCALL
80 ; BWON-F16C-LABEL: test_extend32:
82 ; BWON-F16C-NEXT: movswl (%rdi), %eax
83 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
84 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
85 ; BWON-F16C-NEXT: retq
87 ; CHECK-I686-LABEL: test_extend32:
88 ; CHECK-I686: # %bb.0:
89 ; CHECK-I686-NEXT: subl $12, %esp
90 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
91 ; CHECK-I686-NEXT: movzwl (%eax), %eax
92 ; CHECK-I686-NEXT: movl %eax, (%esp)
93 ; CHECK-I686-NEXT: calll __gnu_h2f_ieee
94 ; CHECK-I686-NEXT: addl $12, %esp
95 ; CHECK-I686-NEXT: retl
96 %val16 = load half, half* %addr
97 %val32 = fpext half %val16 to float
; Loads a half from %addr and widens it to double with fpext.
; The x86-64 runs expect a two-step widening: half to float (via the
; __gnu_h2f_ieee call without F16C, via vcvtph2ps with it) followed by
; cvtss2sd / vcvtss2sd. The i686 checks show only the __gnu_h2f_ieee call and
; no separate widening instruction -- presumably because the result is
; returned on the x87 stack; TODO confirm.
101 define double @test_extend64(half* %addr) #0 {
102 ; CHECK-LIBCALL-LABEL: test_extend64:
103 ; CHECK-LIBCALL: # %bb.0:
104 ; CHECK-LIBCALL-NEXT: pushq %rax
105 ; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
106 ; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
107 ; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0
108 ; CHECK-LIBCALL-NEXT: popq %rax
109 ; CHECK-LIBCALL-NEXT: retq
111 ; BWON-F16C-LABEL: test_extend64:
112 ; BWON-F16C: # %bb.0:
113 ; BWON-F16C-NEXT: movswl (%rdi), %eax
114 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
115 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
116 ; BWON-F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
117 ; BWON-F16C-NEXT: retq
119 ; CHECK-I686-LABEL: test_extend64:
120 ; CHECK-I686: # %bb.0:
121 ; CHECK-I686-NEXT: subl $12, %esp
122 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
123 ; CHECK-I686-NEXT: movzwl (%eax), %eax
124 ; CHECK-I686-NEXT: movl %eax, (%esp)
125 ; CHECK-I686-NEXT: calll __gnu_h2f_ieee
126 ; CHECK-I686-NEXT: addl $12, %esp
127 ; CHECK-I686-NEXT: retl
128 %val16 = load half, half* %addr
; Despite its name, %val32 holds a double in this function.
129 %val32 = fpext half %val16 to double
; Narrows the float argument %in to half with fptrunc and stores it to %addr.
; Runs without F16C expect a call to __gnu_f2h_ieee, with the destination
; pointer kept in %rbx (x86-64) or %esi (i686) across the call and the 16-bit
; result stored from %ax. The +f16c run expects an inline vcvtps2ph $4
; followed by vmovd and a 16-bit store.
133 define void @test_trunc32(float %in, half* %addr) #0 {
134 ; CHECK-LIBCALL-LABEL: test_trunc32:
135 ; CHECK-LIBCALL: # %bb.0:
136 ; CHECK-LIBCALL-NEXT: pushq %rbx
137 ; CHECK-LIBCALL-NEXT: movq %rdi, %rbx
138 ; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
139 ; CHECK-LIBCALL-NEXT: movw %ax, (%rbx)
140 ; CHECK-LIBCALL-NEXT: popq %rbx
141 ; CHECK-LIBCALL-NEXT: retq
143 ; BWON-F16C-LABEL: test_trunc32:
144 ; BWON-F16C: # %bb.0:
145 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
146 ; BWON-F16C-NEXT: vmovd %xmm0, %eax
147 ; BWON-F16C-NEXT: movw %ax, (%rdi)
148 ; BWON-F16C-NEXT: retq
150 ; CHECK-I686-LABEL: test_trunc32:
151 ; CHECK-I686: # %bb.0:
152 ; CHECK-I686-NEXT: pushl %esi
153 ; CHECK-I686-NEXT: subl $8, %esp
154 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
155 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
156 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
157 ; CHECK-I686-NEXT: calll __gnu_f2h_ieee
158 ; CHECK-I686-NEXT: movw %ax, (%esi)
159 ; CHECK-I686-NEXT: addl $8, %esp
160 ; CHECK-I686-NEXT: popl %esi
161 ; CHECK-I686-NEXT: retl
162 %val16 = fptrunc float %in to half
163 store half %val16, half* %addr
; Narrows the double argument %in to half with fptrunc and stores it to %addr.
; All three x86-64 runs -- including the +f16c one -- share a single
; expectation: a call to __truncdfhf2 with the pointer held in %rbx, then a
; 16-bit store of %ax. The i686 run expects the same helper, with the double
; passed on the stack and the pointer held in %esi.
167 define void @test_trunc64(double %in, half* %addr) #0 {
168 ; CHECK-LABEL: test_trunc64:
170 ; CHECK-NEXT: pushq %rbx
171 ; CHECK-NEXT: movq %rdi, %rbx
172 ; CHECK-NEXT: callq __truncdfhf2
173 ; CHECK-NEXT: movw %ax, (%rbx)
174 ; CHECK-NEXT: popq %rbx
177 ; CHECK-I686-LABEL: test_trunc64:
178 ; CHECK-I686: # %bb.0:
179 ; CHECK-I686-NEXT: pushl %esi
180 ; CHECK-I686-NEXT: subl $8, %esp
181 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
182 ; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
183 ; CHECK-I686-NEXT: movsd %xmm0, (%esp)
184 ; CHECK-I686-NEXT: calll __truncdfhf2
185 ; CHECK-I686-NEXT: movw %ax, (%esi)
186 ; CHECK-I686-NEXT: addl $8, %esp
187 ; CHECK-I686-NEXT: popl %esi
188 ; CHECK-I686-NEXT: retl
189 %val16 = fptrunc double %in to half
190 store half %val16, half* %addr
; Loads a half from %p and converts it to a signed i64 with fptosi.
; The x86-64 runs expect the half to be widened to float first (via the
; __gnu_h2f_ieee call without F16C, via vcvtph2ps with it) and then converted
; by cvttss2si / vcvttss2si into %rax. The i686 run expects two helper calls:
; __gnu_h2f_ieee, whose result is written to the stack with fstps, and then
; __fixsfdi.
194 define i64 @test_fptosi_i64(half* %p) #0 {
195 ; CHECK-LIBCALL-LABEL: test_fptosi_i64:
196 ; CHECK-LIBCALL: # %bb.0:
197 ; CHECK-LIBCALL-NEXT: pushq %rax
198 ; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
199 ; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
200 ; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax
201 ; CHECK-LIBCALL-NEXT: popq %rcx
202 ; CHECK-LIBCALL-NEXT: retq
204 ; BWON-F16C-LABEL: test_fptosi_i64:
205 ; BWON-F16C: # %bb.0:
206 ; BWON-F16C-NEXT: movswl (%rdi), %eax
207 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
208 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
209 ; BWON-F16C-NEXT: vcvttss2si %xmm0, %rax
210 ; BWON-F16C-NEXT: retq
212 ; CHECK-I686-LABEL: test_fptosi_i64:
213 ; CHECK-I686: # %bb.0:
214 ; CHECK-I686-NEXT: subl $12, %esp
215 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
216 ; CHECK-I686-NEXT: movzwl (%eax), %eax
217 ; CHECK-I686-NEXT: movl %eax, (%esp)
218 ; CHECK-I686-NEXT: calll __gnu_h2f_ieee
219 ; CHECK-I686-NEXT: fstps (%esp)
220 ; CHECK-I686-NEXT: calll __fixsfdi
221 ; CHECK-I686-NEXT: addl $12, %esp
222 ; CHECK-I686-NEXT: retl
223 %a = load half, half* %p, align 2
224 %r = fptosi half %a to i64
; Converts the signed i64 argument %a to half with sitofp and stores it to %p.
; The x86-64 runs expect cvtsi2ss / vcvtsi2ss to produce a float, which is
; then narrowed to half (via the __gnu_f2h_ieee call without F16C, via
; vcvtps2ph $4 with it). The i686 run expects the i64 to be converted through
; the x87 unit (fildll then fstps) before the __gnu_f2h_ieee call.
228 define void @test_sitofp_i64(i64 %a, half* %p) #0 {
229 ; CHECK-LIBCALL-LABEL: test_sitofp_i64:
230 ; CHECK-LIBCALL: # %bb.0:
231 ; CHECK-LIBCALL-NEXT: pushq %rbx
232 ; CHECK-LIBCALL-NEXT: movq %rsi, %rbx
233 ; CHECK-LIBCALL-NEXT: cvtsi2ss %rdi, %xmm0
234 ; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
235 ; CHECK-LIBCALL-NEXT: movw %ax, (%rbx)
236 ; CHECK-LIBCALL-NEXT: popq %rbx
237 ; CHECK-LIBCALL-NEXT: retq
239 ; BWON-F16C-LABEL: test_sitofp_i64:
240 ; BWON-F16C: # %bb.0:
241 ; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
242 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
243 ; BWON-F16C-NEXT: vmovd %xmm0, %eax
244 ; BWON-F16C-NEXT: movw %ax, (%rsi)
245 ; BWON-F16C-NEXT: retq
247 ; CHECK-I686-LABEL: test_sitofp_i64:
248 ; CHECK-I686: # %bb.0:
249 ; CHECK-I686-NEXT: pushl %esi
250 ; CHECK-I686-NEXT: subl $24, %esp
251 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
252 ; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
253 ; CHECK-I686-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
254 ; CHECK-I686-NEXT: fildll {{[0-9]+}}(%esp)
255 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
256 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
257 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
258 ; CHECK-I686-NEXT: calll __gnu_f2h_ieee
259 ; CHECK-I686-NEXT: movw %ax, (%esi)
260 ; CHECK-I686-NEXT: addl $24, %esp
261 ; CHECK-I686-NEXT: popl %esi
262 ; CHECK-I686-NEXT: retl
263 %r = sitofp i64 %a to half
264 store half %r, half* %p
; Loads a half from %p and converts it to an unsigned i64 with fptoui.
; After widening to float, the x86-64 runs expect two truncating conversions:
; one of the value itself and one of the value minus a float loaded from
; memory (its numeric value is not shown in the checks), the latter xor'ed
; with 0x8000000000000000. A ucomiss/cmovae pair picks between the two.
; The i686 run expects __gnu_h2f_ieee followed by the __fixunssfdi helper.
268 define i64 @test_fptoui_i64(half* %p) #0 {
269 ; CHECK-LIBCALL-LABEL: test_fptoui_i64:
270 ; CHECK-LIBCALL: # %bb.0:
271 ; CHECK-LIBCALL-NEXT: pushq %rax
272 ; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
273 ; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
274 ; CHECK-LIBCALL-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
275 ; CHECK-LIBCALL-NEXT: movaps %xmm0, %xmm2
276 ; CHECK-LIBCALL-NEXT: subss %xmm1, %xmm2
277 ; CHECK-LIBCALL-NEXT: cvttss2si %xmm2, %rax
278 ; CHECK-LIBCALL-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
279 ; CHECK-LIBCALL-NEXT: xorq %rax, %rcx
280 ; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax
281 ; CHECK-LIBCALL-NEXT: ucomiss %xmm1, %xmm0
282 ; CHECK-LIBCALL-NEXT: cmovaeq %rcx, %rax
283 ; CHECK-LIBCALL-NEXT: popq %rcx
284 ; CHECK-LIBCALL-NEXT: retq
286 ; BWON-F16C-LABEL: test_fptoui_i64:
287 ; BWON-F16C: # %bb.0:
288 ; BWON-F16C-NEXT: movswl (%rdi), %eax
289 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
290 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
291 ; BWON-F16C-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
292 ; BWON-F16C-NEXT: vsubss %xmm1, %xmm0, %xmm2
293 ; BWON-F16C-NEXT: vcvttss2si %xmm2, %rax
294 ; BWON-F16C-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
295 ; BWON-F16C-NEXT: xorq %rax, %rcx
296 ; BWON-F16C-NEXT: vcvttss2si %xmm0, %rax
297 ; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0
298 ; BWON-F16C-NEXT: cmovaeq %rcx, %rax
299 ; BWON-F16C-NEXT: retq
301 ; CHECK-I686-LABEL: test_fptoui_i64:
302 ; CHECK-I686: # %bb.0:
303 ; CHECK-I686-NEXT: subl $12, %esp
304 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
305 ; CHECK-I686-NEXT: movzwl (%eax), %eax
306 ; CHECK-I686-NEXT: movl %eax, (%esp)
307 ; CHECK-I686-NEXT: calll __gnu_h2f_ieee
308 ; CHECK-I686-NEXT: fstps (%esp)
309 ; CHECK-I686-NEXT: calll __fixunssfdi
310 ; CHECK-I686-NEXT: addl $12, %esp
311 ; CHECK-I686-NEXT: retl
312 %a = load half, half* %p, align 2
313 %r = fptoui half %a to i64
; Converts the unsigned i64 argument %a to half with uitofp and stores it to
; %p. The x86-64 runs expect a branch on the sign bit (testq/js): values with
; the top bit clear are converted directly with cvtsi2ss, while the others
; are halved with the low bit OR'ed back in, converted, and then doubled with
; addss. The float is then narrowed to half (via the __gnu_f2h_ieee call
; without F16C, via vcvtps2ph $4 with it). The i686 run expects fildll plus an
; fadds of a .LCPI table entry indexed by the setns result, then the
; __gnu_f2h_ieee call.
; NOTE(review): the .LBB10_* labels appear to encode this function's position
; in the file (it is the 11th define visible here) -- adding or removing
; functions above it would presumably require regenerating these checks.
317 define void @test_uitofp_i64(i64 %a, half* %p) #0 {
318 ; CHECK-LIBCALL-LABEL: test_uitofp_i64:
319 ; CHECK-LIBCALL: # %bb.0:
320 ; CHECK-LIBCALL-NEXT: pushq %rbx
321 ; CHECK-LIBCALL-NEXT: movq %rsi, %rbx
322 ; CHECK-LIBCALL-NEXT: testq %rdi, %rdi
323 ; CHECK-LIBCALL-NEXT: js .LBB10_1
324 ; CHECK-LIBCALL-NEXT: # %bb.2:
325 ; CHECK-LIBCALL-NEXT: cvtsi2ss %rdi, %xmm0
326 ; CHECK-LIBCALL-NEXT: jmp .LBB10_3
327 ; CHECK-LIBCALL-NEXT: .LBB10_1:
328 ; CHECK-LIBCALL-NEXT: movq %rdi, %rax
329 ; CHECK-LIBCALL-NEXT: shrq %rax
330 ; CHECK-LIBCALL-NEXT: andl $1, %edi
331 ; CHECK-LIBCALL-NEXT: orq %rax, %rdi
332 ; CHECK-LIBCALL-NEXT: cvtsi2ss %rdi, %xmm0
333 ; CHECK-LIBCALL-NEXT: addss %xmm0, %xmm0
334 ; CHECK-LIBCALL-NEXT: .LBB10_3:
335 ; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
336 ; CHECK-LIBCALL-NEXT: movw %ax, (%rbx)
337 ; CHECK-LIBCALL-NEXT: popq %rbx
338 ; CHECK-LIBCALL-NEXT: retq
340 ; BWON-F16C-LABEL: test_uitofp_i64:
341 ; BWON-F16C: # %bb.0:
342 ; BWON-F16C-NEXT: testq %rdi, %rdi
343 ; BWON-F16C-NEXT: js .LBB10_1
344 ; BWON-F16C-NEXT: # %bb.2:
345 ; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
346 ; BWON-F16C-NEXT: jmp .LBB10_3
347 ; BWON-F16C-NEXT: .LBB10_1:
348 ; BWON-F16C-NEXT: movq %rdi, %rax
349 ; BWON-F16C-NEXT: shrq %rax
350 ; BWON-F16C-NEXT: andl $1, %edi
351 ; BWON-F16C-NEXT: orq %rax, %rdi
352 ; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
353 ; BWON-F16C-NEXT: vaddss %xmm0, %xmm0, %xmm0
354 ; BWON-F16C-NEXT: .LBB10_3:
355 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
356 ; BWON-F16C-NEXT: vmovd %xmm0, %eax
357 ; BWON-F16C-NEXT: movw %ax, (%rsi)
358 ; BWON-F16C-NEXT: retq
360 ; CHECK-I686-LABEL: test_uitofp_i64:
361 ; CHECK-I686: # %bb.0:
362 ; CHECK-I686-NEXT: pushl %esi
363 ; CHECK-I686-NEXT: subl $24, %esp
364 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
365 ; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
366 ; CHECK-I686-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
367 ; CHECK-I686-NEXT: xorl %eax, %eax
368 ; CHECK-I686-NEXT: cmpl $0, {{[0-9]+}}(%esp)
369 ; CHECK-I686-NEXT: setns %al
370 ; CHECK-I686-NEXT: fildll {{[0-9]+}}(%esp)
371 ; CHECK-I686-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
372 ; CHECK-I686-NEXT: fstps (%esp)
373 ; CHECK-I686-NEXT: calll __gnu_f2h_ieee
374 ; CHECK-I686-NEXT: movw %ax, (%esi)
375 ; CHECK-I686-NEXT: addl $24, %esp
376 ; CHECK-I686-NEXT: popl %esi
377 ; CHECK-I686-NEXT: retl
378 %r = uitofp i64 %a to half
379 store half %r, half* %p
; Loads a <4 x half> from %p and widens it to <4 x float> with fpext.
; Every run expects the vector to be handled one element at a time: four
; __gnu_h2f_ieee calls (reading offsets 0, 2, 4 and 6) without F16C, or four
; scalar vcvtph2ps conversions with it. The four floats are then reassembled
; into one register with unpcklps/movlhps (no F16C, i686) or vinsertps (F16C).
383 define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {
384 ; CHECK-LIBCALL-LABEL: test_extend32_vec4:
385 ; CHECK-LIBCALL: # %bb.0:
386 ; CHECK-LIBCALL-NEXT: pushq %rbx
387 ; CHECK-LIBCALL-NEXT: subq $48, %rsp
388 ; CHECK-LIBCALL-NEXT: movq %rdi, %rbx
389 ; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
390 ; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
391 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
392 ; CHECK-LIBCALL-NEXT: movzwl 2(%rbx), %edi
393 ; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
394 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
395 ; CHECK-LIBCALL-NEXT: movzwl 4(%rbx), %edi
396 ; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
397 ; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
398 ; CHECK-LIBCALL-NEXT: movzwl 6(%rbx), %edi
399 ; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
400 ; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
401 ; CHECK-LIBCALL-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
402 ; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
403 ; CHECK-LIBCALL-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
404 ; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
405 ; CHECK-LIBCALL-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
406 ; CHECK-LIBCALL-NEXT: addq $48, %rsp
407 ; CHECK-LIBCALL-NEXT: popq %rbx
408 ; CHECK-LIBCALL-NEXT: retq
410 ; BWON-F16C-LABEL: test_extend32_vec4:
411 ; BWON-F16C: # %bb.0:
412 ; BWON-F16C-NEXT: movswl 6(%rdi), %eax
413 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
414 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
415 ; BWON-F16C-NEXT: movswl 4(%rdi), %eax
416 ; BWON-F16C-NEXT: vmovd %eax, %xmm1
417 ; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
418 ; BWON-F16C-NEXT: movswl (%rdi), %eax
419 ; BWON-F16C-NEXT: vmovd %eax, %xmm2
420 ; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2
421 ; BWON-F16C-NEXT: movswl 2(%rdi), %eax
422 ; BWON-F16C-NEXT: vmovd %eax, %xmm3
423 ; BWON-F16C-NEXT: vcvtph2ps %xmm3, %xmm3
424 ; BWON-F16C-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
425 ; BWON-F16C-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
426 ; BWON-F16C-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
427 ; BWON-F16C-NEXT: retq
429 ; CHECK-I686-LABEL: test_extend32_vec4:
430 ; CHECK-I686: # %bb.0:
431 ; CHECK-I686-NEXT: pushl %esi
432 ; CHECK-I686-NEXT: subl $56, %esp
433 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
434 ; CHECK-I686-NEXT: movzwl 2(%esi), %eax
435 ; CHECK-I686-NEXT: movl %eax, (%esp)
436 ; CHECK-I686-NEXT: calll __gnu_h2f_ieee
437 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
438 ; CHECK-I686-NEXT: movzwl 4(%esi), %eax
439 ; CHECK-I686-NEXT: movl %eax, (%esp)
440 ; CHECK-I686-NEXT: calll __gnu_h2f_ieee
441 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
442 ; CHECK-I686-NEXT: movzwl 6(%esi), %eax
443 ; CHECK-I686-NEXT: movl %eax, (%esp)
444 ; CHECK-I686-NEXT: calll __gnu_h2f_ieee
445 ; CHECK-I686-NEXT: movzwl (%esi), %eax
446 ; CHECK-I686-NEXT: movl %eax, (%esp)
447 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
448 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
449 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
450 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
451 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
452 ; CHECK-I686-NEXT: calll __gnu_h2f_ieee
453 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
454 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
455 ; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
456 ; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
457 ; CHECK-I686-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
458 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
459 ; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
460 ; CHECK-I686-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
461 ; CHECK-I686-NEXT: addl $56, %esp
462 ; CHECK-I686-NEXT: popl %esi
463 ; CHECK-I686-NEXT: retl
464 %a = load <4 x half>, <4 x half>* %p, align 8
465 %b = fpext <4 x half> %a to <4 x float>
; Loads a <4 x half> from %p and widens it to <4 x double> with fpext.
; Every run expects element-by-element handling: each half is first widened
; to float (four __gnu_h2f_ieee calls without F16C, four vcvtph2ps with it).
; The x86-64 runs then apply cvtss2sd / vcvtss2sd per element; without F16C
; the result is assembled as two pairs in xmm0 and xmm1, with F16C as a single
; ymm0 via vinsertf128. The i686 run stores each helper result with fstpl and
; rebuilds xmm0 and xmm1 with movsd/movhps.
469 define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 {
470 ; CHECK-LIBCALL-LABEL: test_extend64_vec4:
471 ; CHECK-LIBCALL: # %bb.0:
472 ; CHECK-LIBCALL-NEXT: pushq %rbx
473 ; CHECK-LIBCALL-NEXT: subq $16, %rsp
474 ; CHECK-LIBCALL-NEXT: movq %rdi, %rbx
475 ; CHECK-LIBCALL-NEXT: movzwl 4(%rdi), %edi
476 ; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
477 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
478 ; CHECK-LIBCALL-NEXT: movzwl 6(%rbx), %edi
479 ; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
480 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
481 ; CHECK-LIBCALL-NEXT: movzwl (%rbx), %edi
482 ; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
483 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
484 ; CHECK-LIBCALL-NEXT: movzwl 2(%rbx), %edi
485 ; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
486 ; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm1
487 ; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
488 ; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero
489 ; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0
490 ; CHECK-LIBCALL-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
491 ; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
492 ; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero
493 ; CHECK-LIBCALL-NEXT: cvtss2sd %xmm1, %xmm2
494 ; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
495 ; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero
496 ; CHECK-LIBCALL-NEXT: cvtss2sd %xmm1, %xmm1
497 ; CHECK-LIBCALL-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
498 ; CHECK-LIBCALL-NEXT: addq $16, %rsp
499 ; CHECK-LIBCALL-NEXT: popq %rbx
500 ; CHECK-LIBCALL-NEXT: retq
502 ; BWON-F16C-LABEL: test_extend64_vec4:
503 ; BWON-F16C: # %bb.0:
504 ; BWON-F16C-NEXT: movswl (%rdi), %eax
505 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
506 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
507 ; BWON-F16C-NEXT: movswl 2(%rdi), %eax
508 ; BWON-F16C-NEXT: vmovd %eax, %xmm1
509 ; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
510 ; BWON-F16C-NEXT: movswl 4(%rdi), %eax
511 ; BWON-F16C-NEXT: vmovd %eax, %xmm2
512 ; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2
513 ; BWON-F16C-NEXT: movswl 6(%rdi), %eax
514 ; BWON-F16C-NEXT: vmovd %eax, %xmm3
515 ; BWON-F16C-NEXT: vcvtph2ps %xmm3, %xmm3
516 ; BWON-F16C-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
517 ; BWON-F16C-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
518 ; BWON-F16C-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
519 ; BWON-F16C-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
520 ; BWON-F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
521 ; BWON-F16C-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
522 ; BWON-F16C-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
523 ; BWON-F16C-NEXT: retq
525 ; CHECK-I686-LABEL: test_extend64_vec4:
526 ; CHECK-I686: # %bb.0:
527 ; CHECK-I686-NEXT: pushl %esi
528 ; CHECK-I686-NEXT: subl $88, %esp
529 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
530 ; CHECK-I686-NEXT: movzwl 6(%esi), %eax
531 ; CHECK-I686-NEXT: movl %eax, (%esp)
532 ; CHECK-I686-NEXT: calll __gnu_h2f_ieee
533 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
534 ; CHECK-I686-NEXT: movzwl 4(%esi), %eax
535 ; CHECK-I686-NEXT: movl %eax, (%esp)
536 ; CHECK-I686-NEXT: calll __gnu_h2f_ieee
537 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
538 ; CHECK-I686-NEXT: movzwl 2(%esi), %eax
539 ; CHECK-I686-NEXT: movl %eax, (%esp)
540 ; CHECK-I686-NEXT: calll __gnu_h2f_ieee
541 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
542 ; CHECK-I686-NEXT: movzwl (%esi), %eax
543 ; CHECK-I686-NEXT: movl %eax, (%esp)
544 ; CHECK-I686-NEXT: calll __gnu_h2f_ieee
545 ; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp)
546 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
547 ; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp)
548 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
549 ; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp)
550 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
551 ; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp)
552 ; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
553 ; CHECK-I686-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
554 ; CHECK-I686-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
555 ; CHECK-I686-NEXT: movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
556 ; CHECK-I686-NEXT: addl $88, %esp
557 ; CHECK-I686-NEXT: popl %esi
558 ; CHECK-I686-NEXT: retl
559 %a = load <4 x half>, <4 x half>* %p, align 8
560 %b = fpext <4 x half> %a to <4 x double>
; Narrows the <4 x float> argument %a to <4 x half> with fptrunc and stores
; the result to %p. Every run expects element-by-element handling that ends
; in four 16-bit stores at offsets 0, 2, 4 and 6: four __gnu_f2h_ieee calls
; without F16C, or four scalar vcvtps2ph $4 conversions with it.
; The two x86-64 runs without F16C differ only in how each call result is
; copied aside: a 32-bit movl with -fixup-byte-word-insts=1, a 16-bit movw
; with -fixup-byte-word-insts=0.
564 define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) #0 {
565 ; BWON-NOF16C-LABEL: test_trunc32_vec4:
566 ; BWON-NOF16C: # %bb.0:
567 ; BWON-NOF16C-NEXT: pushq %rbp
568 ; BWON-NOF16C-NEXT: pushq %r15
569 ; BWON-NOF16C-NEXT: pushq %r14
570 ; BWON-NOF16C-NEXT: pushq %rbx
571 ; BWON-NOF16C-NEXT: subq $24, %rsp
572 ; BWON-NOF16C-NEXT: movq %rdi, %rbx
573 ; BWON-NOF16C-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
574 ; BWON-NOF16C-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
575 ; BWON-NOF16C-NEXT: callq __gnu_f2h_ieee
576 ; BWON-NOF16C-NEXT: movl %eax, %r14d
577 ; BWON-NOF16C-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
578 ; BWON-NOF16C-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
579 ; BWON-NOF16C-NEXT: callq __gnu_f2h_ieee
580 ; BWON-NOF16C-NEXT: movl %eax, %r15d
581 ; BWON-NOF16C-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
582 ; BWON-NOF16C-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
583 ; BWON-NOF16C-NEXT: callq __gnu_f2h_ieee
584 ; BWON-NOF16C-NEXT: movl %eax, %ebp
585 ; BWON-NOF16C-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
586 ; BWON-NOF16C-NEXT: callq __gnu_f2h_ieee
587 ; BWON-NOF16C-NEXT: movw %ax, (%rbx)
588 ; BWON-NOF16C-NEXT: movw %bp, 6(%rbx)
589 ; BWON-NOF16C-NEXT: movw %r15w, 4(%rbx)
590 ; BWON-NOF16C-NEXT: movw %r14w, 2(%rbx)
591 ; BWON-NOF16C-NEXT: addq $24, %rsp
592 ; BWON-NOF16C-NEXT: popq %rbx
593 ; BWON-NOF16C-NEXT: popq %r14
594 ; BWON-NOF16C-NEXT: popq %r15
595 ; BWON-NOF16C-NEXT: popq %rbp
596 ; BWON-NOF16C-NEXT: retq
598 ; BWOFF-LABEL: test_trunc32_vec4:
600 ; BWOFF-NEXT: pushq %rbp
601 ; BWOFF-NEXT: pushq %r15
602 ; BWOFF-NEXT: pushq %r14
603 ; BWOFF-NEXT: pushq %rbx
604 ; BWOFF-NEXT: subq $24, %rsp
605 ; BWOFF-NEXT: movq %rdi, %rbx
606 ; BWOFF-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
607 ; BWOFF-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
608 ; BWOFF-NEXT: callq __gnu_f2h_ieee
609 ; BWOFF-NEXT: movw %ax, %r14w
610 ; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
611 ; BWOFF-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
612 ; BWOFF-NEXT: callq __gnu_f2h_ieee
613 ; BWOFF-NEXT: movw %ax, %r15w
614 ; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
615 ; BWOFF-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
616 ; BWOFF-NEXT: callq __gnu_f2h_ieee
617 ; BWOFF-NEXT: movw %ax, %bp
618 ; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
619 ; BWOFF-NEXT: callq __gnu_f2h_ieee
620 ; BWOFF-NEXT: movw %ax, (%rbx)
621 ; BWOFF-NEXT: movw %bp, 6(%rbx)
622 ; BWOFF-NEXT: movw %r15w, 4(%rbx)
623 ; BWOFF-NEXT: movw %r14w, 2(%rbx)
624 ; BWOFF-NEXT: addq $24, %rsp
625 ; BWOFF-NEXT: popq %rbx
626 ; BWOFF-NEXT: popq %r14
627 ; BWOFF-NEXT: popq %r15
628 ; BWOFF-NEXT: popq %rbp
631 ; BWON-F16C-LABEL: test_trunc32_vec4:
632 ; BWON-F16C: # %bb.0:
633 ; BWON-F16C-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
634 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1
635 ; BWON-F16C-NEXT: vmovd %xmm1, %eax
636 ; BWON-F16C-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
637 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1
638 ; BWON-F16C-NEXT: vmovd %xmm1, %ecx
639 ; BWON-F16C-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
640 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1
641 ; BWON-F16C-NEXT: vmovd %xmm1, %edx
642 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
643 ; BWON-F16C-NEXT: vmovd %xmm0, %esi
644 ; BWON-F16C-NEXT: movw %si, (%rdi)
645 ; BWON-F16C-NEXT: movw %dx, 6(%rdi)
646 ; BWON-F16C-NEXT: movw %cx, 4(%rdi)
647 ; BWON-F16C-NEXT: movw %ax, 2(%rdi)
648 ; BWON-F16C-NEXT: retq
650 ; CHECK-I686-LABEL: test_trunc32_vec4:
651 ; CHECK-I686: # %bb.0:
652 ; CHECK-I686-NEXT: pushl %ebp
653 ; CHECK-I686-NEXT: pushl %ebx
654 ; CHECK-I686-NEXT: pushl %edi
655 ; CHECK-I686-NEXT: pushl %esi
656 ; CHECK-I686-NEXT: subl $44, %esp
657 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
658 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ebp
659 ; CHECK-I686-NEXT: movaps %xmm0, %xmm1
660 ; CHECK-I686-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
661 ; CHECK-I686-NEXT: movss %xmm1, (%esp)
662 ; CHECK-I686-NEXT: calll __gnu_f2h_ieee
663 ; CHECK-I686-NEXT: movw %ax, %si
664 ; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
665 ; CHECK-I686-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
666 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
667 ; CHECK-I686-NEXT: calll __gnu_f2h_ieee
668 ; CHECK-I686-NEXT: movw %ax, %di
669 ; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
670 ; CHECK-I686-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
671 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
672 ; CHECK-I686-NEXT: calll __gnu_f2h_ieee
673 ; CHECK-I686-NEXT: movw %ax, %bx
674 ; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
675 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
676 ; CHECK-I686-NEXT: calll __gnu_f2h_ieee
677 ; CHECK-I686-NEXT: movw %ax, (%ebp)
678 ; CHECK-I686-NEXT: movw %bx, 6(%ebp)
679 ; CHECK-I686-NEXT: movw %di, 4(%ebp)
680 ; CHECK-I686-NEXT: movw %si, 2(%ebp)
681 ; CHECK-I686-NEXT: addl $44, %esp
682 ; CHECK-I686-NEXT: popl %esi
683 ; CHECK-I686-NEXT: popl %edi
684 ; CHECK-I686-NEXT: popl %ebx
685 ; CHECK-I686-NEXT: popl %ebp
686 ; CHECK-I686-NEXT: retl
687 %v = fptrunc <4 x float> %a to <4 x half>
688 store <4 x half> %v, <4 x half>* %p
; Narrows the <4 x double> argument %a to <4 x half> with fptrunc and stores
; the result to %p. Every run -- including the +f16c one -- expects four
; separate calls to __truncdfhf2, with the intermediate results parked in
; registers and written out as four 16-bit stores at offsets 0, 2, 4 and 6.
; In the +f16c run the input arrives in ymm0, and vzeroupper is expected
; before three of the four calls. The two x86-64 runs without F16C differ
; only in how each call result is copied aside (movl with
; -fixup-byte-word-insts=1, movw with -fixup-byte-word-insts=0).
692 define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) #0 {
693 ; BWON-NOF16C-LABEL: test_trunc64_vec4:
694 ; BWON-NOF16C: # %bb.0:
695 ; BWON-NOF16C-NEXT: pushq %rbp
696 ; BWON-NOF16C-NEXT: pushq %r15
697 ; BWON-NOF16C-NEXT: pushq %r14
698 ; BWON-NOF16C-NEXT: pushq %rbx
699 ; BWON-NOF16C-NEXT: subq $40, %rsp
700 ; BWON-NOF16C-NEXT: movq %rdi, %rbx
701 ; BWON-NOF16C-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
702 ; BWON-NOF16C-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
703 ; BWON-NOF16C-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
704 ; BWON-NOF16C-NEXT: callq __truncdfhf2
705 ; BWON-NOF16C-NEXT: movl %eax, %r14d
706 ; BWON-NOF16C-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
707 ; BWON-NOF16C-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
708 ; BWON-NOF16C-NEXT: callq __truncdfhf2
709 ; BWON-NOF16C-NEXT: movl %eax, %r15d
710 ; BWON-NOF16C-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
711 ; BWON-NOF16C-NEXT: callq __truncdfhf2
712 ; BWON-NOF16C-NEXT: movl %eax, %ebp
713 ; BWON-NOF16C-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
714 ; BWON-NOF16C-NEXT: callq __truncdfhf2
715 ; BWON-NOF16C-NEXT: movw %ax, 4(%rbx)
716 ; BWON-NOF16C-NEXT: movw %bp, (%rbx)
717 ; BWON-NOF16C-NEXT: movw %r15w, 6(%rbx)
718 ; BWON-NOF16C-NEXT: movw %r14w, 2(%rbx)
719 ; BWON-NOF16C-NEXT: addq $40, %rsp
720 ; BWON-NOF16C-NEXT: popq %rbx
721 ; BWON-NOF16C-NEXT: popq %r14
722 ; BWON-NOF16C-NEXT: popq %r15
723 ; BWON-NOF16C-NEXT: popq %rbp
724 ; BWON-NOF16C-NEXT: retq
726 ; BWOFF-LABEL: test_trunc64_vec4:
728 ; BWOFF-NEXT: pushq %rbp
729 ; BWOFF-NEXT: pushq %r15
730 ; BWOFF-NEXT: pushq %r14
731 ; BWOFF-NEXT: pushq %rbx
732 ; BWOFF-NEXT: subq $40, %rsp
733 ; BWOFF-NEXT: movq %rdi, %rbx
734 ; BWOFF-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
735 ; BWOFF-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
736 ; BWOFF-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
737 ; BWOFF-NEXT: callq __truncdfhf2
738 ; BWOFF-NEXT: movw %ax, %r14w
739 ; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
740 ; BWOFF-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
741 ; BWOFF-NEXT: callq __truncdfhf2
742 ; BWOFF-NEXT: movw %ax, %r15w
743 ; BWOFF-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
744 ; BWOFF-NEXT: callq __truncdfhf2
745 ; BWOFF-NEXT: movw %ax, %bp
746 ; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
747 ; BWOFF-NEXT: callq __truncdfhf2
748 ; BWOFF-NEXT: movw %ax, 4(%rbx)
749 ; BWOFF-NEXT: movw %bp, (%rbx)
750 ; BWOFF-NEXT: movw %r15w, 6(%rbx)
751 ; BWOFF-NEXT: movw %r14w, 2(%rbx)
752 ; BWOFF-NEXT: addq $40, %rsp
753 ; BWOFF-NEXT: popq %rbx
754 ; BWOFF-NEXT: popq %r14
755 ; BWOFF-NEXT: popq %r15
756 ; BWOFF-NEXT: popq %rbp
759 ; BWON-F16C-LABEL: test_trunc64_vec4:
760 ; BWON-F16C: # %bb.0:
761 ; BWON-F16C-NEXT: pushq %rbp
762 ; BWON-F16C-NEXT: pushq %r15
763 ; BWON-F16C-NEXT: pushq %r14
764 ; BWON-F16C-NEXT: pushq %rbx
765 ; BWON-F16C-NEXT: subq $88, %rsp
766 ; BWON-F16C-NEXT: movq %rdi, %rbx
767 ; BWON-F16C-NEXT: vmovupd %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
768 ; BWON-F16C-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
769 ; BWON-F16C-NEXT: vzeroupper
770 ; BWON-F16C-NEXT: callq __truncdfhf2
771 ; BWON-F16C-NEXT: movl %eax, %r14d
772 ; BWON-F16C-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
773 ; BWON-F16C-NEXT: vextractf128 $1, %ymm0, %xmm0
774 ; BWON-F16C-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
775 ; BWON-F16C-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
776 ; BWON-F16C-NEXT: vzeroupper
777 ; BWON-F16C-NEXT: callq __truncdfhf2
778 ; BWON-F16C-NEXT: movl %eax, %r15d
779 ; BWON-F16C-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
780 ; BWON-F16C-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
781 ; BWON-F16C-NEXT: vzeroupper
782 ; BWON-F16C-NEXT: callq __truncdfhf2
783 ; BWON-F16C-NEXT: movl %eax, %ebp
784 ; BWON-F16C-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
785 ; BWON-F16C-NEXT: callq __truncdfhf2
786 ; BWON-F16C-NEXT: movw %ax, 4(%rbx)
787 ; BWON-F16C-NEXT: movw %bp, (%rbx)
788 ; BWON-F16C-NEXT: movw %r15w, 6(%rbx)
789 ; BWON-F16C-NEXT: movw %r14w, 2(%rbx)
790 ; BWON-F16C-NEXT: addq $88, %rsp
791 ; BWON-F16C-NEXT: popq %rbx
792 ; BWON-F16C-NEXT: popq %r14
793 ; BWON-F16C-NEXT: popq %r15
794 ; BWON-F16C-NEXT: popq %rbp
795 ; BWON-F16C-NEXT: retq
797 ; CHECK-I686-LABEL: test_trunc64_vec4:
798 ; CHECK-I686: # %bb.0:
799 ; CHECK-I686-NEXT: pushl %ebp
800 ; CHECK-I686-NEXT: pushl %ebx
801 ; CHECK-I686-NEXT: pushl %edi
802 ; CHECK-I686-NEXT: pushl %esi
803 ; CHECK-I686-NEXT: subl $60, %esp
804 ; CHECK-I686-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
805 ; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
806 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ebp
807 ; CHECK-I686-NEXT: movlps %xmm0, (%esp)
808 ; CHECK-I686-NEXT: calll __truncdfhf2
809 ; CHECK-I686-NEXT: movw %ax, %si
810 ; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
811 ; CHECK-I686-NEXT: movhps %xmm0, (%esp)
812 ; CHECK-I686-NEXT: calll __truncdfhf2
813 ; CHECK-I686-NEXT: movw %ax, %di
814 ; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
815 ; CHECK-I686-NEXT: movlps %xmm0, (%esp)
816 ; CHECK-I686-NEXT: calll __truncdfhf2
817 ; CHECK-I686-NEXT: movw %ax, %bx
818 ; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
819 ; CHECK-I686-NEXT: movhps %xmm0, (%esp)
820 ; CHECK-I686-NEXT: calll __truncdfhf2
821 ; CHECK-I686-NEXT: movw %ax, 6(%ebp)
822 ; CHECK-I686-NEXT: movw %bx, 4(%ebp)
823 ; CHECK-I686-NEXT: movw %di, 2(%ebp)
824 ; CHECK-I686-NEXT: movw %si, (%ebp)
825 ; CHECK-I686-NEXT: addl $60, %esp
826 ; CHECK-I686-NEXT: popl %esi
827 ; CHECK-I686-NEXT: popl %edi
828 ; CHECK-I686-NEXT: popl %ebx
829 ; CHECK-I686-NEXT: popl %ebp
830 ; CHECK-I686-NEXT: retl
831 %v = fptrunc <4 x double> %a to <4 x half>
832 store <4 x half> %v, <4 x half>* %p
; External function returning float. It is only declared in this file; the
; call in test_f80trunc_nodagcombine uses it as the source of a float value
; that arrives as a call result.
836 declare float @test_floatret();
838 ; On i686, if SSE2 is available, the return value from test_floatret is loaded
839 ; to f80 and then rounded to f32. The DAG combiner should not combine this
840 ; fp_round and the subsequent fptrunc from float to half.
;
; The IR body calls @test_floatret and truncates the returned float to half.
; Expected lowering per check prefix (prefixes are bound to configurations by
; the RUN lines at the top of the file):
;   CHECK-LIBCALL (x86-64, -f16c): __gnu_f2h_ieee on the call result, then
;     __gnu_h2f_ieee on the zero-extended 16-bit value.
;   BWON-F16C (x86-64, +f16c): vcvtps2ph followed by vcvtph2ps, no libcalls.
;   CHECK-I686 (i686, +sse2): the call result is stored with fstps to (%esp)
;     and that stack slot is the argument of __gnu_f2h_ieee, i.e. the value is
;     rounded to f32 first and only then converted to half.
; NOTE: the assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
841 define half @test_f80trunc_nodagcombine() #0 {
842 ; CHECK-LIBCALL-LABEL: test_f80trunc_nodagcombine:
843 ; CHECK-LIBCALL: # %bb.0:
844 ; CHECK-LIBCALL-NEXT: pushq %rax
845 ; CHECK-LIBCALL-NEXT: callq test_floatret
846 ; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
847 ; CHECK-LIBCALL-NEXT: movzwl %ax, %edi
848 ; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
849 ; CHECK-LIBCALL-NEXT: popq %rax
850 ; CHECK-LIBCALL-NEXT: retq
852 ; BWON-F16C-LABEL: test_f80trunc_nodagcombine:
853 ; BWON-F16C: # %bb.0:
854 ; BWON-F16C-NEXT: pushq %rax
855 ; BWON-F16C-NEXT: callq test_floatret
856 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
857 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
858 ; BWON-F16C-NEXT: popq %rax
859 ; BWON-F16C-NEXT: retq
861 ; CHECK-I686-LABEL: test_f80trunc_nodagcombine:
862 ; CHECK-I686: # %bb.0:
863 ; CHECK-I686-NEXT: subl $12, %esp
864 ; CHECK-I686-NEXT: calll test_floatret
865 ; CHECK-I686-NEXT: fstps (%esp)
866 ; CHECK-I686-NEXT: calll __gnu_f2h_ieee
867 ; CHECK-I686-NEXT: movzwl %ax, %eax
868 ; CHECK-I686-NEXT: movl %eax, (%esp)
869 ; CHECK-I686-NEXT: calll __gnu_h2f_ieee
870 ; CHECK-I686-NEXT: addl $12, %esp
871 ; CHECK-I686-NEXT: retl
; float call result -> half: the fptrunc the header comment says must stay
; separate from the preceding f80 -> f32 rounding on i686.
872 %1 = call float @test_floatret()
873 %2 = fptrunc float %1 to half
; Loads a half from %b, converts the i32 %a to half with sitofp, adds the two
; half values and extends the sum to float.
;
; What the checks pin down for the sitofp operand:
;   CHECK-LIBCALL (x86-64, -f16c): cvtsi2ss, then a __gnu_f2h_ieee /
;     __gnu_h2f_ieee round trip before the addss.
;   BWON-F16C (x86-64, +f16c): vcvtsi2ss, then vcvtps2ph / vcvtph2ps before
;     the vaddss.
;   CHECK-I686 (i686, +sse2): cvtsi2ssl from the stack argument, then the same
;     f2h / h2f libcall round trip; libcall results come back via fstps.
; The loaded half is converted to float once (__gnu_h2f_ieee or vcvtph2ps).
;
; NOTE(review): in all three configurations the checks show the result of
; addss/vaddss being returned without a further conversion through half, even
; though the IR performs the fadd on half. Confirm this is the intended
; lowering before relying on it.
; NOTE: the assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
880 define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 {
881 ; CHECK-LIBCALL-LABEL: test_sitofp_fadd_i32:
882 ; CHECK-LIBCALL: # %bb.0:
883 ; CHECK-LIBCALL-NEXT: pushq %rbx
884 ; CHECK-LIBCALL-NEXT: subq $16, %rsp
885 ; CHECK-LIBCALL-NEXT: movl %edi, %ebx
886 ; CHECK-LIBCALL-NEXT: movzwl (%rsi), %edi
887 ; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
888 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
889 ; CHECK-LIBCALL-NEXT: cvtsi2ss %ebx, %xmm0
890 ; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
891 ; CHECK-LIBCALL-NEXT: movzwl %ax, %edi
892 ; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
893 ; CHECK-LIBCALL-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
894 ; CHECK-LIBCALL-NEXT: addq $16, %rsp
895 ; CHECK-LIBCALL-NEXT: popq %rbx
896 ; CHECK-LIBCALL-NEXT: retq
898 ; BWON-F16C-LABEL: test_sitofp_fadd_i32:
899 ; BWON-F16C: # %bb.0:
900 ; BWON-F16C-NEXT: movswl (%rsi), %eax
901 ; BWON-F16C-NEXT: vmovd %eax, %xmm0
902 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
903 ; BWON-F16C-NEXT: vcvtsi2ss %edi, %xmm1, %xmm1
904 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1
905 ; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
906 ; BWON-F16C-NEXT: vaddss %xmm1, %xmm0, %xmm0
907 ; BWON-F16C-NEXT: retq
909 ; CHECK-I686-LABEL: test_sitofp_fadd_i32:
910 ; CHECK-I686: # %bb.0:
911 ; CHECK-I686-NEXT: subl $28, %esp
912 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
913 ; CHECK-I686-NEXT: movzwl (%eax), %eax
914 ; CHECK-I686-NEXT: movl %eax, (%esp)
915 ; CHECK-I686-NEXT: calll __gnu_h2f_ieee
916 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
917 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
918 ; CHECK-I686-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
919 ; CHECK-I686-NEXT: xorps %xmm0, %xmm0
920 ; CHECK-I686-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
921 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
922 ; CHECK-I686-NEXT: calll __gnu_f2h_ieee
923 ; CHECK-I686-NEXT: movzwl %ax, %eax
924 ; CHECK-I686-NEXT: movl %eax, (%esp)
925 ; CHECK-I686-NEXT: calll __gnu_h2f_ieee
926 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
927 ; CHECK-I686-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
928 ; CHECK-I686-NEXT: # xmm0 = mem[0],zero,zero,zero
929 ; CHECK-I686-NEXT: addss {{[0-9]+}}(%esp), %xmm0
930 ; CHECK-I686-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
931 ; CHECK-I686-NEXT: flds {{[0-9]+}}(%esp)
932 ; CHECK-I686-NEXT: addl $28, %esp
933 ; CHECK-I686-NEXT: retl
; half load, i32 -> half conversion, half add, then widen the sum to float.
934 %tmp0 = load half, half* %b
935 %tmp1 = sitofp i32 %a to half
936 %tmp2 = fadd half %tmp0, %tmp1
937 %tmp3 = fpext half %tmp2 to float
; Presumably a regression test for the LLVM bug the function is named after
; (PR40273) -- confirm against the bug tracker.
;
; The IR compares the unnamed half argument (%0) against half zero (0xH0000)
; with `fcmp une` and converts the resulting i1 to half with uitofp.
;
; Shape of the expected code, common to all three configurations:
;   1. The argument is passed through half precision: a __gnu_f2h_ieee /
;      __gnu_h2f_ieee round trip (CHECK-LIBCALL, CHECK-I686) or
;      vcvtps2ph / vcvtph2ps (BWON-F16C).
;   2. A register zeroed with xorps/vxorps is compared with (v)ucomiss.
;   3. A scalar is loaded from memory into xmm0 (the operand is hidden by the
;      {{.*#+}} regex; presumably the constant for the "true" result --
;      TODO confirm).
;   4. `jne` and `jp` both branch to .LBB17_3, keeping the loaded value; `jp`
;      covers the unordered outcome of ucomiss, which `une` treats as true.
;      Otherwise xmm0 is zeroed before falling into .LBB17_3.
; On i686 the result is additionally moved to the x87 stack (movss + flds)
; before returning.
; NOTE: the assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
941 define half @PR40273(half) #0 {
942 ; CHECK-LIBCALL-LABEL: PR40273:
943 ; CHECK-LIBCALL: # %bb.0:
944 ; CHECK-LIBCALL-NEXT: pushq %rax
945 ; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
946 ; CHECK-LIBCALL-NEXT: movzwl %ax, %edi
947 ; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
948 ; CHECK-LIBCALL-NEXT: xorps %xmm1, %xmm1
949 ; CHECK-LIBCALL-NEXT: ucomiss %xmm1, %xmm0
950 ; CHECK-LIBCALL-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
951 ; CHECK-LIBCALL-NEXT: jne .LBB17_3
952 ; CHECK-LIBCALL-NEXT: # %bb.1:
953 ; CHECK-LIBCALL-NEXT: jp .LBB17_3
954 ; CHECK-LIBCALL-NEXT: # %bb.2:
955 ; CHECK-LIBCALL-NEXT: xorps %xmm0, %xmm0
956 ; CHECK-LIBCALL-NEXT: .LBB17_3:
957 ; CHECK-LIBCALL-NEXT: popq %rax
958 ; CHECK-LIBCALL-NEXT: retq
960 ; BWON-F16C-LABEL: PR40273:
961 ; BWON-F16C: # %bb.0:
962 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
963 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
964 ; BWON-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
965 ; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0
966 ; BWON-F16C-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
967 ; BWON-F16C-NEXT: jne .LBB17_3
968 ; BWON-F16C-NEXT: # %bb.1:
969 ; BWON-F16C-NEXT: jp .LBB17_3
970 ; BWON-F16C-NEXT: # %bb.2:
971 ; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0
972 ; BWON-F16C-NEXT: .LBB17_3:
973 ; BWON-F16C-NEXT: retq
975 ; CHECK-I686-LABEL: PR40273:
976 ; CHECK-I686: # %bb.0:
977 ; CHECK-I686-NEXT: subl $12, %esp
978 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
979 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
980 ; CHECK-I686-NEXT: calll __gnu_f2h_ieee
981 ; CHECK-I686-NEXT: movzwl %ax, %eax
982 ; CHECK-I686-NEXT: movl %eax, (%esp)
983 ; CHECK-I686-NEXT: calll __gnu_h2f_ieee
984 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
985 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
986 ; CHECK-I686-NEXT: xorps %xmm1, %xmm1
987 ; CHECK-I686-NEXT: ucomiss %xmm1, %xmm0
988 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
989 ; CHECK-I686-NEXT: jne .LBB17_3
990 ; CHECK-I686-NEXT: # %bb.1:
991 ; CHECK-I686-NEXT: jp .LBB17_3
992 ; CHECK-I686-NEXT: # %bb.2:
993 ; CHECK-I686-NEXT: xorps %xmm0, %xmm0
994 ; CHECK-I686-NEXT: .LBB17_3:
995 ; CHECK-I686-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
996 ; CHECK-I686-NEXT: flds {{[0-9]+}}(%esp)
997 ; CHECK-I686-NEXT: addl $12, %esp
998 ; CHECK-I686-NEXT: retl
; %0 is the unnamed half parameter; value numbering continues at %2 because
; the entry block implicitly takes %1.
999 %2 = fcmp une half %0, 0xH0000
1000 %3 = uitofp i1 %2 to half
; Attribute group #0, referenced by the `define` lines in this file: marks the
; test functions nounwind.
1004 attributes #0 = { nounwind }