; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s --check-prefix=X32-NOF16C
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=f16c | FileCheck %s --check-prefix=X32-F16C
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s --check-prefix=X64-NOF16C
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=f16c | FileCheck %s --check-prefix=X64-F16C

@a = global half 0xH0000, align 2
@b = global half 0xH0000, align 2
@c = global half 0xH0000, align 2
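
; fpext half -> float (strict): a ___extendhfsf2 libcall, or vcvtph2ps when F16C is available on x86_64.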
define float @half_to_float() strictfp {
; X32-NOF16C-LABEL: half_to_float:
; X32-NOF16C: ## %bb.0:
; X32-NOF16C-NEXT: subl $12, %esp
; X32-NOF16C-NEXT: .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT: movzwl _a, %eax
; X32-NOF16C-NEXT: movl %eax, (%esp)
; X32-NOF16C-NEXT: calll ___extendhfsf2
; X32-NOF16C-NEXT: addl $12, %esp
; X32-NOF16C-NEXT: retl
;
; X32-F16C-LABEL: half_to_float:
; X32-F16C: ## %bb.0:
; X32-F16C-NEXT: subl $12, %esp
; X32-F16C-NEXT: .cfi_def_cfa_offset 16
; X32-F16C-NEXT: movzwl _a, %eax
; X32-F16C-NEXT: movl %eax, (%esp)
; X32-F16C-NEXT: calll ___extendhfsf2
; X32-F16C-NEXT: addl $12, %esp
; X32-F16C-NEXT: retl
;
; X64-NOF16C-LABEL: half_to_float:
; X64-NOF16C: ## %bb.0:
; X64-NOF16C-NEXT: pushq %rax
; X64-NOF16C-NEXT: .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT: movzwl _a(%rip), %edi
; X64-NOF16C-NEXT: callq ___extendhfsf2
; X64-NOF16C-NEXT: popq %rax
; X64-NOF16C-NEXT: retq
;
; X64-F16C-LABEL: half_to_float:
; X64-F16C: ## %bb.0:
; X64-F16C-NEXT: movzwl _a(%rip), %eax
; X64-F16C-NEXT: vmovd %eax, %xmm0
; X64-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; X64-F16C-NEXT: retq
  %1 = load half, half* @a, align 2
  %2 = tail call float @llvm.experimental.constrained.fpext.f32.f16(half %1, metadata !"fpexcept.strict") #0
  ret float %2
}
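
; fpext half -> double (strict): extend to float first, then widen to double (cvtss2sd on x86_64).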
define double @half_to_double() strictfp {
; X32-NOF16C-LABEL: half_to_double:
; X32-NOF16C: ## %bb.0:
; X32-NOF16C-NEXT: subl $12, %esp
; X32-NOF16C-NEXT: .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT: movzwl _a, %eax
; X32-NOF16C-NEXT: movl %eax, (%esp)
; X32-NOF16C-NEXT: calll ___extendhfsf2
; X32-NOF16C-NEXT: addl $12, %esp
; X32-NOF16C-NEXT: retl
;
; X32-F16C-LABEL: half_to_double:
; X32-F16C: ## %bb.0:
; X32-F16C-NEXT: subl $12, %esp
; X32-F16C-NEXT: .cfi_def_cfa_offset 16
; X32-F16C-NEXT: movzwl _a, %eax
; X32-F16C-NEXT: movl %eax, (%esp)
; X32-F16C-NEXT: calll ___extendhfsf2
; X32-F16C-NEXT: addl $12, %esp
; X32-F16C-NEXT: retl
;
; X64-NOF16C-LABEL: half_to_double:
; X64-NOF16C: ## %bb.0:
; X64-NOF16C-NEXT: pushq %rax
; X64-NOF16C-NEXT: .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT: movzwl _a(%rip), %edi
; X64-NOF16C-NEXT: callq ___extendhfsf2
; X64-NOF16C-NEXT: cvtss2sd %xmm0, %xmm0
; X64-NOF16C-NEXT: popq %rax
; X64-NOF16C-NEXT: retq
;
; X64-F16C-LABEL: half_to_double:
; X64-F16C: ## %bb.0:
; X64-F16C-NEXT: movzwl _a(%rip), %eax
; X64-F16C-NEXT: vmovd %eax, %xmm0
; X64-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; X64-F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; X64-F16C-NEXT: retq
  %1 = load half, half* @a, align 2
  %2 = tail call double @llvm.experimental.constrained.fpext.f64.f16(half %1, metadata !"fpexcept.strict") #0
  ret double %2
}
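
; fpext half -> x86_fp80 (strict): extend to float, then load onto the x87 stack with flds.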
define x86_fp80 @half_to_fp80() strictfp {
; X32-NOF16C-LABEL: half_to_fp80:
; X32-NOF16C: ## %bb.0:
; X32-NOF16C-NEXT: subl $12, %esp
; X32-NOF16C-NEXT: .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT: movzwl _a, %eax
; X32-NOF16C-NEXT: movl %eax, (%esp)
; X32-NOF16C-NEXT: calll ___extendhfsf2
; X32-NOF16C-NEXT: addl $12, %esp
; X32-NOF16C-NEXT: retl
;
; X32-F16C-LABEL: half_to_fp80:
; X32-F16C: ## %bb.0:
; X32-F16C-NEXT: subl $12, %esp
; X32-F16C-NEXT: .cfi_def_cfa_offset 16
; X32-F16C-NEXT: movzwl _a, %eax
; X32-F16C-NEXT: movl %eax, (%esp)
; X32-F16C-NEXT: calll ___extendhfsf2
; X32-F16C-NEXT: addl $12, %esp
; X32-F16C-NEXT: retl
;
; X64-NOF16C-LABEL: half_to_fp80:
; X64-NOF16C: ## %bb.0:
; X64-NOF16C-NEXT: pushq %rax
; X64-NOF16C-NEXT: .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT: movzwl _a(%rip), %edi
; X64-NOF16C-NEXT: callq ___extendhfsf2
; X64-NOF16C-NEXT: movss %xmm0, {{[0-9]+}}(%rsp)
; X64-NOF16C-NEXT: flds {{[0-9]+}}(%rsp)
; X64-NOF16C-NEXT: wait
; X64-NOF16C-NEXT: popq %rax
; X64-NOF16C-NEXT: retq
;
; X64-F16C-LABEL: half_to_fp80:
; X64-F16C: ## %bb.0:
; X64-F16C-NEXT: movzwl _a(%rip), %eax
; X64-F16C-NEXT: vmovd %eax, %xmm0
; X64-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; X64-F16C-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp)
; X64-F16C-NEXT: flds -{{[0-9]+}}(%rsp)
; X64-F16C-NEXT: wait
; X64-F16C-NEXT: retq
  %1 = load half, half* @a, align 2
  %2 = tail call x86_fp80 @llvm.experimental.constrained.fpext.f80.f16(half %1, metadata !"fpexcept.strict") #0
  ret x86_fp80 %2
}
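
; fptrunc float -> half (strict): a ___truncsfhf2 libcall, or vcvtps2ph when F16C is available on x86_64.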
define void @float_to_half(float %0) strictfp {
; X32-NOF16C-LABEL: float_to_half:
; X32-NOF16C: ## %bb.0:
; X32-NOF16C-NEXT: subl $12, %esp
; X32-NOF16C-NEXT: .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT: flds {{[0-9]+}}(%esp)
; X32-NOF16C-NEXT: fstps (%esp)
; X32-NOF16C-NEXT: wait
; X32-NOF16C-NEXT: calll ___truncsfhf2
; X32-NOF16C-NEXT: movw %ax, _a
; X32-NOF16C-NEXT: addl $12, %esp
; X32-NOF16C-NEXT: retl
;
; X32-F16C-LABEL: float_to_half:
; X32-F16C: ## %bb.0:
; X32-F16C-NEXT: subl $12, %esp
; X32-F16C-NEXT: .cfi_def_cfa_offset 16
; X32-F16C-NEXT: flds {{[0-9]+}}(%esp)
; X32-F16C-NEXT: fstps (%esp)
; X32-F16C-NEXT: wait
; X32-F16C-NEXT: calll ___truncsfhf2
; X32-F16C-NEXT: movw %ax, _a
; X32-F16C-NEXT: addl $12, %esp
; X32-F16C-NEXT: retl
;
; X64-NOF16C-LABEL: float_to_half:
; X64-NOF16C: ## %bb.0:
; X64-NOF16C-NEXT: pushq %rax
; X64-NOF16C-NEXT: .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT: callq ___truncsfhf2
; X64-NOF16C-NEXT: movw %ax, _a(%rip)
; X64-NOF16C-NEXT: popq %rax
; X64-NOF16C-NEXT: retq
;
; X64-F16C-LABEL: float_to_half:
; X64-F16C: ## %bb.0:
; X64-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X64-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; X64-F16C-NEXT: vpextrw $0, %xmm0, _a(%rip)
; X64-F16C-NEXT: retq
  %2 = tail call half @llvm.experimental.constrained.fptrunc.f16.f32(float %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store half %2, half* @a, align 2
  ret void
}
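
; fptrunc double -> half (strict): a ___truncdfhf2 libcall even with F16C, avoiding double rounding through float.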
define void @double_to_half(double %0) strictfp {
; X32-NOF16C-LABEL: double_to_half:
; X32-NOF16C: ## %bb.0:
; X32-NOF16C-NEXT: subl $12, %esp
; X32-NOF16C-NEXT: .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT: fldl {{[0-9]+}}(%esp)
; X32-NOF16C-NEXT: fstpl (%esp)
; X32-NOF16C-NEXT: wait
; X32-NOF16C-NEXT: calll ___truncdfhf2
; X32-NOF16C-NEXT: movw %ax, _a
; X32-NOF16C-NEXT: addl $12, %esp
; X32-NOF16C-NEXT: retl
;
; X32-F16C-LABEL: double_to_half:
; X32-F16C: ## %bb.0:
; X32-F16C-NEXT: subl $12, %esp
; X32-F16C-NEXT: .cfi_def_cfa_offset 16
; X32-F16C-NEXT: fldl {{[0-9]+}}(%esp)
; X32-F16C-NEXT: fstpl (%esp)
; X32-F16C-NEXT: wait
; X32-F16C-NEXT: calll ___truncdfhf2
; X32-F16C-NEXT: movw %ax, _a
; X32-F16C-NEXT: addl $12, %esp
; X32-F16C-NEXT: retl
;
; X64-NOF16C-LABEL: double_to_half:
; X64-NOF16C: ## %bb.0:
; X64-NOF16C-NEXT: pushq %rax
; X64-NOF16C-NEXT: .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT: callq ___truncdfhf2
; X64-NOF16C-NEXT: movw %ax, _a(%rip)
; X64-NOF16C-NEXT: popq %rax
; X64-NOF16C-NEXT: retq
;
; X64-F16C-LABEL: double_to_half:
; X64-F16C: ## %bb.0:
; X64-F16C-NEXT: pushq %rax
; X64-F16C-NEXT: .cfi_def_cfa_offset 16
; X64-F16C-NEXT: callq ___truncdfhf2
; X64-F16C-NEXT: movw %ax, _a(%rip)
; X64-F16C-NEXT: popq %rax
; X64-F16C-NEXT: retq
  %2 = tail call half @llvm.experimental.constrained.fptrunc.f16.f64(double %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store half %2, half* @a, align 2
  ret void
}
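
; fptrunc x86_fp80 -> half (strict): a ___truncxfhf2 libcall on all configurations.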
define void @fp80_to_half(x86_fp80 %0) strictfp {
; X32-NOF16C-LABEL: fp80_to_half:
; X32-NOF16C: ## %bb.0:
; X32-NOF16C-NEXT: subl $28, %esp
; X32-NOF16C-NEXT: .cfi_def_cfa_offset 32
; X32-NOF16C-NEXT: fldt {{[0-9]+}}(%esp)
; X32-NOF16C-NEXT: fstpt (%esp)
; X32-NOF16C-NEXT: wait
; X32-NOF16C-NEXT: calll ___truncxfhf2
; X32-NOF16C-NEXT: movw %ax, _a
; X32-NOF16C-NEXT: addl $28, %esp
; X32-NOF16C-NEXT: retl
;
; X32-F16C-LABEL: fp80_to_half:
; X32-F16C: ## %bb.0:
; X32-F16C-NEXT: subl $28, %esp
; X32-F16C-NEXT: .cfi_def_cfa_offset 32
; X32-F16C-NEXT: fldt {{[0-9]+}}(%esp)
; X32-F16C-NEXT: fstpt (%esp)
; X32-F16C-NEXT: wait
; X32-F16C-NEXT: calll ___truncxfhf2
; X32-F16C-NEXT: movw %ax, _a
; X32-F16C-NEXT: addl $28, %esp
; X32-F16C-NEXT: retl
;
; X64-NOF16C-LABEL: fp80_to_half:
; X64-NOF16C: ## %bb.0:
; X64-NOF16C-NEXT: subq $24, %rsp
; X64-NOF16C-NEXT: .cfi_def_cfa_offset 32
; X64-NOF16C-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NOF16C-NEXT: fstpt (%rsp)
; X64-NOF16C-NEXT: wait
; X64-NOF16C-NEXT: callq ___truncxfhf2
; X64-NOF16C-NEXT: movw %ax, _a(%rip)
; X64-NOF16C-NEXT: addq $24, %rsp
; X64-NOF16C-NEXT: retq
;
; X64-F16C-LABEL: fp80_to_half:
; X64-F16C: ## %bb.0:
; X64-F16C-NEXT: subq $24, %rsp
; X64-F16C-NEXT: .cfi_def_cfa_offset 32
; X64-F16C-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-F16C-NEXT: fstpt (%rsp)
; X64-F16C-NEXT: wait
; X64-F16C-NEXT: callq ___truncxfhf2
; X64-F16C-NEXT: movw %ax, _a(%rip)
; X64-F16C-NEXT: addq $24, %rsp
; X64-F16C-NEXT: retq
  %2 = tail call half @llvm.experimental.constrained.fptrunc.f16.f80(x86_fp80 %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store half %2, half* @a, align 2
  ret void
}
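
; c = a + b in half (strict): extend both operands to float, add, then truncate the result back to half.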
define void @add() strictfp {
; X32-NOF16C-LABEL: add:
; X32-NOF16C: ## %bb.0:
; X32-NOF16C-NEXT: subl $12, %esp
; X32-NOF16C-NEXT: .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT: movzwl _a, %eax
; X32-NOF16C-NEXT: movl %eax, (%esp)
; X32-NOF16C-NEXT: calll ___extendhfsf2
; X32-NOF16C-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X32-NOF16C-NEXT: wait
; X32-NOF16C-NEXT: movzwl _b, %eax
; X32-NOF16C-NEXT: movl %eax, (%esp)
; X32-NOF16C-NEXT: calll ___extendhfsf2
; X32-NOF16C-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Reload
; X32-NOF16C-NEXT: faddp %st, %st(1)
; X32-NOF16C-NEXT: fstps (%esp)
; X32-NOF16C-NEXT: wait
; X32-NOF16C-NEXT: calll ___truncsfhf2
; X32-NOF16C-NEXT: movw %ax, _c
; X32-NOF16C-NEXT: addl $12, %esp
; X32-NOF16C-NEXT: retl
;
; X32-F16C-LABEL: add:
; X32-F16C: ## %bb.0:
; X32-F16C-NEXT: subl $12, %esp
; X32-F16C-NEXT: .cfi_def_cfa_offset 16
; X32-F16C-NEXT: movzwl _a, %eax
; X32-F16C-NEXT: movl %eax, (%esp)
; X32-F16C-NEXT: calll ___extendhfsf2
; X32-F16C-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X32-F16C-NEXT: wait
; X32-F16C-NEXT: movzwl _b, %eax
; X32-F16C-NEXT: movl %eax, (%esp)
; X32-F16C-NEXT: calll ___extendhfsf2
; X32-F16C-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Reload
; X32-F16C-NEXT: faddp %st, %st(1)
; X32-F16C-NEXT: fstps (%esp)
; X32-F16C-NEXT: wait
; X32-F16C-NEXT: calll ___truncsfhf2
; X32-F16C-NEXT: movw %ax, _c
; X32-F16C-NEXT: addl $12, %esp
; X32-F16C-NEXT: retl
;
; X64-NOF16C-LABEL: add:
; X64-NOF16C: ## %bb.0:
; X64-NOF16C-NEXT: pushq %rax
; X64-NOF16C-NEXT: .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT: movzwl _a(%rip), %edi
; X64-NOF16C-NEXT: callq ___extendhfsf2
; X64-NOF16C-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; X64-NOF16C-NEXT: movzwl _b(%rip), %edi
; X64-NOF16C-NEXT: callq ___extendhfsf2
; X64-NOF16C-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 4-byte Folded Reload
; X64-NOF16C-NEXT: callq ___truncsfhf2
; X64-NOF16C-NEXT: movw %ax, _c(%rip)
; X64-NOF16C-NEXT: popq %rax
; X64-NOF16C-NEXT: retq
;
; X64-F16C-LABEL: add:
; X64-F16C: ## %bb.0:
; X64-F16C-NEXT: movzwl _a(%rip), %eax
; X64-F16C-NEXT: vmovd %eax, %xmm0
; X64-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; X64-F16C-NEXT: movzwl _b(%rip), %eax
; X64-F16C-NEXT: vmovd %eax, %xmm1
; X64-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; X64-F16C-NEXT: vaddss %xmm1, %xmm0, %xmm0
; X64-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X64-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; X64-F16C-NEXT: vpextrw $0, %xmm0, _c(%rip)
; X64-F16C-NEXT: retq
  %1 = load half, half* @a, align 2
  %2 = tail call float @llvm.experimental.constrained.fpext.f32.f16(half %1, metadata !"fpexcept.strict") #0
  %3 = load half, half* @b, align 2
  %4 = tail call float @llvm.experimental.constrained.fpext.f32.f16(half %3, metadata !"fpexcept.strict") #0
  %5 = tail call float @llvm.experimental.constrained.fadd.f32(float %2, float %4, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %6 = tail call half @llvm.experimental.constrained.fptrunc.f16.f32(float %5, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store half %6, half* @c, align 2
  ret void
}

declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata)
declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata)
declare x86_fp80 @llvm.experimental.constrained.fpext.f80.f16(half, metadata)
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata)
declare half @llvm.experimental.constrained.fptrunc.f16.f80(x86_fp80, metadata, metadata)

attributes #0 = { strictfp }