1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK
; Declarations of the constrained floating-point intrinsics called by the test
; functions in this file.
; Arithmetic and sqrt on <8 x half>; at the call sites the two metadata operands
; carry the rounding mode and the exception behavior.
5 declare <8 x half> @llvm.experimental.constrained.fadd.v8f16(<8 x half>, <8 x half>, metadata, metadata)
6 declare <8 x half> @llvm.experimental.constrained.fsub.v8f16(<8 x half>, <8 x half>, metadata, metadata)
7 declare <8 x half> @llvm.experimental.constrained.fmul.v8f16(<8 x half>, <8 x half>, metadata, metadata)
8 declare <8 x half> @llvm.experimental.constrained.fdiv.v8f16(<8 x half>, <8 x half>, metadata, metadata)
9 declare <8 x half> @llvm.experimental.constrained.sqrt.v8f16(<8 x half>, metadata, metadata)
; Truncations to half (scalar and vector) from float and double sources; these
; also take two metadata operands.
10 declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
11 declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata)
12 declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata)
13 declare <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float>, metadata, metadata)
14 declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double>, metadata, metadata)
; Extensions from half (scalar and vector) to float and double; these take a
; single metadata operand (the call sites pass only the exception behavior).
15 declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata)
16 declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata)
17 declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata)
18 declare <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half>, metadata)
19 declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half>, metadata)
; f2: constrained fadd of two <8 x half> operands with "round.dynamic" rounding
; and "fpexcept.strict" exception behavior. The expected assembly is a vaddph on
; xmm registers followed by the return.
21 define <8 x half> @f2(<8 x half> %a, <8 x half> %b) #0 {
24 ; CHECK-NEXT: vaddph %xmm1, %xmm0, %xmm0
25 ; CHECK-NEXT: ret{{[l|q]}}
26 %ret = call <8 x half> @llvm.experimental.constrained.fadd.v8f16(<8 x half> %a, <8 x half> %b,
27 metadata !"round.dynamic",
28 metadata !"fpexcept.strict") #0
; f4: constrained fsub of two <8 x half> operands with "round.dynamic" rounding
; and "fpexcept.strict" exception behavior. The expected assembly is a vsubph on
; xmm registers followed by the return.
32 define <8 x half> @f4(<8 x half> %a, <8 x half> %b) #0 {
35 ; CHECK-NEXT: vsubph %xmm1, %xmm0, %xmm0
36 ; CHECK-NEXT: ret{{[l|q]}}
37 %ret = call <8 x half> @llvm.experimental.constrained.fsub.v8f16(<8 x half> %a, <8 x half> %b,
38 metadata !"round.dynamic",
39 metadata !"fpexcept.strict") #0
; f6: constrained fmul of two <8 x half> operands with "round.dynamic" rounding
; and "fpexcept.strict" exception behavior. The expected assembly is a vmulph on
; xmm registers followed by the return.
43 define <8 x half> @f6(<8 x half> %a, <8 x half> %b) #0 {
46 ; CHECK-NEXT: vmulph %xmm1, %xmm0, %xmm0
47 ; CHECK-NEXT: ret{{[l|q]}}
48 %ret = call <8 x half> @llvm.experimental.constrained.fmul.v8f16(<8 x half> %a, <8 x half> %b,
49 metadata !"round.dynamic",
50 metadata !"fpexcept.strict") #0
; f8: constrained fdiv of two <8 x half> operands with "round.dynamic" rounding
; and "fpexcept.strict" exception behavior. The expected assembly is a vdivph on
; xmm registers followed by the return.
54 define <8 x half> @f8(<8 x half> %a, <8 x half> %b) #0 {
57 ; CHECK-NEXT: vdivph %xmm1, %xmm0, %xmm0
58 ; CHECK-NEXT: ret{{[l|q]}}
59 %ret = call <8 x half> @llvm.experimental.constrained.fdiv.v8f16(<8 x half> %a, <8 x half> %b,
60 metadata !"round.dynamic",
61 metadata !"fpexcept.strict") #0
; f10: constrained sqrt producing <8 x half>, with "round.dynamic" rounding and
; "fpexcept.strict" exception behavior. The expected assembly is a vsqrtph on
; xmm0 followed by the return.
65 define <8 x half> @f10(<8 x half> %a) #0 {
68 ; CHECK-NEXT: vsqrtph %xmm0, %xmm0
69 ; CHECK-NEXT: ret{{[l|q]}}
70 %sqrt = call <8 x half> @llvm.experimental.constrained.sqrt.v8f16(
72 metadata !"round.dynamic",
73 metadata !"fpexcept.strict") #0
; f11: scalar double -> half truncation merged into a half vector.
; Element 0 of %a0 is truncated with the constrained fptrunc ("round.dynamic",
; "fpexcept.strict") and inserted into element 0 of %a1. The expected assembly
; is a scalar vcvtsd2sh followed by a vmovsh that merges the converted value
; with xmm1.
77 define <8 x half> @f11(<2 x double> %a0, <8 x half> %a1) #0 {
80 ; CHECK-NEXT: vcvtsd2sh %xmm0, %xmm0, %xmm0
81 ; CHECK-NEXT: vmovsh %xmm0, %xmm1, %xmm0
82 ; CHECK-NEXT: ret{{[l|q]}}
83 %ext = extractelement <2 x double> %a0, i32 0
84 %cvt = call half @llvm.experimental.constrained.fptrunc.f16.f64(double %ext,
85 metadata !"round.dynamic",
86 metadata !"fpexcept.strict") #0
87 %res = insertelement <8 x half> %a1, half %cvt, i32 0
; f12: scalar half -> double extension merged into a double vector.
; Element 0 of %a1 is extended with the constrained fpext ("fpexcept.strict";
; fpext takes no rounding-mode operand) and inserted into element 0 of %a0,
; which is returned. The expected assembly is a scalar vcvtsh2sd followed by a
; vblendps that takes the low 64 bits from the converted value and the rest
; from xmm0.
91 define <2 x double> @f12(<2 x double> %a0, <8 x half> %a1) #0 {
94 ; CHECK-NEXT: vcvtsh2sd %xmm1, %xmm1, %xmm1
95 ; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
96 ; CHECK-NEXT: ret{{[l|q]}}
97 %ext = extractelement <8 x half> %a1, i32 0
98 %cvt = call double @llvm.experimental.constrained.fpext.f64.f16(half %ext,
99 metadata !"fpexcept.strict") #0
100 %res = insertelement <2 x double> %a0, double %cvt, i32 0
101 ret <2 x double> %res
; f15: constrained fpext from <2 x half> to <2 x double> with "fpexcept.strict"
; exception behavior; the result is returned directly. The expected assembly is
; a single vcvtph2pd on xmm0 followed by the return.
104 define <2 x double> @f15(<2 x half> %a) #0 {
107 ; CHECK-NEXT: vcvtph2pd %xmm0, %xmm0
108 ; CHECK-NEXT: ret{{[l|q]}}
109 %ret = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(
111 metadata !"fpexcept.strict") #0
112 ret <2 x double> %ret
; f16: constrained fptrunc from <2 x double> to <2 x half> with "round.dynamic"
; rounding and "fpexcept.strict" exception behavior. The expected assembly is a
; single vcvtpd2ph on xmm0 followed by the return.
115 define <2 x half> @f16(<2 x double> %a) #0 {
118 ; CHECK-NEXT: vcvtpd2ph %xmm0, %xmm0
119 ; CHECK-NEXT: ret{{[l|q]}}
120 %ret = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(
122 metadata !"round.dynamic",
123 metadata !"fpexcept.strict") #0
; f17: scalar float -> half truncation merged into a half vector.
; Element 0 of %a0 is truncated with the constrained fptrunc ("round.dynamic",
; "fpexcept.strict") and inserted into element 0 of %a1. The expected assembly
; is a scalar vcvtss2sh followed by a vmovsh that merges the converted value
; with xmm1.
127 define <8 x half> @f17(<4 x float> %a0, <8 x half> %a1) #0 {
130 ; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
131 ; CHECK-NEXT: vmovsh %xmm0, %xmm1, %xmm0
132 ; CHECK-NEXT: ret{{[l|q]}}
133 %ext = extractelement <4 x float> %a0, i32 0
134 %cvt = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %ext,
135 metadata !"round.dynamic",
136 metadata !"fpexcept.strict") #0
137 %res = insertelement <8 x half> %a1, half %cvt, i32 0
; f18: scalar half -> float extension merged into a float vector.
; Element 0 of %a1 is extended with the constrained fpext ("fpexcept.strict")
; and inserted into element 0 of %a0. The expected assembly is a scalar
; vcvtsh2ss followed by a vblendps that takes 32-bit element 0 from the
; converted value and elements 1-3 from xmm0.
141 define <4 x float> @f18(<4 x float> %a0, <8 x half> %a1) #0 {
144 ; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1
145 ; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
146 ; CHECK-NEXT: ret{{[l|q]}}
147 %ext = extractelement <8 x half> %a1, i32 0
148 %cvt = call float @llvm.experimental.constrained.fpext.f32.f16(half %ext,
149 metadata !"fpexcept.strict") #0
150 %res = insertelement <4 x float> %a0, float %cvt, i32 0
; f19: constrained fpext from <2 x half> to <2 x float> with "fpexcept.strict"
; exception behavior. The expected assembly zeroes xmm1 (vxorps), blends that
; zero into 32-bit element 1 of xmm0, and only then converts with vcvtph2psx.
; NOTE(review): the zeroing presumably keeps the unused half lanes from raising
; spurious FP exceptions in the packed conversion - confirm against the X86
; strict-FP lowering.
154 define <2 x float> @f19(<2 x half> %a) #0 {
157 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
158 ; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
159 ; CHECK-NEXT: vcvtph2psx %xmm0, %xmm0
160 ; CHECK-NEXT: ret{{[l|q]}}
161 %ret = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(
163 metadata !"fpexcept.strict") #0
; f20: constrained fpext from <4 x half> to <4 x float> with "fpexcept.strict"
; exception behavior. The expected assembly is a single vcvtph2psx on xmm0
; followed by the return, with no preparatory lane zeroing.
167 define <4 x float> @f20(<4 x half> %a) #0 {
170 ; CHECK-NEXT: vcvtph2psx %xmm0, %xmm0
171 ; CHECK-NEXT: ret{{[l|q]}}
172 %ret = call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(
174 metadata !"fpexcept.strict") #0
; f21: constrained fptrunc from <2 x float> to <2 x half> with "round.dynamic"
; rounding and "fpexcept.strict" exception behavior. The expected assembly
; first zeroes the upper 64 bits of xmm0 (vmovq) and then converts with
; vcvtps2phx.
; NOTE(review): the zeroing presumably keeps the two unused float lanes from
; raising spurious FP exceptions in the packed conversion - confirm against the
; X86 strict-FP lowering.
178 define <2 x half> @f21(<2 x float> %a) #0 {
181 ; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
182 ; CHECK-NEXT: vcvtps2phx %xmm0, %xmm0
183 ; CHECK-NEXT: ret{{[l|q]}}
184 %ret = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(
186 metadata !"round.dynamic",
187 metadata !"fpexcept.strict") #0
; f22: constrained fptrunc from <4 x float> to <4 x half> with "round.dynamic"
; rounding and "fpexcept.strict" exception behavior. The expected assembly is a
; single vcvtps2phx on xmm0 followed by the return, with no preparatory lane
; zeroing.
191 define <4 x half> @f22(<4 x float> %a) #0 {
194 ; CHECK-NEXT: vcvtps2phx %xmm0, %xmm0
195 ; CHECK-NEXT: ret{{[l|q]}}
196 %ret = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(
198 metadata !"round.dynamic",
199 metadata !"fpexcept.strict") #0
; Attribute group #0 (strictfp), referenced by the test function definitions and
; by their constrained intrinsic call sites.
203 attributes #0 = { strictfp }