1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
3 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
; sitofp00: signed <8 x i32> -> <8 x float> conversion. Both llc runs share
; the expectation of a vcvtdq2ps from ymm0 into ymm0.
5 define <8 x float> @sitofp00(<8 x i32> %a) nounwind {
6 ; CHECK-LABEL: sitofp00:
8 ; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
10 %b = sitofp <8 x i32> %a to <8 x float>
; fptosi00: <8 x float> -> signed <8 x i32> conversion. Both llc runs share
; the expectation of a vcvttps2dq from ymm0 into ymm0.
14 define <8 x i32> @fptosi00(<8 x float> %a) nounwind {
15 ; CHECK-LABEL: fptosi00:
17 ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
19 %b = fptosi <8 x float> %a to <8 x i32>
; sitofp01: signed <4 x i32> -> <4 x double> conversion. The shared check
; expects vcvtdq2pd reading xmm0 and writing ymm0.
23 define <4 x double> @sitofp01(<4 x i32> %a) {
24 ; CHECK-LABEL: sitofp01:
26 ; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
28 %b = sitofp <4 x i32> %a to <4 x double>
; sitofp02: signed <8 x i16> -> <8 x float> conversion; the two runs have
; different expectations.
;  - the +avx run sign-extends the input in two xmm pieces (vpmovsxwd,
;    vpshufd, vpmovsxwd), joins them with vinsertf128, then does vcvtdq2ps.
;  - the +avx512f run sign-extends with one vpmovsxwd from xmm0 to ymm0
;    before the vcvtdq2ps.
32 define <8 x float> @sitofp02(<8 x i16> %a) {
33 ; AVX-LABEL: sitofp02:
35 ; AVX-NEXT: vpmovsxwd %xmm0, %xmm1
36 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
37 ; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
38 ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
39 ; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
42 ; AVX512-LABEL: sitofp02:
44 ; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
45 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
47 %b = sitofp <8 x i16> %a to <8 x float>
; fptosi01: <4 x double> -> signed <4 x i32> conversion. The shared check
; expects vcvttpd2dq from ymm0 into xmm0, followed by vzeroupper.
51 define <4 x i32> @fptosi01(<4 x double> %a) {
52 ; CHECK-LABEL: fptosi01:
54 ; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0
55 ; CHECK-NEXT: vzeroupper
57 %b = fptosi <4 x double> %a to <4 x i32>
; fptrunc00: <8 x double> -> <8 x float> truncation; the two runs have
; different expectations.
;  - the +avx run converts ymm0 and ymm1 separately with vcvtpd2ps and joins
;    the two xmm results with vinsertf128.
;  - the +avx512f run uses one vcvtpd2ps from zmm0 into ymm0.
61 define <8 x float> @fptrunc00(<8 x double> %b) nounwind {
62 ; AVX-LABEL: fptrunc00:
64 ; AVX-NEXT: vcvtpd2ps %ymm0, %xmm0
65 ; AVX-NEXT: vcvtpd2ps %ymm1, %xmm1
66 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
69 ; AVX512-LABEL: fptrunc00:
71 ; AVX512-NEXT: vcvtpd2ps %zmm0, %ymm0
73 %a = fptrunc <8 x double> %b to <8 x float>
; fptrunc01: takes element 0 of %a0, truncates it from double to float and
; inserts the result into element 0 of %a1. The shared check expects this as
; vcvtsd2ss %xmm0, %xmm1, %xmm0.
77 define <4 x float> @fptrunc01(<2 x double> %a0, <4 x float> %a1) nounwind {
78 ; CHECK-LABEL: fptrunc01:
80 ; CHECK-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0
82 %ext = extractelement <2 x double> %a0, i32 0
83 %cvt = fptrunc double %ext to float
84 %res = insertelement <4 x float> %a1, float %cvt, i32 0
; fpext00: <4 x float> -> <4 x double> extension. The shared check expects
; vcvtps2pd reading xmm0 and writing ymm0.
88 define <4 x double> @fpext00(<4 x float> %b) nounwind {
89 ; CHECK-LABEL: fpext00:
91 ; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0
93 %a = fpext <4 x float> %b to <4 x double>
; fpext01: takes element 0 of %a1, extends it from float to double and
; inserts the result into element 0 of %a0, which is returned. The shared
; check expects this as vcvtss2sd %xmm1, %xmm0, %xmm0.
97 define <2 x double> @fpext01(<2 x double> %a0, <4 x float> %a1) nounwind {
98 ; CHECK-LABEL: fpext01:
100 ; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0
102 %ext = extractelement <4 x float> %a1, i32 0
103 %cvt = fpext float %ext to double
104 %res = insertelement <2 x double> %a0, double %cvt, i32 0
105 ret <2 x double> %res
; funcA: loads an i64 from %e and converts it to double with sitofp. The
; shared check expects vcvtsi2sdq with (%rdi) as its memory source operand.
108 define double @funcA(i64* nocapture %e) nounwind uwtable readonly ssp {
109 ; CHECK-LABEL: funcA:
111 ; CHECK-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0
113 %tmp1 = load i64, i64* %e, align 8
114 %conv = sitofp i64 %tmp1 to double
; funcB: loads an i32 from %e and converts it to double with sitofp. The
; shared check expects vcvtsi2sdl with (%rdi) as its memory source operand.
118 define double @funcB(i32* nocapture %e) nounwind uwtable readonly ssp {
119 ; CHECK-LABEL: funcB:
121 ; CHECK-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0
123 %tmp1 = load i32, i32* %e, align 4
124 %conv = sitofp i32 %tmp1 to double
; funcC: loads an i32 from %e and converts it to float with sitofp. The
; shared check expects vcvtsi2ssl with (%rdi) as its memory source operand.
128 define float @funcC(i32* nocapture %e) nounwind uwtable readonly ssp {
129 ; CHECK-LABEL: funcC:
131 ; CHECK-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0
133 %tmp1 = load i32, i32* %e, align 4
134 %conv = sitofp i32 %tmp1 to float
; funcD: loads an i64 from %e and converts it to float with sitofp. The
; shared check expects vcvtsi2ssq with (%rdi) as its memory source operand.
138 define float @funcD(i64* nocapture %e) nounwind uwtable readonly ssp {
139 ; CHECK-LABEL: funcD:
141 ; CHECK-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0
143 %tmp1 = load i64, i64* %e, align 8
144 %conv = sitofp i64 %tmp1 to float
; fpext: scalar float -> double extension through two stack slots. %f is
; loaded without a prior store in this function, the value is extended and
; stored to %d. The shared checks expect a separate vmovss load, a
; register-to-register vcvtss2sd, and a vmovsd store to an %rsp-relative slot.
148 define void @fpext() nounwind uwtable {
149 ; CHECK-LABEL: fpext:
151 ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
152 ; CHECK-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
153 ; CHECK-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp)
155 %f = alloca float, align 4
156 %d = alloca double, align 8
157 %tmp = load float, float* %f, align 4
158 %conv = fpext float %tmp to double
159 store double %conv, double* %d, align 8
; nearbyint_f64: calls llvm.nearbyint.f64 on a double argument. The shared
; check expects vroundsd with immediate $12 operating on xmm0.
163 define double @nearbyint_f64(double %a) {
164 ; CHECK-LABEL: nearbyint_f64:
166 ; CHECK-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
168 %res = call double @llvm.nearbyint.f64(double %a)
; Declaration of the llvm.nearbyint.f64 intrinsic called by nearbyint_f64,
; nearbyint_f64_load and nearbyint_f64_load_pgso.
171 declare double @llvm.nearbyint.f64(double %p)
; floor_f32: calls llvm.floor.f32 on a float argument. The shared check
; expects vroundss with immediate $9 operating on xmm0.
173 define float @floor_f32(float %a) {
174 ; CHECK-LABEL: floor_f32:
176 ; CHECK-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
178 %res = call float @llvm.floor.f32(float %a)
; Declaration of the llvm.floor.f32 intrinsic called by floor_f32,
; floor_f32_load and floor_f32_load_pgso.
181 declare float @llvm.floor.f32(float %p)
; floor_f32_load: optsize variant of the floor test whose operand is loaded
; from %aptr. The shared check expects the load to appear as the (%rdi)
; memory operand of vroundss $9 rather than as a register source.
183 define float @floor_f32_load(float* %aptr) optsize {
184 ; CHECK-LABEL: floor_f32_load:
186 ; CHECK-NEXT: vroundss $9, (%rdi), %xmm0, %xmm0
188 %a = load float, float* %aptr
189 %res = call float @llvm.floor.f32(float %a)
; floor_f32_load_pgso: same body as floor_f32_load, but instead of optsize
; the function carries !prof !14 (a function_entry_count of 0). The expected
; vroundss $9 with a (%rdi) memory operand matches the optsize variant.
; NOTE(review): the _pgso suffix suggests this covers profile-guided size
; optimization - confirm.
193 define float @floor_f32_load_pgso(float* %aptr) !prof !14 {
194 ; CHECK-LABEL: floor_f32_load_pgso:
196 ; CHECK-NEXT: vroundss $9, (%rdi), %xmm0, %xmm0
198 %a = load float, float* %aptr
199 %res = call float @llvm.floor.f32(float %a)
; nearbyint_f64_load: optsize variant of the nearbyint test whose operand is
; loaded from %aptr. The shared check expects the load to appear as the
; (%rdi) memory operand of vroundsd $12 rather than as a register source.
203 define double @nearbyint_f64_load(double* %aptr) optsize {
204 ; CHECK-LABEL: nearbyint_f64_load:
206 ; CHECK-NEXT: vroundsd $12, (%rdi), %xmm0, %xmm0
208 %a = load double, double* %aptr
209 %res = call double @llvm.nearbyint.f64(double %a)
; nearbyint_f64_load_pgso: same body as nearbyint_f64_load, but instead of
; optsize the function carries !prof !14 (a function_entry_count of 0). The
; expected vroundsd $12 with a (%rdi) memory operand matches the optsize
; variant.
; NOTE(review): the _pgso suffix suggests this covers profile-guided size
; optimization - confirm.
213 define double @nearbyint_f64_load_pgso(double* %aptr) !prof !14 {
214 ; CHECK-LABEL: nearbyint_f64_load_pgso:
216 ; CHECK-NEXT: vroundsd $12, (%rdi), %xmm0, %xmm0
218 %a = load double, double* %aptr
219 %res = call double @llvm.nearbyint.f64(double %a)
; Module-level metadata: a "ProfileSummary" module flag in InstrProf format
; (!0 through !13), plus !14, the function_entry_count of 0 that
; floor_f32_load_pgso and nearbyint_f64_load_pgso attach through !prof.
223 !llvm.module.flags = !{!0}
224 !0 = !{i32 1, !"ProfileSummary", !1}
225 !1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
226 !2 = !{!"ProfileFormat", !"InstrProf"}
227 !3 = !{!"TotalCount", i64 10000}
228 !4 = !{!"MaxCount", i64 10}
229 !5 = !{!"MaxInternalCount", i64 1}
230 !6 = !{!"MaxFunctionCount", i64 1000}
231 !7 = !{!"NumCounts", i64 3}
232 !8 = !{!"NumFunctions", i64 3}
233 !9 = !{!"DetailedSummary", !10}
234 !10 = !{!11, !12, !13}
235 !11 = !{i32 10000, i64 100, i32 1}
236 !12 = !{i32 999000, i64 100, i32 1}
237 !13 = !{i32 999999, i64 1, i32 2}
238 !14 = !{!"function_entry_count", i64 0}