1 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
2 ; Vector <2 x float> frecps: the intrinsic applied to two loaded operands is expected to select frecps.2s.
3 define <2 x float> @frecps_2s(ptr %A, ptr %B) nounwind {
4 ;CHECK-LABEL: frecps_2s:
5 ;CHECK: frecps.2s
6 %tmp1 = load <2 x float>, ptr %A
7 %tmp2 = load <2 x float>, ptr %B
8 %tmp3 = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
9 ret <2 x float> %tmp3
10 }
11 ; Vector <4 x float> frecps: the intrinsic applied to two loaded operands is expected to select frecps.4s.
12 define <4 x float> @frecps_4s(ptr %A, ptr %B) nounwind {
13 ;CHECK-LABEL: frecps_4s:
14 ;CHECK: frecps.4s
15 %tmp1 = load <4 x float>, ptr %A
16 %tmp2 = load <4 x float>, ptr %B
17 %tmp3 = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
18 ret <4 x float> %tmp3
19 }
20 ; Vector <2 x double> frecps: the intrinsic applied to two loaded operands is expected to select frecps.2d.
21 define <2 x double> @frecps_2d(ptr %A, ptr %B) nounwind {
22 ;CHECK-LABEL: frecps_2d:
23 ;CHECK: frecps.2d
24 %tmp1 = load <2 x double>, ptr %A
25 %tmp2 = load <2 x double>, ptr %B
26 %tmp3 = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
27 ret <2 x double> %tmp3
28 }
30 declare <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
31 declare <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
32 declare <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double>, <2 x double>) nounwind readnone
34 ; Vector <2 x float> frsqrts: the intrinsic applied to two loaded operands is expected to select frsqrts.2s.
35 define <2 x float> @frsqrts_2s(ptr %A, ptr %B) nounwind {
36 ;CHECK-LABEL: frsqrts_2s:
37 ;CHECK: frsqrts.2s
38 %tmp1 = load <2 x float>, ptr %A
39 %tmp2 = load <2 x float>, ptr %B
40 %tmp3 = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
41 ret <2 x float> %tmp3
42 }
43 ; Vector <4 x float> frsqrts: the intrinsic applied to two loaded operands is expected to select frsqrts.4s.
44 define <4 x float> @frsqrts_4s(ptr %A, ptr %B) nounwind {
45 ;CHECK-LABEL: frsqrts_4s:
46 ;CHECK: frsqrts.4s
47 %tmp1 = load <4 x float>, ptr %A
48 %tmp2 = load <4 x float>, ptr %B
49 %tmp3 = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
50 ret <4 x float> %tmp3
51 }
52 ; Vector <2 x double> frsqrts: the intrinsic applied to two loaded operands is expected to select frsqrts.2d.
53 define <2 x double> @frsqrts_2d(ptr %A, ptr %B) nounwind {
54 ;CHECK-LABEL: frsqrts_2d:
55 ;CHECK: frsqrts.2d
56 %tmp1 = load <2 x double>, ptr %A
57 %tmp2 = load <2 x double>, ptr %B
58 %tmp3 = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
59 ret <2 x double> %tmp3
60 }
62 declare <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
63 declare <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
64 declare <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double>, <2 x double>) nounwind readnone
65 ; Vector <2 x float> frecpe: the intrinsic applied to one loaded operand is expected to select frecpe.2s.
66 define <2 x float> @frecpe_2s(ptr %A) nounwind {
67 ;CHECK-LABEL: frecpe_2s:
68 ;CHECK: frecpe.2s
69 %tmp1 = load <2 x float>, ptr %A
70 %tmp3 = call <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float> %tmp1)
71 ret <2 x float> %tmp3
72 }
73 ; Vector <4 x float> frecpe: the intrinsic applied to one loaded operand is expected to select frecpe.4s.
74 define <4 x float> @frecpe_4s(ptr %A) nounwind {
75 ;CHECK-LABEL: frecpe_4s:
76 ;CHECK: frecpe.4s
77 %tmp1 = load <4 x float>, ptr %A
78 %tmp3 = call <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float> %tmp1)
79 ret <4 x float> %tmp3
80 }
81 ; Vector <2 x double> frecpe: the intrinsic applied to one loaded operand is expected to select frecpe.2d.
82 define <2 x double> @frecpe_2d(ptr %A) nounwind {
83 ;CHECK-LABEL: frecpe_2d:
84 ;CHECK: frecpe.2d
85 %tmp1 = load <2 x double>, ptr %A
86 %tmp3 = call <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double> %tmp1)
87 ret <2 x double> %tmp3
88 }
89 ; Scalar float frecpe: the result must be produced in s0 from some s-register source.
90 define float @frecpe_s(ptr %A) nounwind {
91 ;CHECK-LABEL: frecpe_s:
92 ;CHECK: frecpe s0, {{s[0-9]+}}
93 %tmp1 = load float, ptr %A
94 %tmp3 = call float @llvm.aarch64.neon.frecpe.f32(float %tmp1)
95 ret float %tmp3
96 }
97 ; Scalar double frecpe: the result must be produced in d0 from some d-register source.
98 define double @frecpe_d(ptr %A) nounwind {
99 ;CHECK-LABEL: frecpe_d:
100 ;CHECK: frecpe d0, {{d[0-9]+}}
101 %tmp1 = load double, ptr %A
102 %tmp3 = call double @llvm.aarch64.neon.frecpe.f64(double %tmp1)
103 ret double %tmp3
104 }
106 declare <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float>) nounwind readnone
107 declare <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float>) nounwind readnone
108 declare <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double>) nounwind readnone
109 declare float @llvm.aarch64.neon.frecpe.f32(float) nounwind readnone
110 declare double @llvm.aarch64.neon.frecpe.f64(double) nounwind readnone
111 ; Scalar float frecpx: the result must be produced in s0 from some s-register source.
112 define float @frecpx_s(ptr %A) nounwind {
113 ;CHECK-LABEL: frecpx_s:
114 ;CHECK: frecpx s0, {{s[0-9]+}}
115 %tmp1 = load float, ptr %A
116 %tmp3 = call float @llvm.aarch64.neon.frecpx.f32(float %tmp1)
117 ret float %tmp3
118 }
119 ; Scalar double frecpx: the result must be produced in d0 from some d-register source.
120 define double @frecpx_d(ptr %A) nounwind {
121 ;CHECK-LABEL: frecpx_d:
122 ;CHECK: frecpx d0, {{d[0-9]+}}
123 %tmp1 = load double, ptr %A
124 %tmp3 = call double @llvm.aarch64.neon.frecpx.f64(double %tmp1)
125 ret double %tmp3
126 }
128 declare float @llvm.aarch64.neon.frecpx.f32(float) nounwind readnone
129 declare double @llvm.aarch64.neon.frecpx.f64(double) nounwind readnone
130 ; Vector <2 x float> frsqrte: the intrinsic applied to one loaded operand is expected to select frsqrte.2s.
131 define <2 x float> @frsqrte_2s(ptr %A) nounwind {
132 ;CHECK-LABEL: frsqrte_2s:
133 ;CHECK: frsqrte.2s
134 %tmp1 = load <2 x float>, ptr %A
135 %tmp3 = call <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float> %tmp1)
136 ret <2 x float> %tmp3
137 }
138 ; Vector <4 x float> frsqrte: the intrinsic applied to one loaded operand is expected to select frsqrte.4s.
139 define <4 x float> @frsqrte_4s(ptr %A) nounwind {
140 ;CHECK-LABEL: frsqrte_4s:
141 ;CHECK: frsqrte.4s
142 %tmp1 = load <4 x float>, ptr %A
143 %tmp3 = call <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float> %tmp1)
144 ret <4 x float> %tmp3
145 }
146 ; Vector <2 x double> frsqrte: the intrinsic applied to one loaded operand is expected to select frsqrte.2d.
147 define <2 x double> @frsqrte_2d(ptr %A) nounwind {
148 ;CHECK-LABEL: frsqrte_2d:
149 ;CHECK: frsqrte.2d
150 %tmp1 = load <2 x double>, ptr %A
151 %tmp3 = call <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double> %tmp1)
152 ret <2 x double> %tmp3
153 }
154 ; Scalar float frsqrte: the result must be produced in s0 from some s-register source.
155 define float @frsqrte_s(ptr %A) nounwind {
156 ;CHECK-LABEL: frsqrte_s:
157 ;CHECK: frsqrte s0, {{s[0-9]+}}
158 %tmp1 = load float, ptr %A
159 %tmp3 = call float @llvm.aarch64.neon.frsqrte.f32(float %tmp1)
160 ret float %tmp3
161 }
162 ; Scalar double frsqrte: the result must be produced in d0 from some d-register source.
163 define double @frsqrte_d(ptr %A) nounwind {
164 ;CHECK-LABEL: frsqrte_d:
165 ;CHECK: frsqrte d0, {{d[0-9]+}}
166 %tmp1 = load double, ptr %A
167 %tmp3 = call double @llvm.aarch64.neon.frsqrte.f64(double %tmp1)
168 ret double %tmp3
169 }
171 declare <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float>) nounwind readnone
172 declare <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float>) nounwind readnone
173 declare <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double>) nounwind readnone
174 declare float @llvm.aarch64.neon.frsqrte.f32(float) nounwind readnone
175 declare double @llvm.aarch64.neon.frsqrte.f64(double) nounwind readnone
176 ; Vector <2 x i32> urecpe: the intrinsic applied to one loaded operand is expected to select urecpe.2s.
177 define <2 x i32> @urecpe_2s(ptr %A) nounwind {
178 ;CHECK-LABEL: urecpe_2s:
179 ;CHECK: urecpe.2s
180 %tmp1 = load <2 x i32>, ptr %A
181 %tmp3 = call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32> %tmp1)
182 ret <2 x i32> %tmp3
183 }
184 ; Vector <4 x i32> urecpe: the intrinsic applied to one loaded operand is expected to select urecpe.4s.
185 define <4 x i32> @urecpe_4s(ptr %A) nounwind {
186 ;CHECK-LABEL: urecpe_4s:
187 ;CHECK: urecpe.4s
188 %tmp1 = load <4 x i32>, ptr %A
189 %tmp3 = call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32> %tmp1)
190 ret <4 x i32> %tmp3
191 }
193 declare <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32>) nounwind readnone
194 declare <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32>) nounwind readnone
195 ; Vector <2 x i32> ursqrte: the intrinsic applied to one loaded operand is expected to select ursqrte.2s.
196 define <2 x i32> @ursqrte_2s(ptr %A) nounwind {
197 ;CHECK-LABEL: ursqrte_2s:
198 ;CHECK: ursqrte.2s
199 %tmp1 = load <2 x i32>, ptr %A
200 %tmp3 = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %tmp1)
201 ret <2 x i32> %tmp3
202 }
203 ; Vector <4 x i32> ursqrte: the intrinsic applied to one loaded operand is expected to select ursqrte.4s.
204 define <4 x i32> @ursqrte_4s(ptr %A) nounwind {
205 ;CHECK-LABEL: ursqrte_4s:
206 ;CHECK: ursqrte.4s
207 %tmp1 = load <4 x i32>, ptr %A
208 %tmp3 = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %tmp1)
209 ret <4 x i32> %tmp3
210 }
212 declare <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32>) nounwind readnone
213 declare <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32>) nounwind readnone
214 ; Scalar float frsqrts on register arguments: expects a single frsqrts s0, s0, s1 followed directly by the return.
215 define float @f1(float %a, float %b) nounwind readnone optsize ssp {
216 ; CHECK-LABEL: f1:
217 ; CHECK: frsqrts s0, s0, s1
218 ; CHECK-NEXT: ret
219 %vrsqrtss.i = tail call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b) nounwind
220 ret float %vrsqrtss.i
221 }
222 ; Scalar double frsqrts on register arguments: expects a single frsqrts d0, d0, d1 followed directly by the return.
223 define double @f2(double %a, double %b) nounwind readnone optsize ssp {
224 ; CHECK-LABEL: f2:
225 ; CHECK: frsqrts d0, d0, d1
226 ; CHECK-NEXT: ret
227 %vrsqrtsd.i = tail call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b) nounwind
228 ret double %vrsqrtsd.i
229 }
231 declare double @llvm.aarch64.neon.frsqrts.f64(double, double) nounwind readnone
232 declare float @llvm.aarch64.neon.frsqrts.f32(float, float) nounwind readnone