1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; The same IR is run through inject-tli-mappings + slp-vectorizer twice:
; once with -vector-library=Accelerate (assertions use the default CHECK
; prefix) and once without any vector library (assertions use the
; NOACCELERATE prefix).
2 ; RUN: opt -passes=inject-tli-mappings,slp-vectorizer -vector-library=Accelerate -S %s | FileCheck %s
3 ; RUN: opt -passes=inject-tli-mappings,slp-vectorizer -S %s | FileCheck --check-prefix NOACCELERATE %s
5 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
6 target triple = "arm64-apple-ios14.0.0"
; int_sin_4x: four scalar llvm.sin.f32 intrinsic calls, one per lane of a
; loaded <4 x float>, reassembled with insertelement.
; With Accelerate the assertions expect a single call to @vsinf.
; Without it, lanes 0 and 1 are expected to stay scalar while lanes 2 and 3
; are expected as one <2 x float> llvm.sin.v2f32 call.
8 declare float @llvm.sin.f32(float)
10 ; Accelerate provides sin() for <4 x float>
11 define <4 x float> @int_sin_4x(ptr %a) {
12 ; CHECK-LABEL: @int_sin_4x(
14 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
15 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vsinf(<4 x float> [[TMP0]])
16 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
18 ; NOACCELERATE-LABEL: @int_sin_4x(
19 ; NOACCELERATE-NEXT: entry:
20 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
21 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
22 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
23 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
24 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
25 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
26 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
27 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
28 ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]])
29 ; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
30 ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
31 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
34 %0 = load <4 x float>, ptr %a, align 16
35 %vecext = extractelement <4 x float> %0, i32 0
36 %1 = tail call fast float @llvm.sin.f32(float %vecext)
37 %vecins = insertelement <4 x float> poison, float %1, i32 0
38 %vecext.1 = extractelement <4 x float> %0, i32 1
39 %2 = tail call fast float @llvm.sin.f32(float %vecext.1)
40 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
41 %vecext.2 = extractelement <4 x float> %0, i32 2
42 %3 = tail call fast float @llvm.sin.f32(float %vecext.2)
43 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
44 %vecext.3 = extractelement <4 x float> %0, i32 3
45 %4 = tail call fast float @llvm.sin.f32(float %vecext.3)
46 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
47 ret <4 x float> %vecins.3
; ceil_4x: four scalar ceilf libcalls on the lanes of a loaded <4 x float>.
; Both configurations expect the same result: a single call to the
; llvm.ceil.v4f32 intrinsic rather than an Accelerate routine.
50 declare float @ceilf(float) readonly nounwind willreturn
52 define <4 x float> @ceil_4x(ptr %a) {
53 ; CHECK-LABEL: @ceil_4x(
55 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
56 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]])
57 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
59 ; NOACCELERATE-LABEL: @ceil_4x(
60 ; NOACCELERATE-NEXT: entry:
61 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
62 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]])
63 ; NOACCELERATE-NEXT: ret <4 x float> [[TMP1]]
66 %0 = load <4 x float>, ptr %a, align 16
67 %vecext = extractelement <4 x float> %0, i32 0
68 %1 = tail call fast float @ceilf(float %vecext)
69 %vecins = insertelement <4 x float> poison, float %1, i32 0
70 %vecext.1 = extractelement <4 x float> %0, i32 1
71 %2 = tail call fast float @ceilf(float %vecext.1)
72 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
73 %vecext.2 = extractelement <4 x float> %0, i32 2
74 %3 = tail call fast float @ceilf(float %vecext.2)
75 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
76 %vecext.3 = extractelement <4 x float> %0, i32 3
77 %4 = tail call fast float @ceilf(float %vecext.3)
78 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
79 ret <4 x float> %vecins.3
; fabs_4x: four scalar fabsf libcalls on the lanes of a loaded <4 x float>.
; Both configurations expect a single call to the llvm.fabs.v4f32 intrinsic.
82 declare float @fabsf(float) readonly nounwind willreturn
84 define <4 x float> @fabs_4x(ptr %a) {
85 ; CHECK-LABEL: @fabs_4x(
87 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
88 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
89 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
91 ; NOACCELERATE-LABEL: @fabs_4x(
92 ; NOACCELERATE-NEXT: entry:
93 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
94 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
95 ; NOACCELERATE-NEXT: ret <4 x float> [[TMP1]]
98 %0 = load <4 x float>, ptr %a, align 16
99 %vecext = extractelement <4 x float> %0, i32 0
100 %1 = tail call fast float @fabsf(float %vecext)
101 %vecins = insertelement <4 x float> poison, float %1, i32 0
102 %vecext.1 = extractelement <4 x float> %0, i32 1
103 %2 = tail call fast float @fabsf(float %vecext.1)
104 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
105 %vecext.2 = extractelement <4 x float> %0, i32 2
106 %3 = tail call fast float @fabsf(float %vecext.2)
107 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
108 %vecext.3 = extractelement <4 x float> %0, i32 3
109 %4 = tail call fast float @fabsf(float %vecext.3)
110 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
111 ret <4 x float> %vecins.3
; int_fabs_4x: same shape as fabs_4x, but the scalar calls use the
; llvm.fabs.f32 intrinsic instead of the fabsf libcall.
; Both configurations expect a single call to llvm.fabs.v4f32.
113 declare float @llvm.fabs.f32(float) nounwind willreturn
114 define <4 x float> @int_fabs_4x(ptr %a) {
115 ; CHECK-LABEL: @int_fabs_4x(
117 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
118 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
119 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
121 ; NOACCELERATE-LABEL: @int_fabs_4x(
122 ; NOACCELERATE-NEXT: entry:
123 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
124 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
125 ; NOACCELERATE-NEXT: ret <4 x float> [[TMP1]]
128 %0 = load <4 x float>, ptr %a, align 16
129 %vecext = extractelement <4 x float> %0, i32 0
130 %1 = tail call fast float @llvm.fabs.f32(float %vecext)
131 %vecins = insertelement <4 x float> poison, float %1, i32 0
132 %vecext.1 = extractelement <4 x float> %0, i32 1
133 %2 = tail call fast float @llvm.fabs.f32(float %vecext.1)
134 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
135 %vecext.2 = extractelement <4 x float> %0, i32 2
136 %3 = tail call fast float @llvm.fabs.f32(float %vecext.2)
137 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
138 %vecext.3 = extractelement <4 x float> %0, i32 3
139 %4 = tail call fast float @llvm.fabs.f32(float %vecext.3)
140 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
141 ret <4 x float> %vecins.3
; floor_4x: four scalar floorf libcalls on the lanes of a loaded <4 x float>.
; Both configurations expect a single call to the llvm.floor.v4f32 intrinsic.
143 declare float @floorf(float) readonly nounwind willreturn
144 define <4 x float> @floor_4x(ptr %a) {
145 ; CHECK-LABEL: @floor_4x(
147 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
148 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]])
149 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
151 ; NOACCELERATE-LABEL: @floor_4x(
152 ; NOACCELERATE-NEXT: entry:
153 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
154 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]])
155 ; NOACCELERATE-NEXT: ret <4 x float> [[TMP1]]
158 %0 = load <4 x float>, ptr %a, align 16
159 %vecext = extractelement <4 x float> %0, i32 0
160 %1 = tail call fast float @floorf(float %vecext)
161 %vecins = insertelement <4 x float> poison, float %1, i32 0
162 %vecext.1 = extractelement <4 x float> %0, i32 1
163 %2 = tail call fast float @floorf(float %vecext.1)
164 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
165 %vecext.2 = extractelement <4 x float> %0, i32 2
166 %3 = tail call fast float @floorf(float %vecext.2)
167 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
168 %vecext.3 = extractelement <4 x float> %0, i32 3
169 %4 = tail call fast float @floorf(float %vecext.3)
170 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
171 ret <4 x float> %vecins.3
; sqrt_4x: four scalar sqrtf libcalls on the lanes of a loaded <4 x float>.
; Both configurations expect a single call to the llvm.sqrt.v4f32 intrinsic.
173 declare float @sqrtf(float) readonly nounwind willreturn
174 define <4 x float> @sqrt_4x(ptr %a) {
175 ; CHECK-LABEL: @sqrt_4x(
177 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
178 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
179 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
181 ; NOACCELERATE-LABEL: @sqrt_4x(
182 ; NOACCELERATE-NEXT: entry:
183 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
184 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
185 ; NOACCELERATE-NEXT: ret <4 x float> [[TMP1]]
188 %0 = load <4 x float>, ptr %a, align 16
189 %vecext = extractelement <4 x float> %0, i32 0
190 %1 = tail call fast float @sqrtf(float %vecext)
191 %vecins = insertelement <4 x float> poison, float %1, i32 0
192 %vecext.1 = extractelement <4 x float> %0, i32 1
193 %2 = tail call fast float @sqrtf(float %vecext.1)
194 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
195 %vecext.2 = extractelement <4 x float> %0, i32 2
196 %3 = tail call fast float @sqrtf(float %vecext.2)
197 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
198 %vecext.3 = extractelement <4 x float> %0, i32 3
199 %4 = tail call fast float @sqrtf(float %vecext.3)
200 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
201 ret <4 x float> %vecins.3
; exp_4x: four scalar expf libcalls on the lanes of a loaded <4 x float>.
; With Accelerate the assertions expect a single call to @vexpf.
; Without it, lanes 0 and 1 are expected to stay as scalar expf calls while
; lanes 2 and 3 are expected as one <2 x float> llvm.exp.v2f32 call.
203 declare float @expf(float) readonly nounwind willreturn
204 define <4 x float> @exp_4x(ptr %a) {
205 ; CHECK-LABEL: @exp_4x(
207 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
208 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vexpf(<4 x float> [[TMP0]])
209 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
211 ; NOACCELERATE-LABEL: @exp_4x(
212 ; NOACCELERATE-NEXT: entry:
213 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
214 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
215 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
216 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
217 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
218 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
219 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
220 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
221 ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]])
222 ; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
223 ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
224 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
227 %0 = load <4 x float>, ptr %a, align 16
228 %vecext = extractelement <4 x float> %0, i32 0
229 %1 = tail call fast float @expf(float %vecext)
230 %vecins = insertelement <4 x float> poison, float %1, i32 0
231 %vecext.1 = extractelement <4 x float> %0, i32 1
232 %2 = tail call fast float @expf(float %vecext.1)
233 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
234 %vecext.2 = extractelement <4 x float> %0, i32 2
235 %3 = tail call fast float @expf(float %vecext.2)
236 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
237 %vecext.3 = extractelement <4 x float> %0, i32 3
238 %4 = tail call fast float @expf(float %vecext.3)
239 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
240 ret <4 x float> %vecins.3
; expm1_4x: four scalar expm1f libcalls on the lanes of a loaded <4 x float>.
; With Accelerate the assertions expect a single call to @vexpm1f.
; Without it, all four scalar expm1f calls are expected to remain unchanged.
242 declare float @expm1f(float) readonly nounwind willreturn
243 define <4 x float> @expm1_4x(ptr %a) {
244 ; CHECK-LABEL: @expm1_4x(
246 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
247 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vexpm1f(<4 x float> [[TMP0]])
248 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
250 ; NOACCELERATE-LABEL: @expm1_4x(
251 ; NOACCELERATE-NEXT: entry:
252 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
253 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
254 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @expm1f(float [[VECEXT]])
255 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
256 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
257 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @expm1f(float [[VECEXT_1]])
258 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
259 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
260 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @expm1f(float [[VECEXT_2]])
261 ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
262 ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
263 ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @expm1f(float [[VECEXT_3]])
264 ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
265 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
268 %0 = load <4 x float>, ptr %a, align 16
269 %vecext = extractelement <4 x float> %0, i32 0
270 %1 = tail call fast float @expm1f(float %vecext)
271 %vecins = insertelement <4 x float> poison, float %1, i32 0
272 %vecext.1 = extractelement <4 x float> %0, i32 1
273 %2 = tail call fast float @expm1f(float %vecext.1)
274 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
275 %vecext.2 = extractelement <4 x float> %0, i32 2
276 %3 = tail call fast float @expm1f(float %vecext.2)
277 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
278 %vecext.3 = extractelement <4 x float> %0, i32 3
279 %4 = tail call fast float @expm1f(float %vecext.3)
280 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
281 ret <4 x float> %vecins.3
; log_4x: four scalar logf libcalls on the lanes of a loaded <4 x float>.
; With Accelerate the assertions expect a single call to @vlogf.
; Without it, lanes 0 and 1 are expected to stay as scalar logf calls while
; lanes 2 and 3 are expected as one <2 x float> llvm.log.v2f32 call.
283 declare float @logf(float) readonly nounwind willreturn
284 define <4 x float> @log_4x(ptr %a) {
285 ; CHECK-LABEL: @log_4x(
287 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
288 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vlogf(<4 x float> [[TMP0]])
289 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
291 ; NOACCELERATE-LABEL: @log_4x(
292 ; NOACCELERATE-NEXT: entry:
293 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
294 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
295 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
296 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
297 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
298 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
299 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
300 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
301 ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]])
302 ; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
303 ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
304 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
307 %0 = load <4 x float>, ptr %a, align 16
308 %vecext = extractelement <4 x float> %0, i32 0
309 %1 = tail call fast float @logf(float %vecext)
310 %vecins = insertelement <4 x float> poison, float %1, i32 0
311 %vecext.1 = extractelement <4 x float> %0, i32 1
312 %2 = tail call fast float @logf(float %vecext.1)
313 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
314 %vecext.2 = extractelement <4 x float> %0, i32 2
315 %3 = tail call fast float @logf(float %vecext.2)
316 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
317 %vecext.3 = extractelement <4 x float> %0, i32 3
318 %4 = tail call fast float @logf(float %vecext.3)
319 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
320 ret <4 x float> %vecins.3
; log1p_4x: four scalar log1pf libcalls on the lanes of a loaded <4 x float>.
; With Accelerate the assertions expect a single call to @vlog1pf.
; Without it, all four scalar log1pf calls are expected to remain unchanged.
322 declare float @log1pf(float) readonly nounwind willreturn
323 define <4 x float> @log1p_4x(ptr %a) {
324 ; CHECK-LABEL: @log1p_4x(
326 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
327 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vlog1pf(<4 x float> [[TMP0]])
328 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
330 ; NOACCELERATE-LABEL: @log1p_4x(
331 ; NOACCELERATE-NEXT: entry:
332 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
333 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
334 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @log1pf(float [[VECEXT]])
335 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
336 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
337 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @log1pf(float [[VECEXT_1]])
338 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
339 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
340 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @log1pf(float [[VECEXT_2]])
341 ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
342 ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
343 ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @log1pf(float [[VECEXT_3]])
344 ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
345 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
348 %0 = load <4 x float>, ptr %a, align 16
349 %vecext = extractelement <4 x float> %0, i32 0
350 %1 = tail call fast float @log1pf(float %vecext)
351 %vecins = insertelement <4 x float> poison, float %1, i32 0
352 %vecext.1 = extractelement <4 x float> %0, i32 1
353 %2 = tail call fast float @log1pf(float %vecext.1)
354 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
355 %vecext.2 = extractelement <4 x float> %0, i32 2
356 %3 = tail call fast float @log1pf(float %vecext.2)
357 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
358 %vecext.3 = extractelement <4 x float> %0, i32 3
359 %4 = tail call fast float @log1pf(float %vecext.3)
360 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
361 ret <4 x float> %vecins.3
; log10p_4x: negative test. Four scalar calls to @log10pf on the lanes of a
; loaded <4 x float>; both configurations expect every scalar call to be left
; untouched (no vector call is formed).
; NOTE(review): presumably log10pf has no Accelerate mapping and is not a
; recognized libm routine, which is why nothing is vectorized - confirm.
363 declare float @log10pf(float) readonly nounwind willreturn
364 define <4 x float> @log10p_4x(ptr %a) {
365 ; CHECK-LABEL: @log10p_4x(
367 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
368 ; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
369 ; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @log10pf(float [[VECEXT]])
370 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
371 ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
372 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @log10pf(float [[VECEXT_1]])
373 ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
374 ; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
375 ; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @log10pf(float [[VECEXT_2]])
376 ; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
377 ; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
378 ; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @log10pf(float [[VECEXT_3]])
379 ; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
380 ; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
382 ; NOACCELERATE-LABEL: @log10p_4x(
383 ; NOACCELERATE-NEXT: entry:
384 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
385 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
386 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @log10pf(float [[VECEXT]])
387 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
388 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
389 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @log10pf(float [[VECEXT_1]])
390 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
391 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
392 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @log10pf(float [[VECEXT_2]])
393 ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
394 ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
395 ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @log10pf(float [[VECEXT_3]])
396 ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
397 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
400 %0 = load <4 x float>, ptr %a, align 16
401 %vecext = extractelement <4 x float> %0, i32 0
402 %1 = tail call fast float @log10pf(float %vecext)
403 %vecins = insertelement <4 x float> poison, float %1, i32 0
404 %vecext.1 = extractelement <4 x float> %0, i32 1
405 %2 = tail call fast float @log10pf(float %vecext.1)
406 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
407 %vecext.2 = extractelement <4 x float> %0, i32 2
408 %3 = tail call fast float @log10pf(float %vecext.2)
409 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
410 %vecext.3 = extractelement <4 x float> %0, i32 3
411 %4 = tail call fast float @log10pf(float %vecext.3)
412 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
413 ret <4 x float> %vecins.3
; logb_4x: four scalar logbf libcalls on the lanes of a loaded <4 x float>.
; With Accelerate the assertions expect a single call to @vlogbf.
; Without it, all four scalar logbf calls are expected to remain unchanged.
415 declare float @logbf(float) readonly nounwind willreturn
416 define <4 x float> @logb_4x(ptr %a) {
417 ; CHECK-LABEL: @logb_4x(
419 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
420 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vlogbf(<4 x float> [[TMP0]])
421 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
423 ; NOACCELERATE-LABEL: @logb_4x(
424 ; NOACCELERATE-NEXT: entry:
425 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
426 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
427 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @logbf(float [[VECEXT]])
428 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
429 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
430 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @logbf(float [[VECEXT_1]])
431 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
432 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
433 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @logbf(float [[VECEXT_2]])
434 ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
435 ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
436 ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @logbf(float [[VECEXT_3]])
437 ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
438 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
441 %0 = load <4 x float>, ptr %a, align 16
442 %vecext = extractelement <4 x float> %0, i32 0
443 %1 = tail call fast float @logbf(float %vecext)
444 %vecins = insertelement <4 x float> poison, float %1, i32 0
445 %vecext.1 = extractelement <4 x float> %0, i32 1
446 %2 = tail call fast float @logbf(float %vecext.1)
447 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
448 %vecext.2 = extractelement <4 x float> %0, i32 2
449 %3 = tail call fast float @logbf(float %vecext.2)
450 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
451 %vecext.3 = extractelement <4 x float> %0, i32 3
452 %4 = tail call fast float @logbf(float %vecext.3)
453 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
454 ret <4 x float> %vecins.3
; sin_4x: four scalar sinf libcalls on the lanes of a loaded <4 x float>
; (libcall counterpart of the intrinsic-based int_sin_4x).
; With Accelerate the assertions expect a single call to @vsinf.
; Without it, lanes 0 and 1 are expected to stay as scalar sinf calls while
; lanes 2 and 3 are expected as one <2 x float> llvm.sin.v2f32 call.
456 declare float @sinf(float) readonly nounwind willreturn
457 define <4 x float> @sin_4x(ptr %a) {
458 ; CHECK-LABEL: @sin_4x(
460 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
461 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vsinf(<4 x float> [[TMP0]])
462 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
464 ; NOACCELERATE-LABEL: @sin_4x(
465 ; NOACCELERATE-NEXT: entry:
466 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
467 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
468 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
469 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
470 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
471 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
472 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
473 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
474 ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]])
475 ; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
476 ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
477 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
480 %0 = load <4 x float>, ptr %a, align 16
481 %vecext = extractelement <4 x float> %0, i32 0
482 %1 = tail call fast float @sinf(float %vecext)
483 %vecins = insertelement <4 x float> poison, float %1, i32 0
484 %vecext.1 = extractelement <4 x float> %0, i32 1
485 %2 = tail call fast float @sinf(float %vecext.1)
486 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
487 %vecext.2 = extractelement <4 x float> %0, i32 2
488 %3 = tail call fast float @sinf(float %vecext.2)
489 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
490 %vecext.3 = extractelement <4 x float> %0, i32 3
491 %4 = tail call fast float @sinf(float %vecext.3)
492 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
493 ret <4 x float> %vecins.3
; cos_4x: four scalar cosf libcalls on the lanes of a loaded <4 x float>.
; With Accelerate the assertions expect a single call to @vcosf.
; Without it, lanes 0 and 1 are expected to stay as scalar cosf calls while
; lanes 2 and 3 are expected as one <2 x float> llvm.cos.v2f32 call.
495 declare float @cosf(float) readonly nounwind willreturn
496 define <4 x float> @cos_4x(ptr %a) {
497 ; CHECK-LABEL: @cos_4x(
499 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
500 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vcosf(<4 x float> [[TMP0]])
501 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
503 ; NOACCELERATE-LABEL: @cos_4x(
504 ; NOACCELERATE-NEXT: entry:
505 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
506 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
507 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
508 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
509 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
510 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
511 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
512 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
513 ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]])
514 ; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
515 ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
516 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
519 %0 = load <4 x float>, ptr %a, align 16
520 %vecext = extractelement <4 x float> %0, i32 0
521 %1 = tail call fast float @cosf(float %vecext)
522 %vecins = insertelement <4 x float> poison, float %1, i32 0
523 %vecext.1 = extractelement <4 x float> %0, i32 1
524 %2 = tail call fast float @cosf(float %vecext.1)
525 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
526 %vecext.2 = extractelement <4 x float> %0, i32 2
527 %3 = tail call fast float @cosf(float %vecext.2)
528 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
529 %vecext.3 = extractelement <4 x float> %0, i32 3
530 %4 = tail call fast float @cosf(float %vecext.3)
531 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
532 ret <4 x float> %vecins.3
; tan_4x: four scalar tanf libcalls on the lanes of a loaded <4 x float>.
; With Accelerate the assertions expect a single call to @vtanf.
; Without it, all four scalar tanf calls are expected to remain unchanged.
534 declare float @tanf(float) readonly nounwind willreturn
535 define <4 x float> @tan_4x(ptr %a) {
536 ; CHECK-LABEL: @tan_4x(
538 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
539 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vtanf(<4 x float> [[TMP0]])
540 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
542 ; NOACCELERATE-LABEL: @tan_4x(
543 ; NOACCELERATE-NEXT: entry:
544 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
545 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
546 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
547 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
548 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
549 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
550 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
551 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
552 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
553 ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
554 ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
555 ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
556 ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
557 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
560 %0 = load <4 x float>, ptr %a, align 16
561 %vecext = extractelement <4 x float> %0, i32 0
562 %1 = tail call fast float @tanf(float %vecext)
563 %vecins = insertelement <4 x float> poison, float %1, i32 0
564 %vecext.1 = extractelement <4 x float> %0, i32 1
565 %2 = tail call fast float @tanf(float %vecext.1)
566 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
567 %vecext.2 = extractelement <4 x float> %0, i32 2
568 %3 = tail call fast float @tanf(float %vecext.2)
569 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
570 %vecext.3 = extractelement <4 x float> %0, i32 3
571 %4 = tail call fast float @tanf(float %vecext.3)
572 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
573 ret <4 x float> %vecins.3
575 declare float @asinf(float) readonly nounwind willreturn
; With -vector-library=Accelerate (CHECK), the four scalar @asinf calls are
; expected to become one <4 x float> @vasinf call; without it (NOACCELERATE)
; all four scalar calls are expected to remain.
576 define <4 x float> @asin_4x(ptr %a) {
577 ; CHECK-LABEL: @asin_4x(
579 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
580 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vasinf(<4 x float> [[TMP0]])
581 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
583 ; NOACCELERATE-LABEL: @asin_4x(
584 ; NOACCELERATE-NEXT: entry:
585 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
586 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
587 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
588 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
589 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
590 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
591 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
592 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
593 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
594 ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
595 ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
596 ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
597 ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
598 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
601 %0 = load <4 x float>, ptr %a, align 16
602 %vecext = extractelement <4 x float> %0, i32 0
603 %1 = tail call fast float @asinf(float %vecext)
604 %vecins = insertelement <4 x float> poison, float %1, i32 0
605 %vecext.1 = extractelement <4 x float> %0, i32 1
606 %2 = tail call fast float @asinf(float %vecext.1)
607 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
608 %vecext.2 = extractelement <4 x float> %0, i32 2
609 %3 = tail call fast float @asinf(float %vecext.2)
610 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
611 %vecext.3 = extractelement <4 x float> %0, i32 3
612 %4 = tail call fast float @asinf(float %vecext.3)
613 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
614 ret <4 x float> %vecins.3
616 declare float @acosf(float) readonly nounwind willreturn
; With -vector-library=Accelerate (CHECK), the four scalar @acosf calls are
; expected to become one <4 x float> @vacosf call; without it (NOACCELERATE)
; all four scalar calls are expected to remain.
617 define <4 x float> @acos_4x(ptr %a) {
618 ; CHECK-LABEL: @acos_4x(
620 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
621 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vacosf(<4 x float> [[TMP0]])
622 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
624 ; NOACCELERATE-LABEL: @acos_4x(
625 ; NOACCELERATE-NEXT: entry:
626 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
627 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
628 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
629 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
630 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
631 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
632 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
633 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
634 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
635 ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
636 ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
637 ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
638 ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
639 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
642 %0 = load <4 x float>, ptr %a, align 16
643 %vecext = extractelement <4 x float> %0, i32 0
644 %1 = tail call fast float @acosf(float %vecext)
645 %vecins = insertelement <4 x float> poison, float %1, i32 0
646 %vecext.1 = extractelement <4 x float> %0, i32 1
647 %2 = tail call fast float @acosf(float %vecext.1)
648 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
649 %vecext.2 = extractelement <4 x float> %0, i32 2
650 %3 = tail call fast float @acosf(float %vecext.2)
651 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
652 %vecext.3 = extractelement <4 x float> %0, i32 3
653 %4 = tail call fast float @acosf(float %vecext.3)
654 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
655 ret <4 x float> %vecins.3
657 declare float @atanf(float) readonly nounwind willreturn
; With -vector-library=Accelerate (CHECK), the four scalar @atanf calls are
; expected to become one <4 x float> @vatanf call; without it (NOACCELERATE)
; all four scalar calls are expected to remain.
658 define <4 x float> @atan_4x(ptr %a) {
659 ; CHECK-LABEL: @atan_4x(
661 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
662 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vatanf(<4 x float> [[TMP0]])
663 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
665 ; NOACCELERATE-LABEL: @atan_4x(
666 ; NOACCELERATE-NEXT: entry:
667 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
668 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
669 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
670 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
671 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
672 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
673 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
674 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
675 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
676 ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
677 ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
678 ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
679 ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
680 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
683 %0 = load <4 x float>, ptr %a, align 16
684 %vecext = extractelement <4 x float> %0, i32 0
685 %1 = tail call fast float @atanf(float %vecext)
686 %vecins = insertelement <4 x float> poison, float %1, i32 0
687 %vecext.1 = extractelement <4 x float> %0, i32 1
688 %2 = tail call fast float @atanf(float %vecext.1)
689 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
690 %vecext.2 = extractelement <4 x float> %0, i32 2
691 %3 = tail call fast float @atanf(float %vecext.2)
692 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
693 %vecext.3 = extractelement <4 x float> %0, i32 3
694 %4 = tail call fast float @atanf(float %vecext.3)
695 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
696 ret <4 x float> %vecins.3
698 declare float @sinhf(float) readonly nounwind willreturn
; With -vector-library=Accelerate (CHECK), the four scalar @sinhf calls are
; expected to become one <4 x float> @vsinhf call; without it (NOACCELERATE)
; all four scalar calls are expected to remain.
699 define <4 x float> @sinh_4x(ptr %a) {
700 ; CHECK-LABEL: @sinh_4x(
702 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
703 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vsinhf(<4 x float> [[TMP0]])
704 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
706 ; NOACCELERATE-LABEL: @sinh_4x(
707 ; NOACCELERATE-NEXT: entry:
708 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
709 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
710 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
711 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
712 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
713 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
714 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
715 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
716 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
717 ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
718 ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
719 ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
720 ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
721 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
724 %0 = load <4 x float>, ptr %a, align 16
725 %vecext = extractelement <4 x float> %0, i32 0
726 %1 = tail call fast float @sinhf(float %vecext)
727 %vecins = insertelement <4 x float> poison, float %1, i32 0
728 %vecext.1 = extractelement <4 x float> %0, i32 1
729 %2 = tail call fast float @sinhf(float %vecext.1)
730 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
731 %vecext.2 = extractelement <4 x float> %0, i32 2
732 %3 = tail call fast float @sinhf(float %vecext.2)
733 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
734 %vecext.3 = extractelement <4 x float> %0, i32 3
735 %4 = tail call fast float @sinhf(float %vecext.3)
736 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
737 ret <4 x float> %vecins.3
739 declare float @coshf(float) readonly nounwind willreturn
; With -vector-library=Accelerate (CHECK), the four scalar @coshf calls are
; expected to become one <4 x float> @vcoshf call; without it (NOACCELERATE)
; all four scalar calls are expected to remain.
740 define <4 x float> @cosh_4x(ptr %a) {
741 ; CHECK-LABEL: @cosh_4x(
743 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
744 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vcoshf(<4 x float> [[TMP0]])
745 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
747 ; NOACCELERATE-LABEL: @cosh_4x(
748 ; NOACCELERATE-NEXT: entry:
749 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
750 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
751 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
752 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
753 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
754 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
755 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
756 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
757 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
758 ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
759 ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
760 ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
761 ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
762 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
765 %0 = load <4 x float>, ptr %a, align 16
766 %vecext = extractelement <4 x float> %0, i32 0
767 %1 = tail call fast float @coshf(float %vecext)
768 %vecins = insertelement <4 x float> poison, float %1, i32 0
769 %vecext.1 = extractelement <4 x float> %0, i32 1
770 %2 = tail call fast float @coshf(float %vecext.1)
771 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
772 %vecext.2 = extractelement <4 x float> %0, i32 2
773 %3 = tail call fast float @coshf(float %vecext.2)
774 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
775 %vecext.3 = extractelement <4 x float> %0, i32 3
776 %4 = tail call fast float @coshf(float %vecext.3)
777 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
778 ret <4 x float> %vecins.3
780 declare float @tanhf(float) readonly nounwind willreturn
; With -vector-library=Accelerate (CHECK), the four scalar @tanhf calls are
; expected to become one <4 x float> @vtanhf call; without it (NOACCELERATE)
; all four scalar calls are expected to remain.
781 define <4 x float> @tanh_4x(ptr %a) {
782 ; CHECK-LABEL: @tanh_4x(
784 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
785 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vtanhf(<4 x float> [[TMP0]])
786 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
788 ; NOACCELERATE-LABEL: @tanh_4x(
789 ; NOACCELERATE-NEXT: entry:
790 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
791 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
792 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
793 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
794 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
795 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
796 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
797 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
798 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
799 ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
800 ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
801 ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
802 ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
803 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
806 %0 = load <4 x float>, ptr %a, align 16
807 %vecext = extractelement <4 x float> %0, i32 0
808 %1 = tail call fast float @tanhf(float %vecext)
809 %vecins = insertelement <4 x float> poison, float %1, i32 0
810 %vecext.1 = extractelement <4 x float> %0, i32 1
811 %2 = tail call fast float @tanhf(float %vecext.1)
812 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
813 %vecext.2 = extractelement <4 x float> %0, i32 2
814 %3 = tail call fast float @tanhf(float %vecext.2)
815 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
816 %vecext.3 = extractelement <4 x float> %0, i32 3
817 %4 = tail call fast float @tanhf(float %vecext.3)
818 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
819 ret <4 x float> %vecins.3
821 declare float @asinhf(float) readonly nounwind willreturn
; With -vector-library=Accelerate (CHECK), the four scalar @asinhf calls are
; expected to become one <4 x float> @vasinhf call; without it (NOACCELERATE)
; all four scalar calls are expected to remain.
822 define <4 x float> @asinh_4x(ptr %a) {
823 ; CHECK-LABEL: @asinh_4x(
825 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
826 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vasinhf(<4 x float> [[TMP0]])
827 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
829 ; NOACCELERATE-LABEL: @asinh_4x(
830 ; NOACCELERATE-NEXT: entry:
831 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
832 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
833 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]])
834 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
835 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
836 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]])
837 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
838 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
839 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]])
840 ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
841 ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
842 ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]])
843 ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
844 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
847 %0 = load <4 x float>, ptr %a, align 16
848 %vecext = extractelement <4 x float> %0, i32 0
849 %1 = tail call fast float @asinhf(float %vecext)
850 %vecins = insertelement <4 x float> poison, float %1, i32 0
851 %vecext.1 = extractelement <4 x float> %0, i32 1
852 %2 = tail call fast float @asinhf(float %vecext.1)
853 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
854 %vecext.2 = extractelement <4 x float> %0, i32 2
855 %3 = tail call fast float @asinhf(float %vecext.2)
856 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
857 %vecext.3 = extractelement <4 x float> %0, i32 3
858 %4 = tail call fast float @asinhf(float %vecext.3)
859 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
860 ret <4 x float> %vecins.3
862 declare float @acoshf(float) readonly nounwind willreturn
; With -vector-library=Accelerate (CHECK), the four scalar @acoshf calls are
; expected to become one <4 x float> @vacoshf call; without it (NOACCELERATE)
; all four scalar calls are expected to remain.
863 define <4 x float> @acosh_4x(ptr %a) {
864 ; CHECK-LABEL: @acosh_4x(
866 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
867 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vacoshf(<4 x float> [[TMP0]])
868 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
870 ; NOACCELERATE-LABEL: @acosh_4x(
871 ; NOACCELERATE-NEXT: entry:
872 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
873 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
874 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]])
875 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
876 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
877 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]])
878 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
879 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
880 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]])
881 ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
882 ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
883 ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]])
884 ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
885 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
888 %0 = load <4 x float>, ptr %a, align 16
889 %vecext = extractelement <4 x float> %0, i32 0
890 %1 = tail call fast float @acoshf(float %vecext)
891 %vecins = insertelement <4 x float> poison, float %1, i32 0
892 %vecext.1 = extractelement <4 x float> %0, i32 1
893 %2 = tail call fast float @acoshf(float %vecext.1)
894 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
895 %vecext.2 = extractelement <4 x float> %0, i32 2
896 %3 = tail call fast float @acoshf(float %vecext.2)
897 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
898 %vecext.3 = extractelement <4 x float> %0, i32 3
899 %4 = tail call fast float @acoshf(float %vecext.3)
900 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
901 ret <4 x float> %vecins.3
903 declare float @atanhf(float) readonly nounwind willreturn
; With -vector-library=Accelerate (CHECK), the four scalar @atanhf calls are
; expected to become one <4 x float> @vatanhf call; without it (NOACCELERATE)
; all four scalar calls are expected to remain.
904 define <4 x float> @atanh_4x(ptr %a) {
905 ; CHECK-LABEL: @atanh_4x(
907 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
908 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vatanhf(<4 x float> [[TMP0]])
909 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
911 ; NOACCELERATE-LABEL: @atanh_4x(
912 ; NOACCELERATE-NEXT: entry:
913 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
914 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
915 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
916 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
917 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
918 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
919 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
920 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
921 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
922 ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
923 ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
924 ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
925 ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
926 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
929 %0 = load <4 x float>, ptr %a, align 16
930 %vecext = extractelement <4 x float> %0, i32 0
931 %1 = tail call fast float @atanhf(float %vecext)
932 %vecins = insertelement <4 x float> poison, float %1, i32 0
933 %vecext.1 = extractelement <4 x float> %0, i32 1
934 %2 = tail call fast float @atanhf(float %vecext.1)
935 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
936 %vecext.2 = extractelement <4 x float> %0, i32 2
937 %3 = tail call fast float @atanhf(float %vecext.2)
938 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
939 %vecext.3 = extractelement <4 x float> %0, i32 3
940 %4 = tail call fast float @atanhf(float %vecext.3)
941 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
942 ret <4 x float> %vecins.3
945 ; Accelerate *does not* provide sin() for <2 x float>.
; Both runs (CHECK and NOACCELERATE) therefore expect the two scalar
; @llvm.sin.f32 calls to remain.
946 define <2 x float> @sin_2x(ptr %a) {
947 ; CHECK-LABEL: @sin_2x(
949 ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A:%.*]], align 16
950 ; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
951 ; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]]) #[[ATTR2:[0-9]+]]
952 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
953 ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
954 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) #[[ATTR2]]
955 ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <2 x float> [[VECINS]], float [[TMP2]], i32 1
956 ; CHECK-NEXT: ret <2 x float> [[VECINS_1]]
958 ; NOACCELERATE-LABEL: @sin_2x(
959 ; NOACCELERATE-NEXT: entry:
960 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A:%.*]], align 16
961 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
962 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
963 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
964 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
965 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
966 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <2 x float> [[VECINS]], float [[TMP2]], i32 1
967 ; NOACCELERATE-NEXT: ret <2 x float> [[VECINS_1]]
970 %0 = load <2 x float>, ptr %a, align 16
971 %vecext = extractelement <2 x float> %0, i32 0
972 %1 = tail call fast float @llvm.sin.f32(float %vecext)
973 %vecins = insertelement <2 x float> poison, float %1, i32 0
974 %vecext.1 = extractelement <2 x float> %0, i32 1
975 %2 = tail call fast float @llvm.sin.f32(float %vecext.1)
976 %vecins.1 = insertelement <2 x float> %vecins, float %2, i32 1
977 ret <2 x float> %vecins.1
981 declare float @llvm.cos.f32(float)
983 ; Accelerate provides cos() for <4 x float>
; The CHECK run expects a single <4 x float> @vcosf call. The NOACCELERATE run
; expects lanes 0 and 1 to stay scalar @llvm.cos.f32 calls, while lanes 2 and 3
; are combined into one <2 x float> @llvm.cos.v2f32 call.
984 define <4 x float> @int_cos_4x(ptr %a) {
985 ; CHECK-LABEL: @int_cos_4x(
987 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
988 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vcosf(<4 x float> [[TMP0]])
989 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
991 ; NOACCELERATE-LABEL: @int_cos_4x(
992 ; NOACCELERATE-NEXT: entry:
993 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
994 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
995 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
996 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
997 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
998 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
999 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1000 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
1001 ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]])
1002 ; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
1003 ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1004 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
1007 %0 = load <4 x float>, ptr %a, align 16
1008 %vecext = extractelement <4 x float> %0, i32 0
1009 %1 = tail call fast float @llvm.cos.f32(float %vecext)
1010 %vecins = insertelement <4 x float> poison, float %1, i32 0
1011 %vecext.1 = extractelement <4 x float> %0, i32 1
1012 %2 = tail call fast float @llvm.cos.f32(float %vecext.1)
1013 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1014 %vecext.2 = extractelement <4 x float> %0, i32 2
1015 %3 = tail call fast float @llvm.cos.f32(float %vecext.2)
1016 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1017 %vecext.3 = extractelement <4 x float> %0, i32 3
1018 %4 = tail call fast float @llvm.cos.f32(float %vecext.3)
1019 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1020 ret <4 x float> %vecins.3
1023 ; Accelerate *does not* provide cos() for <2 x float>.
; Both runs (CHECK and NOACCELERATE) therefore expect the two scalar
; @llvm.cos.f32 calls to remain.
1024 define <2 x float> @cos_2x(ptr %a) {
1025 ; CHECK-LABEL: @cos_2x(
1026 ; CHECK-NEXT: entry:
1027 ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A:%.*]], align 16
1028 ; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
1029 ; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]]) #[[ATTR3:[0-9]+]]
1030 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
1031 ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
1032 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]]) #[[ATTR3]]
1033 ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <2 x float> [[VECINS]], float [[TMP2]], i32 1
1034 ; CHECK-NEXT: ret <2 x float> [[VECINS_1]]
1036 ; NOACCELERATE-LABEL: @cos_2x(
1037 ; NOACCELERATE-NEXT: entry:
1038 ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A:%.*]], align 16
1039 ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
1040 ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
1041 ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
1042 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
1043 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
1044 ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <2 x float> [[VECINS]], float [[TMP2]], i32 1
1045 ; NOACCELERATE-NEXT: ret <2 x float> [[VECINS_1]]
1048 %0 = load <2 x float>, ptr %a, align 16
1049 %vecext = extractelement <2 x float> %0, i32 0
1050 %1 = tail call fast float @llvm.cos.f32(float %vecext)
1051 %vecins = insertelement <2 x float> poison, float %1, i32 0
1052 %vecext.1 = extractelement <2 x float> %0, i32 1
1053 %2 = tail call fast float @llvm.cos.f32(float %vecext.1)
1054 %vecins.1 = insertelement <2 x float> %vecins, float %2, i32 1
1055 ret <2 x float> %vecins.1