1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call.*(cos|exp|log|sin|pow|ceil|copysign|fabs|floor|fma|m..num|nearbyint|rint|round|sqrt|trunc)" --version 2
3 ; RUN: opt -mattr=+neon -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=SLEEF-NEON
4 ; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s --check-prefix=SLEEF-SVE
5 ; RUN: opt -mattr=+neon -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=ARMPL-NEON
6 ; RUN: opt -mattr=+sve -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s --check-prefix=ARMPL-SVE
8 target triple = "aarch64-unknown-linux-gnu"
10 ; We are checking whether loops containing intrinsic calls can be vectorized,
11 ; when the compiler provides TLI mappings to their vector variants. The tests
12 ; are checking fixed width vectorization with NEON and scalable vectorization
15 declare double @llvm.ceil.f64(double)
16 declare float @llvm.ceil.f32(float)
; Neither SLEEF nor ArmPL provides a TLI mapping for llvm.ceil, so every
; configuration widens the intrinsic itself: <2 x double> llvm.ceil.v2f64 for
; NEON and <vscale x 2 x double> llvm.ceil.nxv2f64 for SVE.
18 define void @ceil_f64(ptr noalias %in.ptr, ptr %out.ptr) {
19 ; SLEEF-NEON-LABEL: define void @ceil_f64
20 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
21 ; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
23 ; SLEEF-SVE-LABEL: define void @ceil_f64
24 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
25 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
27 ; ARMPL-NEON-LABEL: define void @ceil_f64
28 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
29 ; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
31 ; ARMPL-SVE-LABEL: define void @ceil_f64
32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
33 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
39 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
40 %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
41 %in = load double, ptr %in.gep, align 8
42 %call = tail call double @llvm.ceil.f64(double %in)
43 %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
44 store double %call, ptr %out.gep, align 8
45 %iv.next = add nuw nsw i64 %iv, 1
46 %exitcond = icmp eq i64 %iv.next, 1000
47 br i1 %exitcond, label %for.end, label %for.body
; f32 counterpart of ceil_f64: no vector-library mapping, so the loop is
; vectorized with llvm.ceil.v4f32 (NEON) / llvm.ceil.nxv4f32 (SVE).
53 define void @ceil_f32(ptr noalias %in.ptr, ptr %out.ptr) {
54 ; SLEEF-NEON-LABEL: define void @ceil_f32
55 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
56 ; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
58 ; SLEEF-SVE-LABEL: define void @ceil_f32
59 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
60 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
62 ; ARMPL-NEON-LABEL: define void @ceil_f32
63 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
64 ; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
66 ; ARMPL-SVE-LABEL: define void @ceil_f32
67 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
68 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
74 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
75 %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
76 %in = load float, ptr %in.gep, align 8
77 %call = tail call float @llvm.ceil.f32(float %in)
78 %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
79 store float %call, ptr %out.gep, align 4
80 %iv.next = add nuw nsw i64 %iv, 1
81 %exitcond = icmp eq i64 %iv.next, 1000
82 br i1 %exitcond, label %for.end, label %for.body
88 declare double @llvm.copysign.f64(double, double)
89 declare float @llvm.copysign.f32(float, float)
; llvm.copysign has no SLEEF/ArmPL mapping either; the two-operand intrinsic
; (both operands are the same wide load here) is widened directly for NEON
; and SVE.
91 define void @copysign_f64(ptr noalias %in.ptr, ptr %out.ptr) {
92 ; SLEEF-NEON-LABEL: define void @copysign_f64
93 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
94 ; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
96 ; SLEEF-SVE-LABEL: define void @copysign_f64
97 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
98 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
100 ; ARMPL-NEON-LABEL: define void @copysign_f64
101 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
102 ; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
104 ; ARMPL-SVE-LABEL: define void @copysign_f64
105 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
106 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
112 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
113 %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
114 %in = load double, ptr %in.gep, align 8
115 %call = tail call double @llvm.copysign.f64(double %in, double %in)
116 %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
117 store double %call, ptr %out.gep, align 8
118 %iv.next = add nuw nsw i64 %iv, 1
119 %exitcond = icmp eq i64 %iv.next, 1000
120 br i1 %exitcond, label %for.end, label %for.body
; f32 counterpart of copysign_f64: widened to llvm.copysign.v4f32 /
; llvm.copysign.nxv4f32 with the wide load repeated for both operands.
126 define void @copysign_f32(ptr noalias %in.ptr, ptr %out.ptr) {
127 ; SLEEF-NEON-LABEL: define void @copysign_f32
128 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
129 ; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
131 ; SLEEF-SVE-LABEL: define void @copysign_f32
132 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
133 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
135 ; ARMPL-NEON-LABEL: define void @copysign_f32
136 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
137 ; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
139 ; ARMPL-SVE-LABEL: define void @copysign_f32
140 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
141 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
147 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
148 %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
149 %in = load float, ptr %in.gep, align 8
150 %call = tail call float @llvm.copysign.f32(float %in, float %in)
151 %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
152 store float %call, ptr %out.gep, align 4
153 %iv.next = add nuw nsw i64 %iv, 1
154 %exitcond = icmp eq i64 %iv.next, 1000
155 br i1 %exitcond, label %for.end, label %for.body
161 declare double @llvm.cos.f64(double)
162 declare float @llvm.cos.f32(float)
; llvm.cos DOES have TLI mappings: SLEEF lowers to @_ZGVnN2v_cos (NEON) and
; the masked @_ZGVsMxv_cos (SVE); ArmPL lowers to @armpl_vcosq_f64 and
; @armpl_svcos_f64_x. The SVE variants take an extra <vscale x 2 x i1>
; predicate operand (the active lane mask).
164 define void @cos_f64(ptr noalias %in.ptr, ptr %out.ptr) {
165 ; SLEEF-NEON-LABEL: define void @cos_f64
166 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
167 ; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[WIDE_LOAD:%.*]])
169 ; SLEEF-SVE-LABEL: define void @cos_f64
170 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
171 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
173 ; ARMPL-NEON-LABEL: define void @cos_f64
174 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
175 ; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vcosq_f64(<2 x double> [[WIDE_LOAD:%.*]])
177 ; ARMPL-SVE-LABEL: define void @cos_f64
178 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
179 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
185 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
186 %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
187 %in = load double, ptr %in.gep, align 8
188 %call = tail call double @llvm.cos.f64(double %in)
189 %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
190 store double %call, ptr %out.gep, align 8
191 %iv.next = add nuw nsw i64 %iv, 1
192 %exitcond = icmp eq i64 %iv.next, 1000
193 br i1 %exitcond, label %for.end, label %for.body
; f32 counterpart of cos_f64: SLEEF @_ZGVnN4v_cosf / masked @_ZGVsMxv_cosf,
; ArmPL @armpl_vcosq_f32 / @armpl_svcos_f32_x.
199 define void @cos_f32(ptr noalias %in.ptr, ptr %out.ptr) {
200 ; SLEEF-NEON-LABEL: define void @cos_f32
201 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
202 ; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[WIDE_LOAD:%.*]])
204 ; SLEEF-SVE-LABEL: define void @cos_f32
205 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
206 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
208 ; ARMPL-NEON-LABEL: define void @cos_f32
209 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
210 ; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vcosq_f32(<4 x float> [[WIDE_LOAD:%.*]])
212 ; ARMPL-SVE-LABEL: define void @cos_f32
213 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
214 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
220 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
221 %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
222 %in = load float, ptr %in.gep, align 8
223 %call = tail call float @llvm.cos.f32(float %in)
224 %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
225 store float %call, ptr %out.gep, align 4
226 %iv.next = add nuw nsw i64 %iv, 1
227 %exitcond = icmp eq i64 %iv.next, 1000
228 br i1 %exitcond, label %for.end, label %for.body
234 declare double @llvm.exp.f64(double)
235 declare float @llvm.exp.f32(float)
; llvm.exp is mapped: SLEEF @_ZGVnN2v_exp / masked @_ZGVsMxv_exp, ArmPL
; @armpl_vexpq_f64 / @armpl_svexp_f64_x (SVE variants carry the lane mask).
237 define void @exp_f64(ptr noalias %in.ptr, ptr %out.ptr) {
238 ; SLEEF-NEON-LABEL: define void @exp_f64
239 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
240 ; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp(<2 x double> [[WIDE_LOAD:%.*]])
242 ; SLEEF-SVE-LABEL: define void @exp_f64
243 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
244 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
246 ; ARMPL-NEON-LABEL: define void @exp_f64
247 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
248 ; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vexpq_f64(<2 x double> [[WIDE_LOAD:%.*]])
250 ; ARMPL-SVE-LABEL: define void @exp_f64
251 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
252 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svexp_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
258 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
259 %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
260 %in = load double, ptr %in.gep, align 8
261 %call = tail call double @llvm.exp.f64(double %in)
262 %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
263 store double %call, ptr %out.gep, align 8
264 %iv.next = add nuw nsw i64 %iv, 1
265 %exitcond = icmp eq i64 %iv.next, 1000
266 br i1 %exitcond, label %for.end, label %for.body
; f32 counterpart of exp_f64: SLEEF @_ZGVnN4v_expf / masked @_ZGVsMxv_expf,
; ArmPL @armpl_vexpq_f32 / @armpl_svexp_f32_x.
272 define void @exp_f32(ptr noalias %in.ptr, ptr %out.ptr) {
273 ; SLEEF-NEON-LABEL: define void @exp_f32
274 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
275 ; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[WIDE_LOAD:%.*]])
277 ; SLEEF-SVE-LABEL: define void @exp_f32
278 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
279 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
281 ; ARMPL-NEON-LABEL: define void @exp_f32
282 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
283 ; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vexpq_f32(<4 x float> [[WIDE_LOAD:%.*]])
285 ; ARMPL-SVE-LABEL: define void @exp_f32
286 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
287 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svexp_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
293 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
294 %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
295 %in = load float, ptr %in.gep, align 8
296 %call = tail call float @llvm.exp.f32(float %in)
297 %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
298 store float %call, ptr %out.gep, align 4
299 %iv.next = add nuw nsw i64 %iv, 1
300 %exitcond = icmp eq i64 %iv.next, 1000
301 br i1 %exitcond, label %for.end, label %for.body
307 declare double @llvm.exp10.f64(double)
308 declare float @llvm.exp10.f32(float)
; llvm.exp10 is mapped: SLEEF @_ZGVnN2v_exp10 / masked @_ZGVsMxv_exp10,
; ArmPL @armpl_vexp10q_f64 / @armpl_svexp10_f64_x.
310 define void @exp10_f64(ptr noalias %in.ptr, ptr %out.ptr) {
311 ; SLEEF-NEON-LABEL: define void @exp10_f64
312 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
313 ; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]])
315 ; SLEEF-SVE-LABEL: define void @exp10_f64
316 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
317 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
319 ; ARMPL-NEON-LABEL: define void @exp10_f64
320 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
321 ; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vexp10q_f64(<2 x double> [[WIDE_LOAD:%.*]])
323 ; ARMPL-SVE-LABEL: define void @exp10_f64
324 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
325 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
331 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
332 %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
333 %in = load double, ptr %in.gep, align 8
334 %call = tail call double @llvm.exp10.f64(double %in)
335 %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
336 store double %call, ptr %out.gep, align 8
337 %iv.next = add nuw nsw i64 %iv, 1
338 %exitcond = icmp eq i64 %iv.next, 1000
339 br i1 %exitcond, label %for.end, label %for.body
; f32 counterpart of exp10_f64: SLEEF @_ZGVnN4v_exp10f / masked
; @_ZGVsMxv_exp10f, ArmPL @armpl_vexp10q_f32 / @armpl_svexp10_f32_x.
345 define void @exp10_f32(ptr noalias %in.ptr, ptr %out.ptr) {
346 ; SLEEF-NEON-LABEL: define void @exp10_f32
347 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
348 ; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[WIDE_LOAD:%.*]])
350 ; SLEEF-SVE-LABEL: define void @exp10_f32
351 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
352 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
354 ; ARMPL-NEON-LABEL: define void @exp10_f32
355 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
356 ; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vexp10q_f32(<4 x float> [[WIDE_LOAD:%.*]])
358 ; ARMPL-SVE-LABEL: define void @exp10_f32
359 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
360 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
366 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
367 %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
368 %in = load float, ptr %in.gep, align 8
369 %call = tail call float @llvm.exp10.f32(float %in)
370 %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
371 store float %call, ptr %out.gep, align 4
372 %iv.next = add nuw nsw i64 %iv, 1
373 %exitcond = icmp eq i64 %iv.next, 1000
374 br i1 %exitcond, label %for.end, label %for.body
380 declare double @llvm.exp2.f64(double)
381 declare float @llvm.exp2.f32(float)
; llvm.exp2 is mapped: SLEEF @_ZGVnN2v_exp2 / masked @_ZGVsMxv_exp2,
; ArmPL @armpl_vexp2q_f64 / @armpl_svexp2_f64_x.
383 define void @exp2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
384 ; SLEEF-NEON-LABEL: define void @exp2_f64
385 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
386 ; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]])
388 ; SLEEF-SVE-LABEL: define void @exp2_f64
389 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
390 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
392 ; ARMPL-NEON-LABEL: define void @exp2_f64
393 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
394 ; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vexp2q_f64(<2 x double> [[WIDE_LOAD:%.*]])
396 ; ARMPL-SVE-LABEL: define void @exp2_f64
397 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
398 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
404 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
405 %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
406 %in = load double, ptr %in.gep, align 8
407 %call = tail call double @llvm.exp2.f64(double %in)
408 %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
409 store double %call, ptr %out.gep, align 8
410 %iv.next = add nuw nsw i64 %iv, 1
411 %exitcond = icmp eq i64 %iv.next, 1000
412 br i1 %exitcond, label %for.end, label %for.body
; f32 counterpart of exp2_f64: SLEEF @_ZGVnN4v_exp2f / masked @_ZGVsMxv_exp2f,
; ArmPL @armpl_vexp2q_f32 / @armpl_svexp2_f32_x.
418 define void @exp2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
419 ; SLEEF-NEON-LABEL: define void @exp2_f32
420 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
421 ; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[WIDE_LOAD:%.*]])
423 ; SLEEF-SVE-LABEL: define void @exp2_f32
424 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
425 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
427 ; ARMPL-NEON-LABEL: define void @exp2_f32
428 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
429 ; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vexp2q_f32(<4 x float> [[WIDE_LOAD:%.*]])
431 ; ARMPL-SVE-LABEL: define void @exp2_f32
432 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
433 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
439 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
440 %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
441 %in = load float, ptr %in.gep, align 8
442 %call = tail call float @llvm.exp2.f32(float %in)
443 %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
444 store float %call, ptr %out.gep, align 4
445 %iv.next = add nuw nsw i64 %iv, 1
446 %exitcond = icmp eq i64 %iv.next, 1000
447 br i1 %exitcond, label %for.end, label %for.body
453 declare double @llvm.fabs.f64(double)
454 declare float @llvm.fabs.f32(float)
; llvm.fabs has no vector-library mapping; all configurations widen the
; intrinsic (llvm.fabs.v2f64 for NEON, llvm.fabs.nxv2f64 for SVE).
456 define void @fabs_f64(ptr noalias %in.ptr, ptr %out.ptr) {
457 ; SLEEF-NEON-LABEL: define void @fabs_f64
458 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
459 ; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
461 ; SLEEF-SVE-LABEL: define void @fabs_f64
462 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
463 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
465 ; ARMPL-NEON-LABEL: define void @fabs_f64
466 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
467 ; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
469 ; ARMPL-SVE-LABEL: define void @fabs_f64
470 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
471 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
477 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
478 %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
479 %in = load double, ptr %in.gep, align 8
480 %call = tail call double @llvm.fabs.f64(double %in)
481 %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
482 store double %call, ptr %out.gep, align 8
483 %iv.next = add nuw nsw i64 %iv, 1
484 %exitcond = icmp eq i64 %iv.next, 1000
485 br i1 %exitcond, label %for.end, label %for.body
; f32 counterpart of fabs_f64: widened to llvm.fabs.v4f32 / llvm.fabs.nxv4f32.
491 define void @fabs_f32(ptr noalias %in.ptr, ptr %out.ptr) {
492 ; SLEEF-NEON-LABEL: define void @fabs_f32
493 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
494 ; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
496 ; SLEEF-SVE-LABEL: define void @fabs_f32
497 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
498 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
500 ; ARMPL-NEON-LABEL: define void @fabs_f32
501 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
502 ; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
504 ; ARMPL-SVE-LABEL: define void @fabs_f32
505 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
506 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
512 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
513 %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
514 %in = load float, ptr %in.gep, align 8
515 %call = tail call float @llvm.fabs.f32(float %in)
516 %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
517 store float %call, ptr %out.gep, align 4
518 %iv.next = add nuw nsw i64 %iv, 1
519 %exitcond = icmp eq i64 %iv.next, 1000
520 br i1 %exitcond, label %for.end, label %for.body
526 declare double @llvm.floor.f64(double)
527 declare float @llvm.floor.f32(float)
; llvm.floor has no vector-library mapping; widened to llvm.floor.v2f64
; (NEON) / llvm.floor.nxv2f64 (SVE) in all four configurations.
529 define void @floor_f64(ptr noalias %in.ptr, ptr %out.ptr) {
530 ; SLEEF-NEON-LABEL: define void @floor_f64
531 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
532 ; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
534 ; SLEEF-SVE-LABEL: define void @floor_f64
535 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
536 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
538 ; ARMPL-NEON-LABEL: define void @floor_f64
539 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
540 ; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
542 ; ARMPL-SVE-LABEL: define void @floor_f64
543 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
544 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
550 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
551 %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
552 %in = load double, ptr %in.gep, align 8
553 %call = tail call double @llvm.floor.f64(double %in)
554 %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
555 store double %call, ptr %out.gep, align 8
556 %iv.next = add nuw nsw i64 %iv, 1
557 %exitcond = icmp eq i64 %iv.next, 1000
558 br i1 %exitcond, label %for.end, label %for.body
; f32 counterpart of floor_f64: widened to llvm.floor.v4f32 /
; llvm.floor.nxv4f32.
564 define void @floor_f32(ptr noalias %in.ptr, ptr %out.ptr) {
565 ; SLEEF-NEON-LABEL: define void @floor_f32
566 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
567 ; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
569 ; SLEEF-SVE-LABEL: define void @floor_f32
570 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
571 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
573 ; ARMPL-NEON-LABEL: define void @floor_f32
574 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
575 ; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
577 ; ARMPL-SVE-LABEL: define void @floor_f32
578 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
579 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
585 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
586 %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
587 %in = load float, ptr %in.gep, align 8
588 %call = tail call float @llvm.floor.f32(float %in)
589 %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
590 store float %call, ptr %out.gep, align 4
591 %iv.next = add nuw nsw i64 %iv, 1
592 %exitcond = icmp eq i64 %iv.next, 1000
593 br i1 %exitcond, label %for.end, label %for.body
599 declare double @llvm.fma.f64(double, double, double)
600 declare float @llvm.fma.f32(float, float, float)
; llvm.fma has no vector-library mapping; the three-operand intrinsic (the
; same wide load used for all three operands) is widened to llvm.fma.v2f64
; (NEON) / llvm.fma.nxv2f64 (SVE).
602 define void @fma_f64(ptr noalias %in.ptr, ptr %out.ptr) {
603 ; SLEEF-NEON-LABEL: define void @fma_f64
604 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
605 ; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]])
607 ; SLEEF-SVE-LABEL: define void @fma_f64
608 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
609 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
611 ; ARMPL-NEON-LABEL: define void @fma_f64
612 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
613 ; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]])
615 ; ARMPL-SVE-LABEL: define void @fma_f64
616 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
617 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
623 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
624 %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
625 %in = load double, ptr %in.gep, align 8
626 %call = tail call double @llvm.fma.f64(double %in, double %in, double %in)
627 %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
628 store double %call, ptr %out.gep, align 8
629 %iv.next = add nuw nsw i64 %iv, 1
630 %exitcond = icmp eq i64 %iv.next, 1000
631 br i1 %exitcond, label %for.end, label %for.body
; f32 counterpart of fma_f64: widened to llvm.fma.v4f32 / llvm.fma.nxv4f32
; with the wide load repeated for all three operands.
637 define void @fma_f32(ptr noalias %in.ptr, ptr %out.ptr) {
638 ; SLEEF-NEON-LABEL: define void @fma_f32
639 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
640 ; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]])
642 ; SLEEF-SVE-LABEL: define void @fma_f32
643 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
644 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
646 ; ARMPL-NEON-LABEL: define void @fma_f32
647 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
648 ; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]])
650 ; ARMPL-SVE-LABEL: define void @fma_f32
651 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
652 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
658 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
659 %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
660 %in = load float, ptr %in.gep, align 8
661 %call = tail call float @llvm.fma.f32(float %in, float %in, float %in)
662 %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
663 store float %call, ptr %out.gep, align 4
664 %iv.next = add nuw nsw i64 %iv, 1
665 %exitcond = icmp eq i64 %iv.next, 1000
666 br i1 %exitcond, label %for.end, label %for.body
672 declare double @llvm.log.f64(double)
673 declare float @llvm.log.f32(float)
; llvm.log is mapped: SLEEF @_ZGVnN2v_log / masked @_ZGVsMxv_log, ArmPL
; @armpl_vlogq_f64 / @armpl_svlog_f64_x (SVE variants carry the lane mask).
675 define void @log_f64(ptr noalias %in.ptr, ptr %out.ptr) {
676 ; SLEEF-NEON-LABEL: define void @log_f64
677 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
678 ; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]])
680 ; SLEEF-SVE-LABEL: define void @log_f64
681 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
682 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
684 ; ARMPL-NEON-LABEL: define void @log_f64
685 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
686 ; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vlogq_f64(<2 x double> [[WIDE_LOAD:%.*]])
688 ; ARMPL-SVE-LABEL: define void @log_f64
689 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
690 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
696 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
697 %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
698 %in = load double, ptr %in.gep, align 8
699 %call = tail call double @llvm.log.f64(double %in)
700 %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
701 store double %call, ptr %out.gep, align 8
702 %iv.next = add nuw nsw i64 %iv, 1
703 %exitcond = icmp eq i64 %iv.next, 1000
704 br i1 %exitcond, label %for.end, label %for.body
710 define void @log_f32(ptr noalias %in.ptr, ptr %out.ptr) {
711 ; SLEEF-NEON-LABEL: define void @log_f32
712 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
713 ; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[WIDE_LOAD:%.*]])
715 ; SLEEF-SVE-LABEL: define void @log_f32
716 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
717 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
719 ; ARMPL-NEON-LABEL: define void @log_f32
720 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
721 ; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vlogq_f32(<4 x float> [[WIDE_LOAD:%.*]])
723 ; ARMPL-SVE-LABEL: define void @log_f32
724 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
725 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
731 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
732 %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
733 %in = load float, ptr %in.gep, align 8
734 %call = tail call float @llvm.log.f32(float %in)
735 %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
736 store float %call, ptr %out.gep, align 4
737 %iv.next = add nuw nsw i64 %iv, 1
738 %exitcond = icmp eq i64 %iv.next, 1000
739 br i1 %exitcond, label %for.end, label %for.body
745 declare double @llvm.log10.f64(double)
746 declare float @llvm.log10.f32(float)
748 define void @log10_f64(ptr noalias %in.ptr, ptr %out.ptr) {
749 ; SLEEF-NEON-LABEL: define void @log10_f64
750 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
751 ; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]])
753 ; SLEEF-SVE-LABEL: define void @log10_f64
754 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
755 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
757 ; ARMPL-NEON-LABEL: define void @log10_f64
758 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
759 ; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vlog10q_f64(<2 x double> [[WIDE_LOAD:%.*]])
761 ; ARMPL-SVE-LABEL: define void @log10_f64
762 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
763 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
769 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
770 %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
771 %in = load double, ptr %in.gep, align 8
772 %call = tail call double @llvm.log10.f64(double %in)
773 %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
774 store double %call, ptr %out.gep, align 8
775 %iv.next = add nuw nsw i64 %iv, 1
776 %exitcond = icmp eq i64 %iv.next, 1000
777 br i1 %exitcond, label %for.end, label %for.body
783 define void @log10_f32(ptr noalias %in.ptr, ptr %out.ptr) {
784 ; SLEEF-NEON-LABEL: define void @log10_f32
785 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
786 ; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[WIDE_LOAD:%.*]])
788 ; SLEEF-SVE-LABEL: define void @log10_f32
789 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
790 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
792 ; ARMPL-NEON-LABEL: define void @log10_f32
793 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
794 ; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vlog10q_f32(<4 x float> [[WIDE_LOAD:%.*]])
796 ; ARMPL-SVE-LABEL: define void @log10_f32
797 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
798 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
804 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
805 %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
806 %in = load float, ptr %in.gep, align 8
807 %call = tail call float @llvm.log10.f32(float %in)
808 %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
809 store float %call, ptr %out.gep, align 4
810 %iv.next = add nuw nsw i64 %iv, 1
811 %exitcond = icmp eq i64 %iv.next, 1000
812 br i1 %exitcond, label %for.end, label %for.body
818 declare double @llvm.log2.f64(double)
819 declare float @llvm.log2.f32(float)
821 define void @log2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
822 ; SLEEF-NEON-LABEL: define void @log2_f64
823 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
824 ; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]])
826 ; SLEEF-SVE-LABEL: define void @log2_f64
827 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
828 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
830 ; ARMPL-NEON-LABEL: define void @log2_f64
831 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
832 ; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vlog2q_f64(<2 x double> [[WIDE_LOAD:%.*]])
834 ; ARMPL-SVE-LABEL: define void @log2_f64
835 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
836 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
842 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
843 %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
844 %in = load double, ptr %in.gep, align 8
845 %call = tail call double @llvm.log2.f64(double %in)
846 %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
847 store double %call, ptr %out.gep, align 8
848 %iv.next = add nuw nsw i64 %iv, 1
849 %exitcond = icmp eq i64 %iv.next, 1000
850 br i1 %exitcond, label %for.end, label %for.body
856 define void @log2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
857 ; SLEEF-NEON-LABEL: define void @log2_f32
858 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
859 ; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[WIDE_LOAD:%.*]])
861 ; SLEEF-SVE-LABEL: define void @log2_f32
862 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
863 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
865 ; ARMPL-NEON-LABEL: define void @log2_f32
866 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
867 ; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vlog2q_f32(<4 x float> [[WIDE_LOAD:%.*]])
869 ; ARMPL-SVE-LABEL: define void @log2_f32
870 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
871 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
877 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
878 %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
879 %in = load float, ptr %in.gep, align 8
880 %call = tail call float @llvm.log2.f32(float %in)
881 %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
882 store float %call, ptr %out.gep, align 4
883 %iv.next = add nuw nsw i64 %iv, 1
884 %exitcond = icmp eq i64 %iv.next, 1000
885 br i1 %exitcond, label %for.end, label %for.body
891 declare double @llvm.maxnum.f64(double, double)
892 declare float @llvm.maxnum.f32(float, float)
894 define void @maxnum_f64(ptr noalias %in.ptr, ptr %out.ptr) {
895 ; SLEEF-NEON-LABEL: define void @maxnum_f64
896 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
897 ; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
899 ; SLEEF-SVE-LABEL: define void @maxnum_f64
900 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
901 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
903 ; ARMPL-NEON-LABEL: define void @maxnum_f64
904 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
905 ; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
907 ; ARMPL-SVE-LABEL: define void @maxnum_f64
908 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
909 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
915 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
916 %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
917 %in = load double, ptr %in.gep, align 8
918 %call = tail call double @llvm.maxnum.f64(double %in, double %in)
919 %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
920 store double %call, ptr %out.gep, align 8
921 %iv.next = add nuw nsw i64 %iv, 1
922 %exitcond = icmp eq i64 %iv.next, 1000
923 br i1 %exitcond, label %for.end, label %for.body
929 define void @maxnum_f32(ptr noalias %in.ptr, ptr %out.ptr) {
930 ; SLEEF-NEON-LABEL: define void @maxnum_f32
931 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
932 ; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
934 ; SLEEF-SVE-LABEL: define void @maxnum_f32
935 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
936 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
938 ; ARMPL-NEON-LABEL: define void @maxnum_f32
939 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
940 ; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
942 ; ARMPL-SVE-LABEL: define void @maxnum_f32
943 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
944 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
950 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
951 %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
952 %in = load float, ptr %in.gep, align 8
953 %call = tail call float @llvm.maxnum.f32(float %in, float %in)
954 %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
955 store float %call, ptr %out.gep, align 4
956 %iv.next = add nuw nsw i64 %iv, 1
957 %exitcond = icmp eq i64 %iv.next, 1000
958 br i1 %exitcond, label %for.end, label %for.body
964 declare double @llvm.minnum.f64(double, double)
965 declare float @llvm.minnum.f32(float, float)
967 define void @minnum_f64(ptr noalias %in.ptr, ptr %out.ptr) {
968 ; SLEEF-NEON-LABEL: define void @minnum_f64
969 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
970 ; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
972 ; SLEEF-SVE-LABEL: define void @minnum_f64
973 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
974 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
976 ; ARMPL-NEON-LABEL: define void @minnum_f64
977 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
978 ; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
980 ; ARMPL-SVE-LABEL: define void @minnum_f64
981 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
982 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
988 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
989 %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
990 %in = load double, ptr %in.gep, align 8
991 %call = tail call double @llvm.minnum.f64(double %in, double %in)
992 %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
993 store double %call, ptr %out.gep, align 8
994 %iv.next = add nuw nsw i64 %iv, 1
995 %exitcond = icmp eq i64 %iv.next, 1000
996 br i1 %exitcond, label %for.end, label %for.body
1002 define void @minnum_f32(ptr noalias %in.ptr, ptr %out.ptr) {
1003 ; SLEEF-NEON-LABEL: define void @minnum_f32
1004 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1005 ; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
1007 ; SLEEF-SVE-LABEL: define void @minnum_f32
1008 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1009 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
1011 ; ARMPL-NEON-LABEL: define void @minnum_f32
1012 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1013 ; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
1015 ; ARMPL-SVE-LABEL: define void @minnum_f32
1016 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1017 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
1023 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1024 %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
1025 %in = load float, ptr %in.gep, align 8
1026 %call = tail call float @llvm.minnum.f32(float %in, float %in)
1027 %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
1028 store float %call, ptr %out.gep, align 4
1029 %iv.next = add nuw nsw i64 %iv, 1
1030 %exitcond = icmp eq i64 %iv.next, 1000
1031 br i1 %exitcond, label %for.end, label %for.body
1037 declare double @llvm.nearbyint.f64(double)
1038 declare float @llvm.nearbyint.f32(float)
1040 define void @nearbyint_f64(ptr noalias %in.ptr, ptr %out.ptr) {
1041 ; SLEEF-NEON-LABEL: define void @nearbyint_f64
1042 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1043 ; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
1045 ; SLEEF-SVE-LABEL: define void @nearbyint_f64
1046 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1047 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
1049 ; ARMPL-NEON-LABEL: define void @nearbyint_f64
1050 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1051 ; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
1053 ; ARMPL-SVE-LABEL: define void @nearbyint_f64
1054 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1055 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
1061 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1062 %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
1063 %in = load double, ptr %in.gep, align 8
1064 %call = tail call double @llvm.nearbyint.f64(double %in)
1065 %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
1066 store double %call, ptr %out.gep, align 8
1067 %iv.next = add nuw nsw i64 %iv, 1
1068 %exitcond = icmp eq i64 %iv.next, 1000
1069 br i1 %exitcond, label %for.end, label %for.body
1075 define void @nearbyint_f32(ptr noalias %in.ptr, ptr %out.ptr) {
1076 ; SLEEF-NEON-LABEL: define void @nearbyint_f32
1077 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1078 ; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
1080 ; SLEEF-SVE-LABEL: define void @nearbyint_f32
1081 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1082 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
1084 ; ARMPL-NEON-LABEL: define void @nearbyint_f32
1085 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1086 ; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
1088 ; ARMPL-SVE-LABEL: define void @nearbyint_f32
1089 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1090 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
1096 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1097 %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
1098 %in = load float, ptr %in.gep, align 8
1099 %call = tail call float @llvm.nearbyint.f32(float %in)
1100 %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
1101 store float %call, ptr %out.gep, align 4
1102 %iv.next = add nuw nsw i64 %iv, 1
1103 %exitcond = icmp eq i64 %iv.next, 1000
1104 br i1 %exitcond, label %for.end, label %for.body
1110 declare double @llvm.pow.f64(double, double)
1111 declare float @llvm.pow.f32(float, float)
1113 define void @pow_f64(ptr noalias %in.ptr, ptr %out.ptr) {
1114 ; SLEEF-NEON-LABEL: define void @pow_f64
1115 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1116 ; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
1118 ; SLEEF-SVE-LABEL: define void @pow_f64
1119 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1120 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
1122 ; ARMPL-NEON-LABEL: define void @pow_f64
1123 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1124 ; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vpowq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
1126 ; ARMPL-SVE-LABEL: define void @pow_f64
1127 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1128 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svpow_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
1134 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1135 %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
1136 %in = load double, ptr %in.gep, align 8
1137 %call = tail call double @llvm.pow.f64(double %in, double %in)
1138 %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
1139 store double %call, ptr %out.gep, align 8
1140 %iv.next = add nuw nsw i64 %iv, 1
1141 %exitcond = icmp eq i64 %iv.next, 1000
1142 br i1 %exitcond, label %for.end, label %for.body
1148 define void @pow_f32(ptr noalias %in.ptr, ptr %out.ptr) {
1149 ; SLEEF-NEON-LABEL: define void @pow_f32
1150 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1151 ; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
1153 ; SLEEF-SVE-LABEL: define void @pow_f32
1154 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1155 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
1157 ; ARMPL-NEON-LABEL: define void @pow_f32
1158 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1159 ; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vpowq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
1161 ; ARMPL-SVE-LABEL: define void @pow_f32
1162 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1163 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svpow_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
1169 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1170 %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
1171 %in = load float, ptr %in.gep, align 8
1172 %call = tail call float @llvm.pow.f32(float %in, float %in)
1173 %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
1174 store float %call, ptr %out.gep, align 4
1175 %iv.next = add nuw nsw i64 %iv, 1
1176 %exitcond = icmp eq i64 %iv.next, 1000
1177 br i1 %exitcond, label %for.end, label %for.body
1183 declare double @llvm.rint.f64(double)
1184 declare float @llvm.rint.f32(float)
1186 define void @rint_f64(ptr noalias %in.ptr, ptr %out.ptr) {
1187 ; SLEEF-NEON-LABEL: define void @rint_f64
1188 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1189 ; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
1191 ; SLEEF-SVE-LABEL: define void @rint_f64
1192 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1193 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
1195 ; ARMPL-NEON-LABEL: define void @rint_f64
1196 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1197 ; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
1199 ; ARMPL-SVE-LABEL: define void @rint_f64
1200 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1201 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
1207 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1208 %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
1209 %in = load double, ptr %in.gep, align 8
1210 %call = tail call double @llvm.rint.f64(double %in)
1211 %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
1212 store double %call, ptr %out.gep, align 8
1213 %iv.next = add nuw nsw i64 %iv, 1
1214 %exitcond = icmp eq i64 %iv.next, 1000
1215 br i1 %exitcond, label %for.end, label %for.body
1221 define void @rint_f32(ptr noalias %in.ptr, ptr %out.ptr) {
1222 ; SLEEF-NEON-LABEL: define void @rint_f32
1223 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1224 ; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
1226 ; SLEEF-SVE-LABEL: define void @rint_f32
1227 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1228 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
1230 ; ARMPL-NEON-LABEL: define void @rint_f32
1231 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1232 ; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
1234 ; ARMPL-SVE-LABEL: define void @rint_f32
1235 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1236 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
1242 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1243 %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
1244 %in = load float, ptr %in.gep, align 8
1245 %call = tail call float @llvm.rint.f32(float %in)
1246 %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
1247 store float %call, ptr %out.gep, align 4
1248 %iv.next = add nuw nsw i64 %iv, 1
1249 %exitcond = icmp eq i64 %iv.next, 1000
1250 br i1 %exitcond, label %for.end, label %for.body
1256 declare double @llvm.round.f64(double)
1257 declare float @llvm.round.f32(float)
1259 define void @round_f64(ptr noalias %in.ptr, ptr %out.ptr) {
1260 ; SLEEF-NEON-LABEL: define void @round_f64
1261 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1262 ; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
1264 ; SLEEF-SVE-LABEL: define void @round_f64
1265 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1266 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
1268 ; ARMPL-NEON-LABEL: define void @round_f64
1269 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1270 ; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
1272 ; ARMPL-SVE-LABEL: define void @round_f64
1273 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1274 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
1280 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1281 %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
1282 %in = load double, ptr %in.gep, align 8
1283 %call = tail call double @llvm.round.f64(double %in)
1284 %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
1285 store double %call, ptr %out.gep, align 8
1286 %iv.next = add nuw nsw i64 %iv, 1
1287 %exitcond = icmp eq i64 %iv.next, 1000
1288 br i1 %exitcond, label %for.end, label %for.body
1294 define void @round_f32(ptr noalias %in.ptr, ptr %out.ptr) {
1295 ; SLEEF-NEON-LABEL: define void @round_f32
1296 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1297 ; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
1299 ; SLEEF-SVE-LABEL: define void @round_f32
1300 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1301 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
1303 ; ARMPL-NEON-LABEL: define void @round_f32
1304 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1305 ; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
1307 ; ARMPL-SVE-LABEL: define void @round_f32
1308 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1309 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
1315 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1316 %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
1317 %in = load float, ptr %in.gep, align 8
1318 %call = tail call float @llvm.round.f32(float %in)
1319 %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
1320 store float %call, ptr %out.gep, align 4
1321 %iv.next = add nuw nsw i64 %iv, 1
1322 %exitcond = icmp eq i64 %iv.next, 1000
1323 br i1 %exitcond, label %for.end, label %for.body
1329 declare double @llvm.sin.f64(double)
1330 declare float @llvm.sin.f32(float)
1332 define void @sin_f64(ptr noalias %in.ptr, ptr %out.ptr) {
1333 ; SLEEF-NEON-LABEL: define void @sin_f64
1334 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1335 ; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]])
1337 ; SLEEF-SVE-LABEL: define void @sin_f64
1338 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1339 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
1341 ; ARMPL-NEON-LABEL: define void @sin_f64
1342 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1343 ; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vsinq_f64(<2 x double> [[WIDE_LOAD:%.*]])
1345 ; ARMPL-SVE-LABEL: define void @sin_f64
1346 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1347 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
1353 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1354 %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
1355 %in = load double, ptr %in.gep, align 8
1356 %call = tail call double @llvm.sin.f64(double %in)
1357 %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
1358 store double %call, ptr %out.gep, align 8
1359 %iv.next = add nuw nsw i64 %iv, 1
1360 %exitcond = icmp eq i64 %iv.next, 1000
1361 br i1 %exitcond, label %for.end, label %for.body
1367 define void @sin_f32(ptr noalias %in.ptr, ptr %out.ptr) {
1368 ; SLEEF-NEON-LABEL: define void @sin_f32
1369 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1370 ; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[WIDE_LOAD:%.*]])
1372 ; SLEEF-SVE-LABEL: define void @sin_f32
1373 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1374 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
1376 ; ARMPL-NEON-LABEL: define void @sin_f32
1377 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1378 ; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vsinq_f32(<4 x float> [[WIDE_LOAD:%.*]])
1380 ; ARMPL-SVE-LABEL: define void @sin_f32
1381 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1382 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
1388 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1389 %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
1390 %in = load float, ptr %in.gep, align 8
1391 %call = tail call float @llvm.sin.f32(float %in)
1392 %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
1393 store float %call, ptr %out.gep, align 4
1394 %iv.next = add nuw nsw i64 %iv, 1
1395 %exitcond = icmp eq i64 %iv.next, 1000
1396 br i1 %exitcond, label %for.end, label %for.body
; Scalar sqrt intrinsic declarations used by the sqrt_f64/sqrt_f32 loops below.
; Note: unlike sin above (mapped to @_ZGVnN4v_sinf / @armpl_vsinq_f32 etc.),
; the CHECK lines for sqrt match the plain llvm.sqrt.v*/nxv* vector intrinsics
; under both SLEEF and ArmPL -- no vector-library mapping is substituted here.
1402 declare double @llvm.sqrt.f64(double)
1403 declare float @llvm.sqrt.f32(float)
1405 define void @sqrt_f64(ptr noalias %in.ptr, ptr %out.ptr) {
1406 ; SLEEF-NEON-LABEL: define void @sqrt_f64
1407 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1408 ; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
1410 ; SLEEF-SVE-LABEL: define void @sqrt_f64
1411 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1412 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
1414 ; ARMPL-NEON-LABEL: define void @sqrt_f64
1415 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1416 ; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
1418 ; ARMPL-SVE-LABEL: define void @sqrt_f64
1419 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1420 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
1426 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1427 %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
1428 %in = load double, ptr %in.gep, align 8
1429 %call = tail call double @llvm.sqrt.f64(double %in)
1430 %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
1431 store double %call, ptr %out.gep, align 8
1432 %iv.next = add nuw nsw i64 %iv, 1
1433 %exitcond = icmp eq i64 %iv.next, 1000
1434 br i1 %exitcond, label %for.end, label %for.body
1440 define void @sqrt_f32(ptr noalias %in.ptr, ptr %out.ptr) {
1441 ; SLEEF-NEON-LABEL: define void @sqrt_f32
1442 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1443 ; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
1445 ; SLEEF-SVE-LABEL: define void @sqrt_f32
1446 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1447 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
1449 ; ARMPL-NEON-LABEL: define void @sqrt_f32
1450 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1451 ; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
1453 ; ARMPL-SVE-LABEL: define void @sqrt_f32
1454 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1455 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
1461 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1462 %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
1463 %in = load float, ptr %in.gep, align 8
1464 %call = tail call float @llvm.sqrt.f32(float %in)
1465 %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
1466 store float %call, ptr %out.gep, align 4
1467 %iv.next = add nuw nsw i64 %iv, 1
1468 %exitcond = icmp eq i64 %iv.next, 1000
1469 br i1 %exitcond, label %for.end, label %for.body
; Scalar trunc intrinsic declarations used by the trunc_f64/trunc_f32 loops
; below. As with sqrt, the CHECK lines match the plain llvm.trunc.v*/nxv*
; vector intrinsics for all four run configurations -- neither SLEEF nor ArmPL
; provides (or needs) a library mapping for trunc in these tests.
1475 declare double @llvm.trunc.f64(double)
1476 declare float @llvm.trunc.f32(float)
1478 define void @trunc_f64(ptr noalias %in.ptr, ptr %out.ptr) {
1479 ; SLEEF-NEON-LABEL: define void @trunc_f64
1480 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1481 ; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
1483 ; SLEEF-SVE-LABEL: define void @trunc_f64
1484 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1485 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
1487 ; ARMPL-NEON-LABEL: define void @trunc_f64
1488 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1489 ; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
1491 ; ARMPL-SVE-LABEL: define void @trunc_f64
1492 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1493 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
1499 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1500 %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
1501 %in = load double, ptr %in.gep, align 8
1502 %call = tail call double @llvm.trunc.f64(double %in)
1503 %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
1504 store double %call, ptr %out.gep, align 8
1505 %iv.next = add nuw nsw i64 %iv, 1
1506 %exitcond = icmp eq i64 %iv.next, 1000
1507 br i1 %exitcond, label %for.end, label %for.body
1513 define void @trunc_f32(ptr noalias %in.ptr, ptr %out.ptr) {
1514 ; SLEEF-NEON-LABEL: define void @trunc_f32
1515 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1516 ; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
1518 ; SLEEF-SVE-LABEL: define void @trunc_f32
1519 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1520 ; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
1522 ; ARMPL-NEON-LABEL: define void @trunc_f32
1523 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1524 ; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
1526 ; ARMPL-SVE-LABEL: define void @trunc_f32
1527 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
1528 ; ARMPL-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
1534 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1535 %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
1536 %in = load float, ptr %in.gep, align 8
1537 %call = tail call float @llvm.trunc.f32(float %in)
1538 %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
1539 store float %call, ptr %out.gep, align 4
1540 %iv.next = add nuw nsw i64 %iv, 1
1541 %exitcond = icmp eq i64 %iv.next, 1000
1542 br i1 %exitcond, label %for.end, label %for.body