; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s
; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -mattr=+avx512f -S < %s | FileCheck %s --check-prefix=CHECK-AVX512-VF8
; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=16 -force-vector-interleave=1 -mattr=+avx512f -S < %s | FileCheck %s --check-prefix=CHECK-AVX512-VF16
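
; Check that the loop vectorizer, with TLI mappings for the AMD math library
; (AMDLIBM) injected, replaces scalar libm calls and the matching LLVM math
; intrinsics with the library's vector variants: 4-wide amd_vrd4_*/amd_vrs4_*
; calls with AVX, and 8-wide double (amd_vrd8_*) / 16-wide float (amd_vrs16_*)
; calls with AVX512.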

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

declare double @sin(double) #0
declare float @sinf(float) #0
declare double @llvm.sin.f64(double) #0
declare float @llvm.sin.f32(float) #0

declare double @cos(double) #0
declare float @cosf(float) #0
declare double @llvm.cos.f64(double) #0
declare float @llvm.cos.f32(float) #0

declare double @tan(double) #0
declare float @tanf(float) #0
declare double @llvm.tan.f64(double) #0
declare float @llvm.tan.f32(float) #0

declare double @pow(double, double) #0
declare float @powf(float, float) #0
declare double @llvm.pow.f64(double, double) #0
declare float @llvm.pow.f32(float, float) #0

declare double @exp(double) #0
declare float @expf(float) #0
declare double @llvm.exp.f64(double) #0
declare float @llvm.exp.f32(float) #0

declare double @log(double) #0
declare float @logf(float) #0
declare double @llvm.log.f64(double) #0
declare float @llvm.log.f32(float) #0

declare double @log2(double) #0
declare float @log2f(float) #0
declare double @llvm.log2.f64(double) #0
declare float @llvm.log2.f32(float) #0

declare double @log10(double) #0
declare float @log10f(float) #0
declare double @llvm.log10.f64(double) #0
declare float @llvm.log10.f32(float) #0

declare double @sqrt(double) #0
declare float @sqrtf(float) #0

declare double @exp2(double) #0
declare float @exp2f(float) #0
declare double @llvm.exp2.f64(double) #0
declare float @llvm.exp2.f32(float) #0

define void @sin_f64(ptr nocapture %varray) {
; CHECK-LABEL: @sin_f64(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_sin(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @sin_f64(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_sin(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @sin(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @sin_f32(ptr nocapture %varray) {
; CHECK-LABEL: @sin_f32(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_sinf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @sin_f32(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_sinf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @sinf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @sin_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @sin_f64_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_sin(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @sin_f64_intrinsic(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_sin(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.sin.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @sin_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @sin_f32_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_sinf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @sin_f32_intrinsic(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_sinf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.sin.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @cos_f64(ptr nocapture %varray) {
; CHECK-LABEL: @cos_f64(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_cos(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @cos_f64(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_cos(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @cos(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @cos_f32(ptr nocapture %varray) {
; CHECK-LABEL: @cos_f32(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_cosf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @cos_f32(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_cosf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @cosf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @cos_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @cos_f64_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_cos(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @cos_f64_intrinsic(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_cos(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.cos.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @cos_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @cos_f32_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_cosf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @cos_f32_intrinsic(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_cosf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.cos.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @tan_f64(ptr nocapture %varray) {
; CHECK-LABEL: @tan_f64(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @tan_f64(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @tan(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @tan_f32(ptr nocapture %varray) {
; CHECK-LABEL: @tan_f32(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @tan_f32(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @tanf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @tan_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @tan_f64_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @tan_f64_intrinsic(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.tan.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @tan_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @tan_f32_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @tan_f32_intrinsic(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.tan.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f64(
; CHECK: [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @pow_f64(
; CHECK-AVX512-VF8: [[TMP8:%.*]] = call <8 x double> @amd_vrd8_pow(<8 x double> [[TMP4:%.*]], <8 x double> [[WIDE_LOAD:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %arrayidx = getelementptr inbounds double, ptr %exp, i64 %iv
  %tmp1 = load double, ptr %arrayidx, align 4
  %tmp2 = tail call double @pow(double %conv, double %tmp1)
  %arrayidx2 = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %tmp2, ptr %arrayidx2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @pow_f64_intrinsic(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f64_intrinsic(
; CHECK: [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @pow_f64_intrinsic(
; CHECK-AVX512-VF8: [[TMP8:%.*]] = call <8 x double> @amd_vrd8_pow(<8 x double> [[TMP4:%.*]], <8 x double> [[WIDE_LOAD:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %arrayidx = getelementptr inbounds double, ptr %exp, i64 %iv
  %tmp1 = load double, ptr %arrayidx, align 4
  %tmp2 = tail call double @llvm.pow.f64(double %conv, double %tmp1)
  %arrayidx2 = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %tmp2, ptr %arrayidx2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @pow_f32(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f32(
; CHECK: [[TMP8:%.*]] = call <4 x float> @amd_vrs4_powf(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @pow_f32(
; CHECK-AVX512-VF16: [[TMP8:%.*]] = call <16 x float> @amd_vrs16_powf(<16 x float> [[TMP4:%.*]], <16 x float> [[WIDE_LOAD:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %arrayidx = getelementptr inbounds float, ptr %exp, i64 %iv
  %tmp1 = load float, ptr %arrayidx, align 4
  %tmp2 = tail call float @powf(float %conv, float %tmp1)
  %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %tmp2, ptr %arrayidx2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @pow_f32_intrinsic(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f32_intrinsic(
; CHECK: [[TMP8:%.*]] = call <4 x float> @amd_vrs4_powf(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @pow_f32_intrinsic(
; CHECK-AVX512-VF16: [[TMP8:%.*]] = call <16 x float> @amd_vrs16_powf(<16 x float> [[TMP4:%.*]], <16 x float> [[WIDE_LOAD:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %arrayidx = getelementptr inbounds float, ptr %exp, i64 %iv
  %tmp1 = load float, ptr %arrayidx, align 4
  %tmp2 = tail call float @llvm.pow.f32(float %conv, float %tmp1)
  %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %tmp2, ptr %arrayidx2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @exp_f64(ptr nocapture %varray) {
; CHECK-LABEL: @exp_f64(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @exp_f64(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @exp(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @exp_f32(ptr nocapture %varray) {
; CHECK-LABEL: @exp_f32(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_expf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @exp_f32(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_expf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @expf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @exp_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @exp_f64_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @exp_f64_intrinsic(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.exp.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @exp_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @exp_f32_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_expf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @exp_f32_intrinsic(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_expf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.exp.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log_f64(ptr nocapture %varray) {
; CHECK-LABEL: @log_f64(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @log_f64(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @log(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log_f32(ptr nocapture %varray) {
; CHECK-LABEL: @log_f32(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_logf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @log_f32(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_logf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @logf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log_f64_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @log_f64_intrinsic(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.log.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log_f32_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_logf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @log_f32_intrinsic(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_logf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.log.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log2_f64(ptr nocapture %varray) {
; CHECK-LABEL: @log2_f64(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log2(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @log2_f64(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log2(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @log2(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log2_f32(ptr nocapture %varray) {
; CHECK-LABEL: @log2_f32(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log2f(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @log2_f32(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log2f(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @log2f(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log2_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log2_f64_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log2(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @log2_f64_intrinsic(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log2(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.log2.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log2_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log2_f32_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log2f(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @log2_f32_intrinsic(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log2f(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.log2.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log10_f32(ptr nocapture %varray) {
; CHECK-LABEL: @log10_f32(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log10f(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @log10_f32(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log10f(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @log10f(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log10_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log10_f32_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log10f(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @log10_f32_intrinsic(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log10f(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.log10.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @exp2_f64(ptr nocapture %varray) {
; CHECK-LABEL: @exp2_f64(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp2(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @exp2_f64(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp2(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @exp2(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @exp2_f32(ptr nocapture %varray) {
; CHECK-LABEL: @exp2_f32(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp2f(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @exp2_f32(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_exp2f(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @exp2f(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @exp2_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @exp2_f64_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp2(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @exp2_f64_intrinsic(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp2(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.exp2.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @exp2_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @exp2_f32_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp2f(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @exp2_f32_intrinsic(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_exp2f(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.exp2.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

attributes #0 = { nounwind readnone }