; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF4
; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF2
; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -mattr=+avx512f -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF8
; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=16 -force-vector-interleave=1 -mattr=+avx512f -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF16
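
; Each RUN line forces a single vectorization factor. When the AMDLIBM mapping
; table provides a vector variant for that VF, the scalar libm call or LLVM
; math intrinsic in the loop body is expected to be widened to the matching
; amd_vrdN_*/amd_vrsN_* routine; otherwise the checks expect the generic LLVM
; vector intrinsic (e.g. @llvm.sin.v16f64) instead.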

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

declare double @sin(double) #0
declare float @sinf(float) #0
declare double @llvm.sin.f64(double) #0
declare float @llvm.sin.f32(float) #0

declare double @cos(double) #0
declare float @cosf(float) #0
declare double @llvm.cos.f64(double) #0
declare float @llvm.cos.f32(float) #0

declare double @tan(double) #0
declare float @tanf(float) #0
declare double @llvm.tan.f64(double) #0
declare float @llvm.tan.f32(float) #0

declare double @acos(double) #0
declare float @acosf(float) #0
declare double @llvm.acos.f64(double) #0
declare float @llvm.acos.f32(float) #0

declare double @asin(double) #0
declare float @asinf(float) #0
declare double @llvm.asin.f64(double) #0
declare float @llvm.asin.f32(float) #0

declare double @atan(double) #0
declare float @atanf(float) #0
declare double @llvm.atan.f64(double) #0
declare float @llvm.atan.f32(float) #0

declare double @sinh(double) #0
declare float @sinhf(float) #0
declare double @llvm.sinh.f64(double) #0
declare float @llvm.sinh.f32(float) #0

declare double @cosh(double) #0
declare float @coshf(float) #0
declare double @llvm.cosh.f64(double) #0
declare float @llvm.cosh.f32(float) #0

declare double @tanh(double) #0
declare float @tanhf(float) #0
declare double @llvm.tanh.f64(double) #0
declare float @llvm.tanh.f32(float) #0

declare double @pow(double, double) #0
declare float @powf(float, float) #0
declare double @llvm.pow.f64(double, double) #0
declare float @llvm.pow.f32(float, float) #0

declare double @exp(double) #0
declare float @expf(float) #0
declare double @llvm.exp.f64(double) #0
declare float @llvm.exp.f32(float) #0

declare double @log(double) #0
declare float @logf(float) #0
declare double @llvm.log.f64(double) #0
declare float @llvm.log.f32(float) #0

declare double @log2(double) #0
declare float @log2f(float) #0
declare double @llvm.log2.f64(double) #0
declare float @llvm.log2.f32(float) #0

declare double @log10(double) #0
declare float @log10f(float) #0
declare double @llvm.log10.f64(double) #0
declare float @llvm.log10.f32(float) #0

declare double @sqrt(double) #0
declare float @sqrtf(float) #0

declare double @exp2(double) #0
declare float @exp2f(float) #0
declare double @llvm.exp2.f64(double) #0
declare float @llvm.exp2.f32(float) #0

define void @sin_f64(ptr nocapture %varray) {
; CHECK-LABEL: @sin_f64(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_sin(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_sin(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_sin(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.sin.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @sin(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @sin_f32(ptr nocapture %varray) {
; CHECK-LABEL: @sin_f32(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_sinf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_sinf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_sinf(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @sinf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @sin_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @sin_f64_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_sin(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_sin(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_sin(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.sin.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.sin.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @sin_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @sin_f32_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_sinf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_sinf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_sinf(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.sin.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @cos_f64(ptr nocapture %varray) {
; CHECK-LABEL: @cos_f64(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cos(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_cos(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_cos(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.cos.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @cos(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @cos_f32(ptr nocapture %varray) {
; CHECK-LABEL: @cos_f32(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_cosf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_cosf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_cosf(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @cosf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @cos_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @cos_f64_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cos(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_cos(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_cos(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.cos.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.cos.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @cos_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @cos_f32_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_cosf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_cosf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_cosf(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.cos.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @tan_f64(ptr nocapture %varray) {
; CHECK-LABEL: @tan_f64(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_tan(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.tan.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @tan(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @tan_f32(ptr nocapture %varray) {
; CHECK-LABEL: @tan_f32(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @tanf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @tan_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @tan_f64_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_tan(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.tan.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.tan.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @tan_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @tan_f32_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.tan.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @acos_f64(ptr nocapture %varray) {
; CHECK-LABEL: @acos_f64(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.acos.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.acos.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.acos.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.acos.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @acos(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @acos_f32(ptr nocapture %varray) {
; CHECK-LABEL: @acos_f32(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_acosf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_acosf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_acosf(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @acosf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @acos_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @acos_f64_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.acos.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.acos.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.acos.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.acos.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.acos.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @acos_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @acos_f32_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_acosf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_acosf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_acosf(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.acos.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @asin_f64(ptr nocapture %varray) {
; CHECK-LABEL: @asin_f64(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.asin.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.asin.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_asin(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.asin.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @asin(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @asin_f32(ptr nocapture %varray) {
; CHECK-LABEL: @asin_f32(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_asinf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_asinf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_asinf(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @asinf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @asin_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @asin_f64_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.asin.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.asin.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_asin(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.asin.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.asin.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @asin_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @asin_f32_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_asinf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_asinf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_asinf(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.asin.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @atan_f64(ptr nocapture %varray) {
; CHECK-LABEL: @atan_f64(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_atan(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_atan(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_atan(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.atan.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @atan(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @atan_f32(ptr nocapture %varray) {
; CHECK-LABEL: @atan_f32(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_atanf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_atanf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_atanf(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @atanf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @atan_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @atan_f64_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_atan(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_atan(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_atan(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.atan.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.atan.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @atan_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @atan_f32_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_atanf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_atanf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_atanf(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.atan.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @sinh_f64(ptr nocapture %varray) {
; CHECK-LABEL: @sinh_f64(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.sinh.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.sinh.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.sinh.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.sinh.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @sinh(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @sinh_f32(ptr nocapture %varray) {
; CHECK-LABEL: @sinh_f32(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @llvm.sinh.v4f32(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @llvm.sinh.v8f32(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @llvm.sinh.v16f32(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @sinhf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @sinh_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @sinh_f64_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.sinh.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.sinh.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.sinh.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.sinh.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.sinh.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @sinh_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @sinh_f32_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @llvm.sinh.v4f32(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @llvm.sinh.v8f32(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @llvm.sinh.v16f32(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.sinh.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @cosh_f64(ptr nocapture %varray) {
; CHECK-LABEL: @cosh_f64(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.cosh.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.cosh.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.cosh.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @cosh(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @cosh_f32(ptr nocapture %varray) {
; CHECK-LABEL: @cosh_f32(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_coshf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @llvm.cosh.v16f32(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @coshf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @cosh_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @cosh_f64_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.cosh.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.cosh.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.cosh.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.cosh.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @cosh_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @cosh_f32_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_coshf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @llvm.cosh.v16f32(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.cosh.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @tanh_f64(ptr nocapture %varray) {
; CHECK-LABEL: @tanh_f64(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.tanh.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.tanh.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.tanh.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.tanh.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @tanh(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @tanh_f32(ptr nocapture %varray) {
; CHECK-LABEL: @tanh_f32(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanhf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @tanhf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @tanh_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @tanh_f64_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.tanh.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.tanh.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.tanh.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.tanh.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.tanh.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @tanh_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @tanh_f32_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanhf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.tanh.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f64(
; CHECK-VF2: [[TMP8:%.*]] = call <2 x double> @amd_vrd2_pow(<2 x double> [[TMP4:%.*]], <2 x double> [[WIDE_LOAD:%.*]])
; CHECK-VF4: [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
; CHECK-VF8: [[TMP8:%.*]] = call <8 x double> @amd_vrd8_pow(<8 x double> [[TMP4:%.*]], <8 x double> [[WIDE_LOAD:%.*]])
; CHECK-VF16: [[TMP8:%.*]] = call <16 x double> @llvm.pow.v16f64(<16 x double> [[TMP4:%.*]], <16 x double> [[WIDE_LOAD:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %arrayidx = getelementptr inbounds double, ptr %exp, i64 %iv
  %tmp1 = load double, ptr %arrayidx, align 4
  %tmp2 = tail call double @pow(double %conv, double %tmp1)
  %arrayidx2 = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %tmp2, ptr %arrayidx2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @pow_f64_intrinsic(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f64_intrinsic(
; CHECK-VF2: [[TMP8:%.*]] = call <2 x double> @amd_vrd2_pow(<2 x double> [[TMP4:%.*]], <2 x double> [[WIDE_LOAD:%.*]])
; CHECK-VF4: [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
; CHECK-VF8: [[TMP8:%.*]] = call <8 x double> @amd_vrd8_pow(<8 x double> [[TMP4:%.*]], <8 x double> [[WIDE_LOAD:%.*]])
; CHECK-VF16: [[TMP8:%.*]] = call <16 x double> @llvm.pow.v16f64(<16 x double> [[TMP4:%.*]], <16 x double> [[WIDE_LOAD:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %arrayidx = getelementptr inbounds double, ptr %exp, i64 %iv
  %tmp1 = load double, ptr %arrayidx, align 4
  %tmp2 = tail call double @llvm.pow.f64(double %conv, double %tmp1)
  %arrayidx2 = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %tmp2, ptr %arrayidx2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @pow_f32(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f32(
; CHECK-VF2: [[TMP8:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[TMP4:%.*]], <2 x float> [[WIDE_LOAD:%.*]])
; CHECK-VF4: [[TMP8:%.*]] = call <4 x float> @amd_vrs4_powf(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
; CHECK-VF8: [[TMP8:%.*]] = call <8 x float> @amd_vrs8_powf(<8 x float> [[TMP4:%.*]], <8 x float> [[WIDE_LOAD:%.*]])
; CHECK-VF16: [[TMP8:%.*]] = call <16 x float> @amd_vrs16_powf(<16 x float> [[TMP4:%.*]], <16 x float> [[WIDE_LOAD:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %arrayidx = getelementptr inbounds float, ptr %exp, i64 %iv
  %tmp1 = load float, ptr %arrayidx, align 4
  %tmp2 = tail call float @powf(float %conv, float %tmp1)
  %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %tmp2, ptr %arrayidx2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @pow_f32_intrinsic(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f32_intrinsic(
; CHECK-VF2: [[TMP8:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[TMP4:%.*]], <2 x float> [[WIDE_LOAD:%.*]])
; CHECK-VF4: [[TMP8:%.*]] = call <4 x float> @amd_vrs4_powf(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
; CHECK-VF8: [[TMP8:%.*]] = call <8 x float> @amd_vrs8_powf(<8 x float> [[TMP4:%.*]], <8 x float> [[WIDE_LOAD:%.*]])
; CHECK-VF16: [[TMP8:%.*]] = call <16 x float> @amd_vrs16_powf(<16 x float> [[TMP4:%.*]], <16 x float> [[WIDE_LOAD:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %arrayidx = getelementptr inbounds float, ptr %exp, i64 %iv
  %tmp1 = load float, ptr %arrayidx, align 4
  %tmp2 = tail call float @llvm.pow.f32(float %conv, float %tmp1)
  %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %tmp2, ptr %arrayidx2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @exp_f64(ptr nocapture %varray) {
; CHECK-LABEL: @exp_f64(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.exp.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @exp(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @exp_f32(ptr nocapture %varray) {
; CHECK-LABEL: @exp_f32(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_expf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_expf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_expf(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @expf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @exp_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @exp_f64_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.exp.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.exp.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @exp_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @exp_f32_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_expf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_expf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_expf(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.exp.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log_f64(ptr nocapture %varray) {
; CHECK-LABEL: @log_f64(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.log.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @log(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log_f32(ptr nocapture %varray) {
; CHECK-LABEL: @log_f32(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.log.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_logf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_logf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_logf(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @logf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log_f64_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.log.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.log.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log_f32_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.log.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_logf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_logf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_logf(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.log.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log2_f64(ptr nocapture %varray) {
; CHECK-LABEL: @log2_f64(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log2(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log2(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log2(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.log2.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @log2(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log2_f32(ptr nocapture %varray) {
; CHECK-LABEL: @log2_f32(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.log2.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log2f(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_log2f(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log2f(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @log2f(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log2_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log2_f64_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log2(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log2(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log2(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.log2.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.log2.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log2_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log2_f32_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.log2.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log2f(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_log2f(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log2f(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.log2.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log10_f64(ptr nocapture %varray) {
; CHECK-LABEL: @log10_f64(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log10(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.log10.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.log10.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.log10.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @log10(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log10_f32(ptr nocapture %varray) {
; CHECK-LABEL: @log10_f32(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.log10.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log10f(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_log10f(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log10f(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @log10f(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log10_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log10_f64_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log10(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.log10.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.log10.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.log10.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.log10.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log10_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log10_f32_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.log10.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log10f(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_log10f(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log10f(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.log10.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1551 define void @exp2_f64(ptr nocapture %varray) {
1552 ; CHECK-LABEL: @exp2_f64(
1553 ; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp2(<2 x double> [[TMP4:%.*]])
1554 ; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp2(<4 x double> [[TMP4:%.*]])
1555 ; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp2(<8 x double> [[TMP4:%.*]])
1556 ; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.exp2.v16f64(<16 x double> [[TMP4:%.*]])
1563 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1564 %tmp = trunc i64 %iv to i32
1565 %conv = sitofp i32 %tmp to double
1566 %call = tail call double @exp2(double %conv)
1567 %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
1568 store double %call, ptr %arrayidx, align 4
1569 %iv.next = add nuw nsw i64 %iv, 1
1570 %exitcond = icmp eq i64 %iv.next, 1000
1571 br i1 %exitcond, label %for.end, label %for.body
1577 define void @exp2_f32(ptr nocapture %varray) {
1578 ; CHECK-LABEL: @exp2_f32(
1579 ; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[TMP4:%.*]])
1580 ; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp2f(<4 x float> [[TMP4:%.*]])
1581 ; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_exp2f(<8 x float> [[TMP4:%.*]])
1582 ; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_exp2f(<16 x float> [[TMP4:%.*]])
1589 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1590 %tmp = trunc i64 %iv to i32
1591 %conv = sitofp i32 %tmp to float
1592 %call = tail call float @exp2f(float %conv)
1593 %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
1594 store float %call, ptr %arrayidx, align 4
1595 %iv.next = add nuw nsw i64 %iv, 1
1596 %exitcond = icmp eq i64 %iv.next, 1000
1597 br i1 %exitcond, label %for.end, label %for.body
define void @exp2_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @exp2_f64_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp2(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp2(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp2(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.exp2.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.exp2.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
define void @exp2_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @exp2_f32_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp2f(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_exp2f(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_exp2f(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.exp2.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
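; exp10 double precision: the CHECK lines only expect an AMDLIBM mapping for
; VF2 (amd_vrd2_exp10); wider factors keep the llvm.exp10 intrinsic.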
define void @exp10_f64(ptr nocapture %varray) {
; CHECK-LABEL: @exp10_f64(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp10(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: call <4 x double> @llvm.exp10.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: call <8 x double> @llvm.exp10.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: call <16 x double> @llvm.exp10.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @exp10(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
define void @exp10_f32(ptr nocapture %varray) {
; CHECK-LABEL: @exp10_f32(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp10f(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @llvm.exp10.v8f32(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @llvm.exp10.v16f32(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @exp10f(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
define void @exp10_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @exp10_f64_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp10(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.exp10.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.exp10.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.exp10.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.exp10.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
define void @exp10_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @exp10_f32_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp10f(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @llvm.exp10.v8f32(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @llvm.exp10.v16f32(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.exp10.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
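; sincos writes its results through pointers, and the CHECK-*-NOT lines below
; verify that no amd_vr*_sincos(f) vector variant is emitted for these loops.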
define void @sincos_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; CHECK-LABEL: define void @sincos_f64
; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]])
; CHECK-VF2-NOT: call void @amd_vrd2_sincos(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
; CHECK-VF4-NOT: call void @amd_vrd4_sincos(<4 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
; CHECK-VF8-NOT: call void @amd_vrd8_sincos(<8 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gepa = getelementptr double, ptr %a, i64 %indvars.iv
  %num = load double, ptr %gepa, align 8
  %gepb = getelementptr double, ptr %b, i64 %indvars.iv
  %gepc = getelementptr double, ptr %c, i64 %indvars.iv
  call void @sincos(double %num, ptr %gepb, ptr %gepc)
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}
define void @sincos_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; CHECK-LABEL: define void @sincos_f32
; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]])
; CHECK-VF4-NOT: call void @amd_vrs4_sincosf(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
; CHECK-VF8-NOT: call void @amd_vrs8_sincosf(<8 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
; CHECK-VF16-NOT: call void @amd_vrs16_sincosf(<16 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gepa = getelementptr float, ptr %a, i64 %indvars.iv
  %num = load float, ptr %gepa, align 8
  %gepb = getelementptr float, ptr %b, i64 %indvars.iv
  %gepc = getelementptr float, ptr %c, i64 %indvars.iv
  call void @sincosf(float %num, ptr %gepb, ptr %gepc)
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}
attributes #0 = { nounwind readnone }

declare double @exp10(double) #0
declare float @exp10f(float) #0
declare double @llvm.exp10.f64(double) #0
declare float @llvm.exp10.f32(float) #0
declare void @sincos(double, ptr, ptr)
declare void @sincosf(float, ptr, ptr)