1 ; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s
3 ; Test to verify that when math headers are built with
4 ; __FINITE_MATH_ONLY__ enabled, causing use of __<func>_finite
5 ; function versions, vectorization can map these to vector versions.
7 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
8 target triple = "x86_64-unknown-linux-gnu"
; Scalar loop that calls @__expf_finite on float(i) for i = 0 .. 999 and stores
; each result to %varray[i]. The loop is tagged with !llvm.loop !4 (vectorize
; enabled, width 4 - see the metadata at the end of the file), and the pattern
; below looks for the <4 x float> @amd_vrs4_expf variant in the output.
10 define void @exp_f32(ptr nocapture %varray) {
11 ; CHECK-LABEL: @exp_f32
12 ; CHECK: <4 x float> @amd_vrs4_expf
17 for.body: ; preds = %for.body, %entry
18 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; The call argument is derived from the induction variable: i64 -> i32 -> float.
19 %tmp = trunc i64 %indvars.iv to i32
20 %conv = sitofp i32 %tmp to float
21 %call = tail call fast float @__expf_finite(float %conv)
22 %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
23 store float %call, ptr %arrayidx, align 4
; Advance the index and leave the loop once the incremented value reaches 1000.
24 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
25 %exitcond = icmp eq i64 %indvars.iv.next, 1000
26 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
28 for.end: ; preds = %for.body
; Double-precision counterpart: calls @__exp_finite on double(i) for
; i = 0 .. 999 and stores each result to %varray[i]. The loop uses
; !llvm.loop !4 (width 4); the pattern below looks for the
; <4 x double> @amd_vrd4_exp variant.
32 define void @exp_f64(ptr nocapture %varray) {
33 ; CHECK-LABEL: @exp_f64
34 ; CHECK: <4 x double> @amd_vrd4_exp
39 for.body: ; preds = %for.body, %entry
40 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; The call argument is derived from the induction variable: i64 -> i32 -> double.
41 %tmp = trunc i64 %indvars.iv to i32
42 %conv = sitofp i32 %tmp to double
43 %call = tail call fast double @__exp_finite(double %conv)
44 %arrayidx = getelementptr inbounds double, ptr %varray, i64 %indvars.iv
; NOTE(review): the double store is tagged align 4 rather than 8 - presumably
; irrelevant to what this test verifies; confirm before changing.
45 store double %call, ptr %arrayidx, align 4
; Advance the index and leave the loop once the incremented value reaches 1000.
46 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
47 %exitcond = icmp eq i64 %indvars.iv.next, 1000
48 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
50 for.end: ; preds = %for.body
; Scalar loop that calls @__logf_finite on float(i) for i = 0 .. 999 and stores
; each result to %varray[i]. The loop uses !llvm.loop !4 (width 4); the pattern
; below looks for the <4 x float> @amd_vrs4_logf variant.
54 define void @log_f32(ptr nocapture %varray) {
55 ; CHECK-LABEL: @log_f32
56 ; CHECK: <4 x float> @amd_vrs4_logf
61 for.body: ; preds = %for.body, %entry
62 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; The call argument is derived from the induction variable: i64 -> i32 -> float.
63 %tmp = trunc i64 %indvars.iv to i32
64 %conv = sitofp i32 %tmp to float
65 %call = tail call fast float @__logf_finite(float %conv)
66 %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
67 store float %call, ptr %arrayidx, align 4
; Advance the index and leave the loop once the incremented value reaches 1000.
68 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
69 %exitcond = icmp eq i64 %indvars.iv.next, 1000
70 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
72 for.end: ; preds = %for.body
; Double-precision counterpart: calls @__log_finite on double(i) for
; i = 0 .. 999 and stores each result to %varray[i]. The loop uses
; !llvm.loop !4 (width 4); the pattern below looks for the
; <4 x double> @amd_vrd4_log variant.
76 define void @log_f64(ptr nocapture %varray) {
77 ; CHECK-LABEL: @log_f64
78 ; CHECK: <4 x double> @amd_vrd4_log
83 for.body: ; preds = %for.body, %entry
84 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; The call argument is derived from the induction variable: i64 -> i32 -> double.
85 %tmp = trunc i64 %indvars.iv to i32
86 %conv = sitofp i32 %tmp to double
87 %call = tail call fast double @__log_finite(double %conv)
88 %arrayidx = getelementptr inbounds double, ptr %varray, i64 %indvars.iv
; NOTE(review): the double store is tagged align 4 rather than 8 - presumably
; irrelevant to what this test verifies; confirm before changing.
89 store double %call, ptr %arrayidx, align 4
; Advance the index and leave the loop once the incremented value reaches 1000.
90 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
91 %exitcond = icmp eq i64 %indvars.iv.next, 1000
92 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
94 for.end: ; preds = %for.body
; Two-argument case: for i = 0 .. 999 calls @__powf_finite with float(i) as the
; first argument and the value loaded from %exp[i] as the second, then stores
; the result to %varray[i]. The loop uses !llvm.loop !4 (width 4); the pattern
; below looks for the <4 x float> @amd_vrs4_powf variant.
98 define void @pow_f32(ptr nocapture %varray, ptr nocapture readonly %exp) {
99 ; CHECK-LABEL: @pow_f32
100 ; CHECK: <4 x float> @amd_vrs4_powf
105 for.body: ; preds = %for.body, %entry
106 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; First call argument: the induction variable converted i64 -> i32 -> float.
107 %tmp = trunc i64 %indvars.iv to i32
108 %conv = sitofp i32 %tmp to float
; Second call argument: element i of the read-only %exp array.
109 %arrayidx = getelementptr inbounds float, ptr %exp, i64 %indvars.iv
110 %tmp1 = load float, ptr %arrayidx, align 4
111 %tmp2 = tail call fast float @__powf_finite(float %conv, float %tmp1)
112 %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
113 store float %tmp2, ptr %arrayidx2, align 4
; Advance the index and leave the loop once the incremented value reaches 1000.
114 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
115 %exitcond = icmp eq i64 %indvars.iv.next, 1000
116 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
118 for.end: ; preds = %for.body
; Double-precision two-argument case: for i = 0 .. 999 calls @__pow_finite with
; double(i) as the first argument and the value loaded from %exp[i] as the
; second, then stores the result to %varray[i]. The loop uses !llvm.loop !4
; (width 4); the pattern below looks for the <4 x double> @amd_vrd4_pow variant.
122 define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
123 ; CHECK-LABEL: @pow_f64
124 ; CHECK: <4 x double> @amd_vrd4_pow
129 for.body: ; preds = %for.body, %entry
130 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; First call argument: the induction variable converted i64 -> i32 -> double.
131 %tmp = trunc i64 %indvars.iv to i32
132 %conv = sitofp i32 %tmp to double
; Second call argument: element i of the read-only %exp array.
; NOTE(review): both the double load and the double store below are tagged
; align 4 rather than 8 - presumably irrelevant to this test; confirm.
133 %arrayidx = getelementptr inbounds double, ptr %exp, i64 %indvars.iv
134 %tmp1 = load double, ptr %arrayidx, align 4
135 %tmp2 = tail call fast double @__pow_finite(double %conv, double %tmp1)
136 %arrayidx2 = getelementptr inbounds double, ptr %varray, i64 %indvars.iv
137 store double %tmp2, ptr %arrayidx2, align 4
; Advance the index and leave the loop once the incremented value reaches 1000.
138 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
139 %exitcond = icmp eq i64 %indvars.iv.next, 1000
140 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
; Scalar loop that calls @__exp2f_finite on float(i) for i = 0 .. 999 and stores
; each result to %varray[i]. Unlike the *_f32/*_f64 tests in this file, the call
; carries no fast-math flags. The loop uses !llvm.loop !4 (width 4); the pattern
; below requires a call to @amd_vrs4_exp2f taking a <4 x float> operand.
146 define void @exp2f_finite(ptr nocapture %varray) {
147 ; CHECK-LABEL: @exp2f_finite(
148 ; CHECK: call <4 x float> @amd_vrs4_exp2f(<4 x float> %{{.*}})
155 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
; The call argument is derived from the induction variable: i64 -> i32 -> float.
156 %tmp = trunc i64 %iv to i32
157 %conv = sitofp i32 %tmp to float
158 %call = tail call float @__exp2f_finite(float %conv)
159 %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
160 store float %call, ptr %arrayidx, align 4
; Advance the index and leave the loop once the incremented value reaches 1000.
161 %iv.next = add nuw nsw i64 %iv, 1
162 %exitcond = icmp eq i64 %iv.next, 1000
163 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
; Scalar loop that calls @__exp2_finite on double(i) for i = 0 .. 999 and stores
; each result to %varray[i]. The call carries no fast-math flags. The loop uses
; !llvm.loop !4 (width 4); the pattern below requires a call to @amd_vrd4_exp2
; taking a <4 x double> operand.
169 define void @exp2_finite(ptr nocapture %varray) {
170 ; CHECK-LABEL: @exp2_finite(
171 ; CHECK: call <4 x double> @amd_vrd4_exp2(<4 x double> {{.*}})
178 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
; The call argument is derived from the induction variable: i64 -> i32 -> double.
179 %tmp = trunc i64 %iv to i32
180 %conv = sitofp i32 %tmp to double
181 %call = tail call double @__exp2_finite(double %conv)
182 %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
; NOTE(review): the double store is tagged align 4 rather than 8 - presumably
; irrelevant to what this test verifies; confirm before changing.
183 store double %call, ptr %arrayidx, align 4
; Advance the index and leave the loop once the incremented value reaches 1000.
184 %iv.next = add nuw nsw i64 %iv, 1
185 %exitcond = icmp eq i64 %iv.next, 1000
186 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
; Scalar loop that calls @__log2f_finite on float(i) for i = 0 .. 999 and stores
; each result to %varray[i]. The loop uses !llvm.loop !4 (width 4); the pattern
; below looks for the <4 x float> @amd_vrs4_log2f variant.
192 define void @log2_f32(ptr nocapture %varray) {
193 ; CHECK-LABEL: @log2_f32
194 ; CHECK: <4 x float> @amd_vrs4_log2f
199 for.body: ; preds = %for.body, %entry
200 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; The call argument is derived from the induction variable: i64 -> i32 -> float.
201 %tmp = trunc i64 %indvars.iv to i32
202 %conv = sitofp i32 %tmp to float
203 %call = tail call fast float @__log2f_finite(float %conv)
204 %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
205 store float %call, ptr %arrayidx, align 4
; Advance the index and leave the loop once the incremented value reaches 1000.
206 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
207 %exitcond = icmp eq i64 %indvars.iv.next, 1000
208 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
210 for.end: ; preds = %for.body
; Double-precision counterpart: calls @__log2_finite on double(i) for
; i = 0 .. 999 and stores each result to %varray[i]. The loop uses
; !llvm.loop !4 (width 4); the pattern below looks for the
; <4 x double> @amd_vrd4_log2 variant.
214 define void @log2_f64(ptr nocapture %varray) {
215 ; CHECK-LABEL: @log2_f64
216 ; CHECK: <4 x double> @amd_vrd4_log2
221 for.body: ; preds = %for.body, %entry
222 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; The call argument is derived from the induction variable: i64 -> i32 -> double.
223 %tmp = trunc i64 %indvars.iv to i32
224 %conv = sitofp i32 %tmp to double
225 %call = tail call fast double @__log2_finite(double %conv)
226 %arrayidx = getelementptr inbounds double, ptr %varray, i64 %indvars.iv
; NOTE(review): the double store is tagged align 4 rather than 8 - presumably
; irrelevant to what this test verifies; confirm before changing.
227 store double %call, ptr %arrayidx, align 4
; Advance the index and leave the loop once the incremented value reaches 1000.
228 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
229 %exitcond = icmp eq i64 %indvars.iv.next, 1000
230 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
232 for.end: ; preds = %for.body
; Scalar loop that calls @__log10f_finite on float(i) for i = 0 .. 999 and
; stores each result to %varray[i]. The loop uses !llvm.loop !4 (width 4); the
; pattern below looks for the <4 x float> @amd_vrs4_log10f variant.
236 define void @log10_f32(ptr nocapture %varray) {
237 ; CHECK-LABEL: @log10_f32
238 ; CHECK: <4 x float> @amd_vrs4_log10f
243 for.body: ; preds = %for.body, %entry
244 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; The call argument is derived from the induction variable: i64 -> i32 -> float.
245 %tmp = trunc i64 %indvars.iv to i32
246 %conv = sitofp i32 %tmp to float
247 %call = tail call fast float @__log10f_finite(float %conv)
248 %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
249 store float %call, ptr %arrayidx, align 4
; Advance the index and leave the loop once the incremented value reaches 1000.
250 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
251 %exitcond = icmp eq i64 %indvars.iv.next, 1000
252 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
254 for.end: ; preds = %for.body
; Scalar loop that calls @__log10_finite on double(i) for i = 0 .. 999 and
; stores each result to %varray[i]. The call carries no fast-math flags. This
; loop uses !llvm.loop !1 (vectorize enabled, width 2), so the pattern below
; requires the 2-lane call @amd_vrd2_log10 taking a <2 x double> operand.
258 define void @log10_finite(ptr nocapture %varray) {
259 ; CHECK-LABEL: @log10_finite(
260 ; CHECK: call <2 x double> @amd_vrd2_log10(<2 x double> {{.*}})
267 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
; The call argument is derived from the induction variable: i64 -> i32 -> double.
268 %tmp = trunc i64 %iv to i32
269 %conv = sitofp i32 %tmp to double
270 %call = tail call double @__log10_finite(double %conv)
271 %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
; NOTE(review): the double store is tagged align 4 rather than 8 - presumably
; irrelevant to what this test verifies; confirm before changing.
272 store double %call, ptr %arrayidx, align 4
; Advance the index and leave the loop once the incremented value reaches 1000.
273 %iv.next = add nuw nsw i64 %iv, 1
274 %exitcond = icmp eq i64 %iv.next, 1000
275 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
; Scalar loop that calls @__exp10_finite on double(i) for i = 0 .. 999 and
; stores each result to %varray[i]. The call carries no fast-math flags. This
; loop uses !llvm.loop !1 (vectorize enabled, width 2), so the pattern below
; requires the 2-lane call @amd_vrd2_exp10 taking a <2 x double> operand.
281 define void @exp10_finite(ptr nocapture %varray) {
282 ; CHECK-LABEL: @exp10_finite(
283 ; CHECK: call <2 x double> @amd_vrd2_exp10(<2 x double> {{.*}})
290 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
; The call argument is derived from the induction variable: i64 -> i32 -> double.
291 %tmp = trunc i64 %iv to i32
292 %conv = sitofp i32 %tmp to double
293 %call = tail call double @__exp10_finite(double %conv)
294 %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
; NOTE(review): the double store is tagged align 4 rather than 8 - presumably
; irrelevant to what this test verifies; confirm before changing.
295 store double %call, ptr %arrayidx, align 4
; Advance the index and leave the loop once the incremented value reaches 1000.
296 %iv.next = add nuw nsw i64 %iv, 1
297 %exitcond = icmp eq i64 %iv.next, 1000
298 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
; Scalar loop that calls @__exp10f_finite on float(i) for i = 0 .. 999 and
; stores each result to %varray[i]. The loop uses !llvm.loop !4 (width 4); the
; pattern below looks for the <4 x float> @amd_vrs4_exp10f variant.
304 define void @exp10_f32(ptr nocapture %varray) {
305 ; CHECK-LABEL: @exp10_f32
306 ; CHECK: <4 x float> @amd_vrs4_exp10f
311 for.body: ; preds = %for.body, %entry
312 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; The call argument is derived from the induction variable: i64 -> i32 -> float.
313 %tmp = trunc i64 %indvars.iv to i32
314 %conv = sitofp i32 %tmp to float
315 %call = tail call fast float @__exp10f_finite(float %conv)
316 %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
317 store float %call, ptr %arrayidx, align 4
; Advance the index and leave the loop once the incremented value reaches 1000.
318 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
319 %exitcond = icmp eq i64 %indvars.iv.next, 1000
320 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
322 for.end: ; preds = %for.body
; Scalar loop that calls @__asin_finite on double(i) for i = 0 .. 999 and stores
; each result to %varray[i]. The call carries no fast-math flags. This is the
; only loop in the file tagged with !llvm.loop !7 (vectorize enabled, width 8),
; so the pattern below requires the 8-lane call @amd_vrd8_asin taking an
; <8 x double> operand.
326 define void @asin_finite(ptr nocapture %varray) {
327 ; CHECK-LABEL: @asin_finite(
328 ; CHECK: call <8 x double> @amd_vrd8_asin(<8 x double> {{.*}})
335 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
; The call argument is derived from the induction variable: i64 -> i32 -> double.
336 %tmp = trunc i64 %iv to i32
337 %conv = sitofp i32 %tmp to double
338 %call = tail call double @__asin_finite(double %conv)
339 %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
; NOTE(review): the double store is tagged align 4 rather than 8 - presumably
; irrelevant to what this test verifies; confirm before changing.
340 store double %call, ptr %arrayidx, align 4
; Advance the index and leave the loop once the incremented value reaches 1000.
341 %iv.next = add nuw nsw i64 %iv, 1
342 %exitcond = icmp eq i64 %iv.next, 1000
343 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7
; Scalar loop that calls @__asinf_finite on float(i) for i = 0 .. 999 and stores
; each result to %varray[i]. The loop uses !llvm.loop !4 (width 4); the pattern
; below looks for the <4 x float> @amd_vrs4_asinf variant.
349 define void @asinf_finite(ptr nocapture %varray) {
350 ; CHECK-LABEL: @asinf_finite
351 ; CHECK: <4 x float> @amd_vrs4_asinf
356 for.body: ; preds = %for.body, %entry
357 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; The call argument is derived from the induction variable: i64 -> i32 -> float.
358 %tmp = trunc i64 %indvars.iv to i32
359 %conv = sitofp i32 %tmp to float
360 %call = tail call fast float @__asinf_finite(float %conv)
361 %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
362 store float %call, ptr %arrayidx, align 4
; Advance the index and leave the loop once the incremented value reaches 1000.
363 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
364 %exitcond = icmp eq i64 %indvars.iv.next, 1000
365 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
367 for.end: ; preds = %for.body
; Scalar loop that calls @__acosf_finite on float(i) for i = 0 .. 999 and stores
; each result to %varray[i]. The loop uses !llvm.loop !4 (width 4); the pattern
; below looks for the <4 x float> @amd_vrs4_acosf variant.
371 define void @acosf_finite(ptr nocapture %varray) {
372 ; CHECK-LABEL: @acosf_finite
373 ; CHECK: <4 x float> @amd_vrs4_acosf
378 for.body: ; preds = %for.body, %entry
379 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; The call argument is derived from the induction variable: i64 -> i32 -> float.
380 %tmp = trunc i64 %indvars.iv to i32
381 %conv = sitofp i32 %tmp to float
382 %call = tail call fast float @__acosf_finite(float %conv)
383 %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
384 store float %call, ptr %arrayidx, align 4
; Advance the index and leave the loop once the incremented value reaches 1000.
385 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
386 %exitcond = icmp eq i64 %indvars.iv.next, 1000
387 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
389 for.end: ; preds = %for.body
; Loop metadata referenced by the !llvm.loop attachments on the loop back-edges.
; Each group enables vectorization and pins a specific vector width, which is
; what determines the lane count in the expected vector function names.

; !1: width 2 - used by @log10_finite and @exp10_finite.
393 !1 = distinct !{!1, !2, !3}
394 !2 = !{!"llvm.loop.vectorize.width", i32 2}
395 !3 = !{!"llvm.loop.vectorize.enable", i1 true}
; !4: width 4 - used by every loop that does not reference !1 or !7.
397 !4 = distinct !{!4, !5, !6}
398 !5 = !{!"llvm.loop.vectorize.width", i32 4}
399 !6 = !{!"llvm.loop.vectorize.enable", i1 true}
; !7: width 8 - used by @asin_finite only.
401 !7 = distinct !{!7, !8, !9}
402 !8 = !{!"llvm.loop.vectorize.width", i32 8}
403 !9 = !{!"llvm.loop.vectorize.enable", i1 true}
; Declarations of the scalar __<func>_finite entry points called from the loops
; in this file; these are the names the vector-library mapping has to recognise.
; Every declaration references attribute group #0, whose definition is not
; visible in this part of the file.
405 declare float @__expf_finite(float) #0
406 declare double @__exp_finite(double) #0
407 declare double @__log_finite(double) #0
408 declare float @__logf_finite(float) #0
; pow is the only two-operand function exercised here.
409 declare float @__powf_finite(float, float) #0
410 declare double @__pow_finite(double, double) #0
411 declare float @__exp2f_finite(float) #0
412 declare double @__exp2_finite(double) #0
413 declare float @__log2f_finite(float) #0
414 declare double @__log2_finite(double) #0
415 declare float @__log10f_finite(float) #0
416 declare double @__log10_finite(double) #0
417 declare double @__exp10_finite(double) #0
418 declare float @__exp10f_finite(float) #0
419 declare double @__asin_finite(double) #0
420 declare float @__asinf_finite(float) #0
; Only the single-precision acos variant is declared in the lines shown here.
421 declare float @__acosf_finite(float) #0