1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-999 -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
4 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
5 target triple = "x86_64-apple-macosx10.8.0"
; External scalar routines called by the test functions in this file.
7 declare double @sin(double)
8 declare double @cos(double)
9 declare double @pow(double, double)
10 declare double @exp2(double)
11 declare double @sqrt(double)
; NOTE(review): @round is declared on i64 here, unlike the C library's
; double round(double). Presumably deliberate, so that @round_custom exercises
; a same-named function that is not the libm routine -- confirm.
12 declare i64 @round(i64)
; Positive test: two adjacent doubles are loaded from %a, each is passed to a
; scalar @sin call marked nounwind readnone, and the results are stored to two
; adjacent slots of %b. The assertions expect the pair to be replaced by one
; <2 x double> load, one call to @llvm.sin.v2f64 and one <2 x double> store.
15 define void @sin_libm(double* %a, double* %b) {
16 ; CHECK-LABEL: @sin_libm(
17 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
18 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
19 ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[TMP2]])
20 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
21 ; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
22 ; CHECK-NEXT: ret void
24 %a0 = load double, double* %a, align 8
25 %idx1 = getelementptr inbounds double, double* %a, i64 1
26 %a1 = load double, double* %idx1, align 8
27 %sin1 = tail call double @sin(double %a0) nounwind readnone
28 %sin2 = tail call double @sin(double %a1) nounwind readnone
29 store double %sin1, double* %b, align 8
30 %idx2 = getelementptr inbounds double, double* %b, i64 1
31 store double %sin2, double* %idx2, align 8
; Positive test: two adjacent doubles from %a are each passed to a scalar @cos
; call marked nounwind readnone and the results are stored to adjacent slots of
; %b. The assertions expect a single <2 x double> load, a single call to
; @llvm.cos.v2f64 and a single <2 x double> store.
35 define void @cos_libm(double* %a, double* %b) {
36 ; CHECK-LABEL: @cos_libm(
37 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
38 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
39 ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.cos.v2f64(<2 x double> [[TMP2]])
40 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
41 ; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
42 ; CHECK-NEXT: ret void
44 %a0 = load double, double* %a, align 8
45 %idx1 = getelementptr inbounds double, double* %a, i64 1
46 %a1 = load double, double* %idx1, align 8
47 %cos1 = tail call double @cos(double %a0) nounwind readnone
48 %cos2 = tail call double @cos(double %a1) nounwind readnone
49 store double %cos1, double* %b, align 8
50 %idx2 = getelementptr inbounds double, double* %b, i64 1
51 store double %cos2, double* %idx2, align 8
; Positive test for a two-operand routine: each loaded element is passed to
; @pow as both the first and the second argument (calls are nounwind readnone).
; The assertions expect one call to @llvm.pow.v2f64 that receives the same
; loaded <2 x double> value for both operands, followed by one vector store.
55 define void @pow_libm(double* %a, double* %b) {
56 ; CHECK-LABEL: @pow_libm(
57 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
58 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
59 ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[TMP2]], <2 x double> [[TMP2]])
60 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
61 ; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
62 ; CHECK-NEXT: ret void
64 %a0 = load double, double* %a, align 8
65 %idx1 = getelementptr inbounds double, double* %a, i64 1
66 %a1 = load double, double* %idx1, align 8
67 %pow1 = tail call double @pow(double %a0, double %a0) nounwind readnone
68 %pow2 = tail call double @pow(double %a1, double %a1) nounwind readnone
69 store double %pow1, double* %b, align 8
70 %idx2 = getelementptr inbounds double, double* %b, i64 1
71 store double %pow2, double* %idx2, align 8
; Positive test: despite the function name, the scalar calls are to @exp2 (not
; exp), each marked nounwind readnone. Note that %exp2 below is a local value
; name, distinct from the global @exp2. The assertions expect one <2 x double>
; load, one call to @llvm.exp2.v2f64 and one <2 x double> store.
75 define void @exp_libm(double* %a, double* %b) {
76 ; CHECK-LABEL: @exp_libm(
77 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
78 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
79 ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.exp2.v2f64(<2 x double> [[TMP2]])
80 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
81 ; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
82 ; CHECK-NEXT: ret void
84 %a0 = load double, double* %a, align 8
85 %idx1 = getelementptr inbounds double, double* %a, i64 1
86 %a1 = load double, double* %idx1, align 8
87 %exp1 = tail call double @exp2(double %a0) nounwind readnone
88 %exp2 = tail call double @exp2(double %a1) nounwind readnone
89 store double %exp1, double* %b, align 8
90 %idx2 = getelementptr inbounds double, double* %b, i64 1
91 store double %exp2, double* %idx2, align 8
95 ; No fast-math-flags are required to convert sqrt library calls to an intrinsic.
96 ; We just need to know that errno is not set (readnone).
; Positive test: both scalar @sqrt calls are marked nounwind readnone and carry
; no fast-math flags. The assertions expect one <2 x double> load, one call to
; @llvm.sqrt.v2f64 and one <2 x double> store.
98 define void @sqrt_libm_no_errno(double* %a, double* %b) {
99 ; CHECK-LABEL: @sqrt_libm_no_errno(
100 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
101 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
102 ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP2]])
103 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
104 ; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
105 ; CHECK-NEXT: ret void
107 %a0 = load double, double* %a, align 8
108 %idx1 = getelementptr inbounds double, double* %a, i64 1
109 %a1 = load double, double* %idx1, align 8
110 %sqrt1 = tail call double @sqrt(double %a0) nounwind readnone
111 %sqrt2 = tail call double @sqrt(double %a1) nounwind readnone
112 store double %sqrt1, double* %b, align 8
113 %idx2 = getelementptr inbounds double, double* %b, i64 1
114 store double %sqrt2, double* %idx2, align 8
118 ; The sqrt intrinsic does not set errno, but a sqrt library call with a non-constant argument might,
119 ; so this can't vectorize. The nnan on the call does not matter because there's no guarantee in the
120 ; C standard that a negative input would result in a nan output ("On a domain error, the function
121 ; returns an implementation-defined value.")
; Negative test: the two @sqrt calls carry nnan and nounwind but, unlike the
; calls in @sqrt_libm_no_errno, are not marked readnone. The assertions expect
; the scalar loads, both scalar @sqrt calls and the scalar stores to be left
; exactly as written.
123 define void @sqrt_libm_errno(double* %a, double* %b) {
124 ; CHECK-LABEL: @sqrt_libm_errno(
125 ; CHECK-NEXT: [[A0:%.*]] = load double, double* [[A:%.*]], align 8
126 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
127 ; CHECK-NEXT: [[A1:%.*]] = load double, double* [[IDX1]], align 8
128 ; CHECK-NEXT: [[SQRT1:%.*]] = tail call nnan double @sqrt(double [[A0]]) #2
129 ; CHECK-NEXT: [[SQRT2:%.*]] = tail call nnan double @sqrt(double [[A1]]) #2
130 ; CHECK-NEXT: store double [[SQRT1]], double* [[B:%.*]], align 8
131 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
132 ; CHECK-NEXT: store double [[SQRT2]], double* [[IDX2]], align 8
133 ; CHECK-NEXT: ret void
135 %a0 = load double, double* %a, align 8
136 %idx1 = getelementptr inbounds double, double* %a, i64 1
137 %a1 = load double, double* %idx1, align 8
138 %sqrt1 = tail call nnan double @sqrt(double %a0) nounwind
139 %sqrt2 = tail call nnan double @sqrt(double %a1) nounwind
140 store double %sqrt1, double* %b, align 8
141 %idx2 = getelementptr inbounds double, double* %b, i64 1
142 store double %sqrt2, double* %idx2, align 8
; Negative test: @round is called on i64 values (this file declares it as
; i64 (i64)), and both calls are marked nounwind readnone. The assertions
; expect the two scalar calls to remain, with no vector intrinsic introduced.
; NOTE(review): presumably the calls are left alone because this signature does
; not match the C library's double round(double) -- confirm.
147 define void @round_custom(i64* %a, i64* %b) {
148 ; CHECK-LABEL: @round_custom(
149 ; CHECK-NEXT: [[A0:%.*]] = load i64, i64* [[A:%.*]], align 8
150 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 1
151 ; CHECK-NEXT: [[A1:%.*]] = load i64, i64* [[IDX1]], align 8
152 ; CHECK-NEXT: [[ROUND1:%.*]] = tail call i64 @round(i64 [[A0]]) #3
153 ; CHECK-NEXT: [[ROUND2:%.*]] = tail call i64 @round(i64 [[A1]]) #3
154 ; CHECK-NEXT: store i64 [[ROUND1]], i64* [[B:%.*]], align 8
155 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 1
156 ; CHECK-NEXT: store i64 [[ROUND2]], i64* [[IDX2]], align 8
157 ; CHECK-NEXT: ret void
159 %a0 = load i64, i64* %a, align 8
160 %idx1 = getelementptr inbounds i64, i64* %a, i64 1
161 %a1 = load i64, i64* %idx1, align 8
162 %round1 = tail call i64 @round(i64 %a0) nounwind readnone
163 %round2 = tail call i64 @round(i64 %a1) nounwind readnone
164 store i64 %round1, i64* %b, align 8
165 %idx2 = getelementptr inbounds i64, i64* %b, i64 1
166 store i64 %round2, i64* %idx2, align 8
171 ; CHECK: declare <2 x double> @llvm.sin.v2f64(<2 x double>) [[ATTR0:#[0-9]+]]
172 ; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) [[ATTR0]]
173 ; CHECK: declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) [[ATTR0]]
174 ; CHECK: declare <2 x double> @llvm.exp2.v2f64(<2 x double>) [[ATTR0]]
176 ; CHECK: attributes [[ATTR0]] = { nounwind readnone speculatable }