1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 | FileCheck %s
3 ; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -slp-threshold=-10 | FileCheck %s --check-prefix=THRESHOLD
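; The second RUN line lowers the SLP cost threshold (-slp-threshold=-10) so that
; reductions the cost model would normally reject are still vectorized; those
; expectations are carried by the THRESHOLD check prefix.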
5 @n = external local_unnamed_addr global i32, align 4
6 @arr = common local_unnamed_addr global [20 x float] zeroinitializer, align 16
7 @arr1 = common local_unnamed_addr global [20 x float] zeroinitializer, align 16
8 @res = external local_unnamed_addr global float, align 4
10 define float @baz() {
11 ; CHECK-LABEL: @baz(
12 ; CHECK-NEXT: entry:
13 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4
14 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
15 ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
16 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16
17 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16
18 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP1]]
19 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
20 ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP4]], [[CONV]]
21 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
22 ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP5]], [[ADD]]
23 ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2) to <2 x float>*), align 8
24 ; CHECK-NEXT: [[TMP7:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2) to <2 x float>*), align 8
25 ; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <2 x float> [[TMP7]], [[TMP6]]
26 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
27 ; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP9]], [[ADD_1]]
28 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
29 ; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP10]], [[ADD_2]]
30 ; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV]]
31 ; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float [[TMP4]], [[ADD7]]
32 ; CHECK-NEXT: [[ADD19_1:%.*]] = fadd fast float [[TMP5]], [[ADD19]]
33 ; CHECK-NEXT: [[ADD19_2:%.*]] = fadd fast float [[TMP9]], [[ADD19_1]]
34 ; CHECK-NEXT: [[ADD19_3:%.*]] = fadd fast float [[TMP10]], [[ADD19_2]]
35 ; CHECK-NEXT: store float [[ADD19_3]], float* @res, align 4
36 ; CHECK-NEXT: ret float [[ADD19_3]]
38 ; THRESHOLD-LABEL: @baz(
39 ; THRESHOLD-NEXT: entry:
40 ; THRESHOLD-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4
41 ; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
42 ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
43 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16
44 ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16
45 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP1]]
46 ; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
47 ; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[TMP4]], [[CONV]]
48 ; THRESHOLD-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
49 ; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP5]], [[ADD]]
50 ; THRESHOLD-NEXT: [[TMP6:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2) to <2 x float>*), align 8
51 ; THRESHOLD-NEXT: [[TMP7:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2) to <2 x float>*), align 8
52 ; THRESHOLD-NEXT: [[TMP8:%.*]] = fmul fast <2 x float> [[TMP7]], [[TMP6]]
53 ; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
54 ; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP9]], [[ADD_1]]
55 ; THRESHOLD-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
56 ; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP10]], [[ADD_2]]
57 ; THRESHOLD-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV]]
58 ; THRESHOLD-NEXT: [[ADD19:%.*]] = fadd fast float [[TMP4]], [[ADD7]]
59 ; THRESHOLD-NEXT: [[ADD19_1:%.*]] = fadd fast float [[TMP5]], [[ADD19]]
60 ; THRESHOLD-NEXT: [[ADD19_2:%.*]] = fadd fast float [[TMP9]], [[ADD19_1]]
61 ; THRESHOLD-NEXT: [[ADD19_3:%.*]] = fadd fast float [[TMP10]], [[ADD19_2]]
62 ; THRESHOLD-NEXT: store float [[ADD19_3]], float* @res, align 4
63 ; THRESHOLD-NEXT: ret float [[ADD19_3]]
65 entry:
66 %0 = load i32, i32* @n, align 4
67 %mul = mul nsw i32 %0, 3
68 %conv = sitofp i32 %mul to float
69 %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
70 %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
71 %mul4 = fmul fast float %2, %1
72 %add = fadd fast float %mul4, %conv
73 %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
74 %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
75 %mul4.1 = fmul fast float %4, %3
76 %add.1 = fadd fast float %mul4.1, %add
77 %5 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
78 %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
79 %mul4.2 = fmul fast float %6, %5
80 %add.2 = fadd fast float %mul4.2, %add.1
81 %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
82 %8 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
83 %mul4.3 = fmul fast float %8, %7
84 %add.3 = fadd fast float %mul4.3, %add.2
85 %add7 = fadd fast float %add.3, %conv
86 %add19 = fadd fast float %mul4, %add7
87 %add19.1 = fadd fast float %mul4.1, %add19
88 %add19.2 = fadd fast float %mul4.2, %add19.1
89 %add19.3 = fadd fast float %mul4.3, %add19.2
90 store float %add19.3, float* @res, align 4
91 ret float %add19.3
92 }
94 define float @bazz() {
95 ; CHECK-LABEL: @bazz(
96 ; CHECK-NEXT: entry:
97 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4
98 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
99 ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
100 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16
101 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16
102 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
103 ; CHECK-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
104 ; CHECK-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
105 ; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP3]])
106 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
107 ; CHECK-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]]
108 ; CHECK-NEXT: store float [[OP_EXTRA1]], float* @res, align 4
109 ; CHECK-NEXT: ret float [[OP_EXTRA1]]
111 ; THRESHOLD-LABEL: @bazz(
112 ; THRESHOLD-NEXT: entry:
113 ; THRESHOLD-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4
114 ; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
115 ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
116 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16
117 ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16
118 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
119 ; THRESHOLD-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
120 ; THRESHOLD-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
121 ; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP3]])
122 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
123 ; THRESHOLD-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]]
124 ; THRESHOLD-NEXT: store float [[OP_EXTRA1]], float* @res, align 4
125 ; THRESHOLD-NEXT: ret float [[OP_EXTRA1]]
127 entry:
128 %0 = load i32, i32* @n, align 4
129 %mul = mul nsw i32 %0, 3
130 %conv = sitofp i32 %mul to float
131 %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
132 %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
133 %mul4 = fmul fast float %2, %1
134 %add = fadd fast float %mul4, %conv
135 %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
136 %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
137 %mul4.1 = fmul fast float %4, %3
138 %add.1 = fadd fast float %mul4.1, %add
139 %5 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
140 %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
141 %mul4.2 = fmul fast float %6, %5
142 %add.2 = fadd fast float %mul4.2, %add.1
143 %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
144 %8 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
145 %mul4.3 = fmul fast float %8, %7
146 %add.3 = fadd fast float %mul4.3, %add.2
147 %mul5 = shl nsw i32 %0, 2
148 %conv6 = sitofp i32 %mul5 to float
149 %add7 = fadd fast float %add.3, %conv6
150 %9 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 4), align 16
151 %10 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 4), align 16
152 %mul18 = fmul fast float %10, %9
153 %add19 = fadd fast float %mul18, %add7
154 %11 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 5), align 4
155 %12 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 5), align 4
156 %mul18.1 = fmul fast float %12, %11
157 %add19.1 = fadd fast float %mul18.1, %add19
158 %13 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 6), align 8
159 %14 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 6), align 8
160 %mul18.2 = fmul fast float %14, %13
161 %add19.2 = fadd fast float %mul18.2, %add19.1
162 %15 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 7), align 4
163 %16 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 7), align 4
164 %mul18.3 = fmul fast float %16, %15
165 %add19.3 = fadd fast float %mul18.3, %add19.2
166 store float %add19.3, float* @res, align 4
167 ret float %add19.3
168 }
170 define float @bazzz() {
171 ; CHECK-LABEL: @bazzz(
172 ; CHECK-NEXT: entry:
173 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4
174 ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
175 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
176 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
177 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
178 ; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
179 ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
180 ; CHECK-NEXT: store float [[TMP5]], float* @res, align 4
181 ; CHECK-NEXT: ret float [[TMP5]]
183 ; THRESHOLD-LABEL: @bazzz(
184 ; THRESHOLD-NEXT: entry:
185 ; THRESHOLD-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4
186 ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
187 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
188 ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
189 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
190 ; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
191 ; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
192 ; THRESHOLD-NEXT: store float [[TMP5]], float* @res, align 4
193 ; THRESHOLD-NEXT: ret float [[TMP5]]
195 entry:
196 %0 = load i32, i32* @n, align 4
197 %conv = sitofp i32 %0 to float
198 %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
199 %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
200 %mul = fmul fast float %2, %1
201 %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
202 %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
203 %mul.1 = fmul fast float %4, %3
204 %5 = fadd fast float %mul.1, %mul
205 %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
206 %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
207 %mul.2 = fmul fast float %7, %6
208 %8 = fadd fast float %mul.2, %5
209 %9 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
210 %10 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
211 %mul.3 = fmul fast float %10, %9
212 %11 = fadd fast float %mul.3, %8
213 %12 = fmul fast float %conv, %11
214 store float %12, float* @res, align 4
215 ret float %12
216 }
218 define i32 @foo() {
219 ; CHECK-LABEL: @foo(
220 ; CHECK-NEXT: entry:
221 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4
222 ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
223 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
224 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
225 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
226 ; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
227 ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
228 ; CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
229 ; CHECK-NEXT: store i32 [[CONV4]], i32* @n, align 4
230 ; CHECK-NEXT: ret i32 [[CONV4]]
232 ; THRESHOLD-LABEL: @foo(
233 ; THRESHOLD-NEXT: entry:
234 ; THRESHOLD-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4
235 ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
236 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
237 ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
238 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
239 ; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
240 ; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
241 ; THRESHOLD-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
242 ; THRESHOLD-NEXT: store i32 [[CONV4]], i32* @n, align 4
243 ; THRESHOLD-NEXT: ret i32 [[CONV4]]
245 entry:
246 %0 = load i32, i32* @n, align 4
247 %conv = sitofp i32 %0 to float
248 %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
249 %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
250 %mul = fmul fast float %2, %1
251 %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
252 %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
253 %mul.1 = fmul fast float %4, %3
254 %5 = fadd fast float %mul.1, %mul
255 %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
256 %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
257 %mul.2 = fmul fast float %7, %6
258 %8 = fadd fast float %mul.2, %5
259 %9 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
260 %10 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
261 %mul.3 = fmul fast float %10, %9
262 %11 = fadd fast float %mul.3, %8
263 %12 = fmul fast float %conv, %11
264 %conv4 = fptosi float %12 to i32
265 store i32 %conv4, i32* @n, align 4
266 ret i32 %conv4
267 }
269 ; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select
270 ; with fastmath on the select.
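; Illustrative sketch only (not checked by this test): with fast-math on the
; select, InstCombine would fold the fcmp/select pairs below into calls such as
;   %max.0.mul3 = call fast float @llvm.maxnum.f32(float %mul, float %mul3)
; which is the fmaxnum form the FIXME above refers to.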
271 define float @bar() {
272 ; CHECK-LABEL: @bar(
273 ; CHECK-NEXT: entry:
274 ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16
275 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16
276 ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]]
277 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
278 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
279 ; CHECK-NEXT: [[CMP4:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
280 ; CHECK-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float [[TMP3]], float [[TMP4]]
281 ; CHECK-NEXT: [[TMP5:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
282 ; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
283 ; CHECK-NEXT: [[MUL3_1:%.*]] = fmul fast float [[TMP6]], [[TMP5]]
284 ; CHECK-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], [[MUL3_1]]
285 ; CHECK-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float [[MUL3_1]]
286 ; CHECK-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
287 ; CHECK-NEXT: [[TMP8:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
288 ; CHECK-NEXT: [[MUL3_2:%.*]] = fmul fast float [[TMP8]], [[TMP7]]
289 ; CHECK-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], [[MUL3_2]]
290 ; CHECK-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float [[MUL3_2]]
291 ; CHECK-NEXT: store float [[MAX_0_MUL3_2]], float* @res, align 4
292 ; CHECK-NEXT: ret float [[MAX_0_MUL3_2]]
294 ; THRESHOLD-LABEL: @bar(
295 ; THRESHOLD-NEXT: entry:
296 ; THRESHOLD-NEXT: [[TMP0:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16
297 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16
298 ; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]]
299 ; THRESHOLD-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
300 ; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
301 ; THRESHOLD-NEXT: [[CMP4:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
302 ; THRESHOLD-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float [[TMP3]], float [[TMP4]]
303 ; THRESHOLD-NEXT: [[TMP5:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
304 ; THRESHOLD-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
305 ; THRESHOLD-NEXT: [[MUL3_1:%.*]] = fmul fast float [[TMP6]], [[TMP5]]
306 ; THRESHOLD-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], [[MUL3_1]]
307 ; THRESHOLD-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float [[MUL3_1]]
308 ; THRESHOLD-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
309 ; THRESHOLD-NEXT: [[TMP8:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
310 ; THRESHOLD-NEXT: [[MUL3_2:%.*]] = fmul fast float [[TMP8]], [[TMP7]]
311 ; THRESHOLD-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], [[MUL3_2]]
312 ; THRESHOLD-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float [[MUL3_2]]
313 ; THRESHOLD-NEXT: store float [[MAX_0_MUL3_2]], float* @res, align 4
314 ; THRESHOLD-NEXT: ret float [[MAX_0_MUL3_2]]
316 entry:
317 %0 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
318 %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
319 %mul = fmul fast float %1, %0
320 %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
321 %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
322 %mul3 = fmul fast float %3, %2
323 %cmp4 = fcmp fast ogt float %mul, %mul3
324 %max.0.mul3 = select i1 %cmp4, float %mul, float %mul3
325 %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
326 %5 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
327 %mul3.1 = fmul fast float %5, %4
328 %cmp4.1 = fcmp fast ogt float %max.0.mul3, %mul3.1
329 %max.0.mul3.1 = select i1 %cmp4.1, float %max.0.mul3, float %mul3.1
330 %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
331 %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
332 %mul3.2 = fmul fast float %7, %6
333 %cmp4.2 = fcmp fast ogt float %max.0.mul3.1, %mul3.2
334 %max.0.mul3.2 = select i1 %cmp4.2, float %max.0.mul3.1, float %mul3.2
335 store float %max.0.mul3.2, float* @res, align 4
336 ret float %max.0.mul3.2
337 }
339 define float @f(float* nocapture readonly %x) {
340 ; CHECK-LABEL: @f(
341 ; CHECK-NEXT: entry:
342 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
343 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
344 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
345 ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
346 ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
347 ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
348 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
349 ; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8
350 ; CHECK-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9
351 ; CHECK-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10
352 ; CHECK-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11
353 ; CHECK-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12
354 ; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13
355 ; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14
356 ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
357 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>*
358 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4
359 ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
360 ; CHECK-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
361 ; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18
362 ; CHECK-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19
363 ; CHECK-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20
364 ; CHECK-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21
365 ; CHECK-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22
366 ; CHECK-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23
367 ; CHECK-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24
368 ; CHECK-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25
369 ; CHECK-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26
370 ; CHECK-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27
371 ; CHECK-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28
372 ; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29
373 ; CHECK-NEXT: [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
374 ; CHECK-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31
375 ; CHECK-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, float* [[X]], i64 32
376 ; CHECK-NEXT: [[ARRAYIDX_33:%.*]] = getelementptr inbounds float, float* [[X]], i64 33
377 ; CHECK-NEXT: [[ARRAYIDX_34:%.*]] = getelementptr inbounds float, float* [[X]], i64 34
378 ; CHECK-NEXT: [[ARRAYIDX_35:%.*]] = getelementptr inbounds float, float* [[X]], i64 35
379 ; CHECK-NEXT: [[ARRAYIDX_36:%.*]] = getelementptr inbounds float, float* [[X]], i64 36
380 ; CHECK-NEXT: [[ARRAYIDX_37:%.*]] = getelementptr inbounds float, float* [[X]], i64 37
381 ; CHECK-NEXT: [[ARRAYIDX_38:%.*]] = getelementptr inbounds float, float* [[X]], i64 38
382 ; CHECK-NEXT: [[ARRAYIDX_39:%.*]] = getelementptr inbounds float, float* [[X]], i64 39
383 ; CHECK-NEXT: [[ARRAYIDX_40:%.*]] = getelementptr inbounds float, float* [[X]], i64 40
384 ; CHECK-NEXT: [[ARRAYIDX_41:%.*]] = getelementptr inbounds float, float* [[X]], i64 41
385 ; CHECK-NEXT: [[ARRAYIDX_42:%.*]] = getelementptr inbounds float, float* [[X]], i64 42
386 ; CHECK-NEXT: [[ARRAYIDX_43:%.*]] = getelementptr inbounds float, float* [[X]], i64 43
387 ; CHECK-NEXT: [[ARRAYIDX_44:%.*]] = getelementptr inbounds float, float* [[X]], i64 44
388 ; CHECK-NEXT: [[ARRAYIDX_45:%.*]] = getelementptr inbounds float, float* [[X]], i64 45
389 ; CHECK-NEXT: [[ARRAYIDX_46:%.*]] = getelementptr inbounds float, float* [[X]], i64 46
390 ; CHECK-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47
391 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>*
392 ; CHECK-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4
393 ; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP3]])
394 ; CHECK-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP1]])
395 ; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
396 ; CHECK-NEXT: ret float [[OP_RDX]]
398 ; THRESHOLD-LABEL: @f(
399 ; THRESHOLD-NEXT: entry:
400 ; THRESHOLD-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
401 ; THRESHOLD-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
402 ; THRESHOLD-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
403 ; THRESHOLD-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
404 ; THRESHOLD-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
405 ; THRESHOLD-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
406 ; THRESHOLD-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
407 ; THRESHOLD-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8
408 ; THRESHOLD-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9
409 ; THRESHOLD-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10
410 ; THRESHOLD-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11
411 ; THRESHOLD-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12
412 ; THRESHOLD-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13
413 ; THRESHOLD-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14
414 ; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
415 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>*
416 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4
417 ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
418 ; THRESHOLD-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
419 ; THRESHOLD-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18
420 ; THRESHOLD-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19
421 ; THRESHOLD-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20
422 ; THRESHOLD-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21
423 ; THRESHOLD-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22
424 ; THRESHOLD-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23
425 ; THRESHOLD-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24
426 ; THRESHOLD-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25
427 ; THRESHOLD-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26
428 ; THRESHOLD-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27
429 ; THRESHOLD-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28
430 ; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29
431 ; THRESHOLD-NEXT: [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
432 ; THRESHOLD-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31
433 ; THRESHOLD-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, float* [[X]], i64 32
434 ; THRESHOLD-NEXT: [[ARRAYIDX_33:%.*]] = getelementptr inbounds float, float* [[X]], i64 33
435 ; THRESHOLD-NEXT: [[ARRAYIDX_34:%.*]] = getelementptr inbounds float, float* [[X]], i64 34
436 ; THRESHOLD-NEXT: [[ARRAYIDX_35:%.*]] = getelementptr inbounds float, float* [[X]], i64 35
437 ; THRESHOLD-NEXT: [[ARRAYIDX_36:%.*]] = getelementptr inbounds float, float* [[X]], i64 36
438 ; THRESHOLD-NEXT: [[ARRAYIDX_37:%.*]] = getelementptr inbounds float, float* [[X]], i64 37
439 ; THRESHOLD-NEXT: [[ARRAYIDX_38:%.*]] = getelementptr inbounds float, float* [[X]], i64 38
440 ; THRESHOLD-NEXT: [[ARRAYIDX_39:%.*]] = getelementptr inbounds float, float* [[X]], i64 39
441 ; THRESHOLD-NEXT: [[ARRAYIDX_40:%.*]] = getelementptr inbounds float, float* [[X]], i64 40
442 ; THRESHOLD-NEXT: [[ARRAYIDX_41:%.*]] = getelementptr inbounds float, float* [[X]], i64 41
443 ; THRESHOLD-NEXT: [[ARRAYIDX_42:%.*]] = getelementptr inbounds float, float* [[X]], i64 42
444 ; THRESHOLD-NEXT: [[ARRAYIDX_43:%.*]] = getelementptr inbounds float, float* [[X]], i64 43
445 ; THRESHOLD-NEXT: [[ARRAYIDX_44:%.*]] = getelementptr inbounds float, float* [[X]], i64 44
446 ; THRESHOLD-NEXT: [[ARRAYIDX_45:%.*]] = getelementptr inbounds float, float* [[X]], i64 45
447 ; THRESHOLD-NEXT: [[ARRAYIDX_46:%.*]] = getelementptr inbounds float, float* [[X]], i64 46
448 ; THRESHOLD-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47
449 ; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>*
450 ; THRESHOLD-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4
451 ; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP3]])
452 ; THRESHOLD-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP1]])
453 ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
454 ; THRESHOLD-NEXT: ret float [[OP_RDX]]
456 entry:
457 %0 = load float, float* %x, align 4
458 %arrayidx.1 = getelementptr inbounds float, float* %x, i64 1
459 %1 = load float, float* %arrayidx.1, align 4
460 %add.1 = fadd fast float %1, %0
461 %arrayidx.2 = getelementptr inbounds float, float* %x, i64 2
462 %2 = load float, float* %arrayidx.2, align 4
463 %add.2 = fadd fast float %2, %add.1
464 %arrayidx.3 = getelementptr inbounds float, float* %x, i64 3
465 %3 = load float, float* %arrayidx.3, align 4
466 %add.3 = fadd fast float %3, %add.2
467 %arrayidx.4 = getelementptr inbounds float, float* %x, i64 4
468 %4 = load float, float* %arrayidx.4, align 4
469 %add.4 = fadd fast float %4, %add.3
470 %arrayidx.5 = getelementptr inbounds float, float* %x, i64 5
471 %5 = load float, float* %arrayidx.5, align 4
472 %add.5 = fadd fast float %5, %add.4
473 %arrayidx.6 = getelementptr inbounds float, float* %x, i64 6
474 %6 = load float, float* %arrayidx.6, align 4
475 %add.6 = fadd fast float %6, %add.5
476 %arrayidx.7 = getelementptr inbounds float, float* %x, i64 7
477 %7 = load float, float* %arrayidx.7, align 4
478 %add.7 = fadd fast float %7, %add.6
479 %arrayidx.8 = getelementptr inbounds float, float* %x, i64 8
480 %8 = load float, float* %arrayidx.8, align 4
481 %add.8 = fadd fast float %8, %add.7
482 %arrayidx.9 = getelementptr inbounds float, float* %x, i64 9
483 %9 = load float, float* %arrayidx.9, align 4
484 %add.9 = fadd fast float %9, %add.8
485 %arrayidx.10 = getelementptr inbounds float, float* %x, i64 10
486 %10 = load float, float* %arrayidx.10, align 4
487 %add.10 = fadd fast float %10, %add.9
488 %arrayidx.11 = getelementptr inbounds float, float* %x, i64 11
489 %11 = load float, float* %arrayidx.11, align 4
490 %add.11 = fadd fast float %11, %add.10
491 %arrayidx.12 = getelementptr inbounds float, float* %x, i64 12
492 %12 = load float, float* %arrayidx.12, align 4
493 %add.12 = fadd fast float %12, %add.11
494 %arrayidx.13 = getelementptr inbounds float, float* %x, i64 13
495 %13 = load float, float* %arrayidx.13, align 4
496 %add.13 = fadd fast float %13, %add.12
497 %arrayidx.14 = getelementptr inbounds float, float* %x, i64 14
498 %14 = load float, float* %arrayidx.14, align 4
499 %add.14 = fadd fast float %14, %add.13
500 %arrayidx.15 = getelementptr inbounds float, float* %x, i64 15
501 %15 = load float, float* %arrayidx.15, align 4
502 %add.15 = fadd fast float %15, %add.14
503 %arrayidx.16 = getelementptr inbounds float, float* %x, i64 16
504 %16 = load float, float* %arrayidx.16, align 4
505 %add.16 = fadd fast float %16, %add.15
506 %arrayidx.17 = getelementptr inbounds float, float* %x, i64 17
507 %17 = load float, float* %arrayidx.17, align 4
508 %add.17 = fadd fast float %17, %add.16
509 %arrayidx.18 = getelementptr inbounds float, float* %x, i64 18
510 %18 = load float, float* %arrayidx.18, align 4
511 %add.18 = fadd fast float %18, %add.17
512 %arrayidx.19 = getelementptr inbounds float, float* %x, i64 19
513 %19 = load float, float* %arrayidx.19, align 4
514 %add.19 = fadd fast float %19, %add.18
515 %arrayidx.20 = getelementptr inbounds float, float* %x, i64 20
516 %20 = load float, float* %arrayidx.20, align 4
517 %add.20 = fadd fast float %20, %add.19
518 %arrayidx.21 = getelementptr inbounds float, float* %x, i64 21
519 %21 = load float, float* %arrayidx.21, align 4
520 %add.21 = fadd fast float %21, %add.20
521 %arrayidx.22 = getelementptr inbounds float, float* %x, i64 22
522 %22 = load float, float* %arrayidx.22, align 4
523 %add.22 = fadd fast float %22, %add.21
524 %arrayidx.23 = getelementptr inbounds float, float* %x, i64 23
525 %23 = load float, float* %arrayidx.23, align 4
526 %add.23 = fadd fast float %23, %add.22
527 %arrayidx.24 = getelementptr inbounds float, float* %x, i64 24
528 %24 = load float, float* %arrayidx.24, align 4
529 %add.24 = fadd fast float %24, %add.23
530 %arrayidx.25 = getelementptr inbounds float, float* %x, i64 25
531 %25 = load float, float* %arrayidx.25, align 4
532 %add.25 = fadd fast float %25, %add.24
533 %arrayidx.26 = getelementptr inbounds float, float* %x, i64 26
534 %26 = load float, float* %arrayidx.26, align 4
535 %add.26 = fadd fast float %26, %add.25
536 %arrayidx.27 = getelementptr inbounds float, float* %x, i64 27
537 %27 = load float, float* %arrayidx.27, align 4
538 %add.27 = fadd fast float %27, %add.26
539 %arrayidx.28 = getelementptr inbounds float, float* %x, i64 28
540 %28 = load float, float* %arrayidx.28, align 4
541 %add.28 = fadd fast float %28, %add.27
542 %arrayidx.29 = getelementptr inbounds float, float* %x, i64 29
543 %29 = load float, float* %arrayidx.29, align 4
544 %add.29 = fadd fast float %29, %add.28
545 %arrayidx.30 = getelementptr inbounds float, float* %x, i64 30
546 %30 = load float, float* %arrayidx.30, align 4
547 %add.30 = fadd fast float %30, %add.29
548 %arrayidx.31 = getelementptr inbounds float, float* %x, i64 31
549 %31 = load float, float* %arrayidx.31, align 4
550 %add.31 = fadd fast float %31, %add.30
551 %arrayidx.32 = getelementptr inbounds float, float* %x, i64 32
552 %32 = load float, float* %arrayidx.32, align 4
553 %add.32 = fadd fast float %32, %add.31
554 %arrayidx.33 = getelementptr inbounds float, float* %x, i64 33
555 %33 = load float, float* %arrayidx.33, align 4
556 %add.33 = fadd fast float %33, %add.32
557 %arrayidx.34 = getelementptr inbounds float, float* %x, i64 34
558 %34 = load float, float* %arrayidx.34, align 4
559 %add.34 = fadd fast float %34, %add.33
560 %arrayidx.35 = getelementptr inbounds float, float* %x, i64 35
561 %35 = load float, float* %arrayidx.35, align 4
562 %add.35 = fadd fast float %35, %add.34
563 %arrayidx.36 = getelementptr inbounds float, float* %x, i64 36
564 %36 = load float, float* %arrayidx.36, align 4
565 %add.36 = fadd fast float %36, %add.35
566 %arrayidx.37 = getelementptr inbounds float, float* %x, i64 37
567 %37 = load float, float* %arrayidx.37, align 4
568 %add.37 = fadd fast float %37, %add.36
569 %arrayidx.38 = getelementptr inbounds float, float* %x, i64 38
570 %38 = load float, float* %arrayidx.38, align 4
571 %add.38 = fadd fast float %38, %add.37
572 %arrayidx.39 = getelementptr inbounds float, float* %x, i64 39
573 %39 = load float, float* %arrayidx.39, align 4
574 %add.39 = fadd fast float %39, %add.38
575 %arrayidx.40 = getelementptr inbounds float, float* %x, i64 40
576 %40 = load float, float* %arrayidx.40, align 4
577 %add.40 = fadd fast float %40, %add.39
578 %arrayidx.41 = getelementptr inbounds float, float* %x, i64 41
579 %41 = load float, float* %arrayidx.41, align 4
580 %add.41 = fadd fast float %41, %add.40
581 %arrayidx.42 = getelementptr inbounds float, float* %x, i64 42
582 %42 = load float, float* %arrayidx.42, align 4
583 %add.42 = fadd fast float %42, %add.41
584 %arrayidx.43 = getelementptr inbounds float, float* %x, i64 43
585 %43 = load float, float* %arrayidx.43, align 4
586 %add.43 = fadd fast float %43, %add.42
587 %arrayidx.44 = getelementptr inbounds float, float* %x, i64 44
588 %44 = load float, float* %arrayidx.44, align 4
589 %add.44 = fadd fast float %44, %add.43
590 %arrayidx.45 = getelementptr inbounds float, float* %x, i64 45
591 %45 = load float, float* %arrayidx.45, align 4
592 %add.45 = fadd fast float %45, %add.44
593 %arrayidx.46 = getelementptr inbounds float, float* %x, i64 46
594 %46 = load float, float* %arrayidx.46, align 4
595 %add.46 = fadd fast float %46, %add.45
596 %arrayidx.47 = getelementptr inbounds float, float* %x, i64 47
597 %47 = load float, float* %arrayidx.47, align 4
598 %add.47 = fadd fast float %47, %add.46
599 ret float %add.47
600 }
602 define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) {
603 ; CHECK-LABEL: @f1(
604 ; CHECK-NEXT: entry:
605 ; CHECK-NEXT: [[REM:%.*]] = srem i32 [[A:%.*]], [[B:%.*]]
606 ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[REM]] to float
607 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
608 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
609 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
610 ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
611 ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
612 ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
613 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
614 ; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8
615 ; CHECK-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9
616 ; CHECK-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10
617 ; CHECK-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11
618 ; CHECK-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12
619 ; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13
620 ; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14
621 ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
622 ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
623 ; CHECK-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
624 ; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18
625 ; CHECK-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19
626 ; CHECK-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20
627 ; CHECK-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21
628 ; CHECK-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22
629 ; CHECK-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23
630 ; CHECK-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24
631 ; CHECK-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25
632 ; CHECK-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26
633 ; CHECK-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27
634 ; CHECK-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28
635 ; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29
636 ; CHECK-NEXT: [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
637 ; CHECK-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31
638 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>*
639 ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4
640 ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP1]])
641 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]]
642 ; CHECK-NEXT: ret float [[OP_EXTRA]]
644 ; THRESHOLD-LABEL: @f1(
645 ; THRESHOLD-NEXT: entry:
646 ; THRESHOLD-NEXT: [[REM:%.*]] = srem i32 [[A:%.*]], [[B:%.*]]
647 ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[REM]] to float
648 ; THRESHOLD-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
649 ; THRESHOLD-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
650 ; THRESHOLD-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
651 ; THRESHOLD-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
652 ; THRESHOLD-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
653 ; THRESHOLD-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
654 ; THRESHOLD-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
655 ; THRESHOLD-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8
656 ; THRESHOLD-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9
657 ; THRESHOLD-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10
658 ; THRESHOLD-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11
659 ; THRESHOLD-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12
660 ; THRESHOLD-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13
661 ; THRESHOLD-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14
662 ; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
663 ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
664 ; THRESHOLD-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
665 ; THRESHOLD-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18
666 ; THRESHOLD-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19
667 ; THRESHOLD-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20
668 ; THRESHOLD-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21
669 ; THRESHOLD-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22
670 ; THRESHOLD-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23
671 ; THRESHOLD-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24
672 ; THRESHOLD-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25
673 ; THRESHOLD-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26
674 ; THRESHOLD-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27
675 ; THRESHOLD-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28
676 ; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29
677 ; THRESHOLD-NEXT: [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
678 ; THRESHOLD-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31
679 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>*
680 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4
681 ; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP1]])
682 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]]
683 ; THRESHOLD-NEXT: ret float [[OP_EXTRA]]
685 entry:
686 %rem = srem i32 %a, %b
687 %conv = sitofp i32 %rem to float
688 %0 = load float, float* %x, align 4
689 %add = fadd fast float %0, %conv
690 %arrayidx.1 = getelementptr inbounds float, float* %x, i64 1
691 %1 = load float, float* %arrayidx.1, align 4
692 %add.1 = fadd fast float %1, %add
693 %arrayidx.2 = getelementptr inbounds float, float* %x, i64 2
694 %2 = load float, float* %arrayidx.2, align 4
695 %add.2 = fadd fast float %2, %add.1
696 %arrayidx.3 = getelementptr inbounds float, float* %x, i64 3
697 %3 = load float, float* %arrayidx.3, align 4
698 %add.3 = fadd fast float %3, %add.2
699 %arrayidx.4 = getelementptr inbounds float, float* %x, i64 4
700 %4 = load float, float* %arrayidx.4, align 4
701 %add.4 = fadd fast float %4, %add.3
702 %arrayidx.5 = getelementptr inbounds float, float* %x, i64 5
703 %5 = load float, float* %arrayidx.5, align 4
704 %add.5 = fadd fast float %5, %add.4
705 %arrayidx.6 = getelementptr inbounds float, float* %x, i64 6
706 %6 = load float, float* %arrayidx.6, align 4
707 %add.6 = fadd fast float %6, %add.5
708 %arrayidx.7 = getelementptr inbounds float, float* %x, i64 7
709 %7 = load float, float* %arrayidx.7, align 4
710 %add.7 = fadd fast float %7, %add.6
711 %arrayidx.8 = getelementptr inbounds float, float* %x, i64 8
712 %8 = load float, float* %arrayidx.8, align 4
713 %add.8 = fadd fast float %8, %add.7
714 %arrayidx.9 = getelementptr inbounds float, float* %x, i64 9
715 %9 = load float, float* %arrayidx.9, align 4
716 %add.9 = fadd fast float %9, %add.8
717 %arrayidx.10 = getelementptr inbounds float, float* %x, i64 10
718 %10 = load float, float* %arrayidx.10, align 4
719 %add.10 = fadd fast float %10, %add.9
720 %arrayidx.11 = getelementptr inbounds float, float* %x, i64 11
721 %11 = load float, float* %arrayidx.11, align 4
722 %add.11 = fadd fast float %11, %add.10
723 %arrayidx.12 = getelementptr inbounds float, float* %x, i64 12
724 %12 = load float, float* %arrayidx.12, align 4
725 %add.12 = fadd fast float %12, %add.11
726 %arrayidx.13 = getelementptr inbounds float, float* %x, i64 13
727 %13 = load float, float* %arrayidx.13, align 4
728 %add.13 = fadd fast float %13, %add.12
729 %arrayidx.14 = getelementptr inbounds float, float* %x, i64 14
730 %14 = load float, float* %arrayidx.14, align 4
731 %add.14 = fadd fast float %14, %add.13
732 %arrayidx.15 = getelementptr inbounds float, float* %x, i64 15
733 %15 = load float, float* %arrayidx.15, align 4
734 %add.15 = fadd fast float %15, %add.14
735 %arrayidx.16 = getelementptr inbounds float, float* %x, i64 16
736 %16 = load float, float* %arrayidx.16, align 4
737 %add.16 = fadd fast float %16, %add.15
738 %arrayidx.17 = getelementptr inbounds float, float* %x, i64 17
739 %17 = load float, float* %arrayidx.17, align 4
740 %add.17 = fadd fast float %17, %add.16
741 %arrayidx.18 = getelementptr inbounds float, float* %x, i64 18
742 %18 = load float, float* %arrayidx.18, align 4
743 %add.18 = fadd fast float %18, %add.17
744 %arrayidx.19 = getelementptr inbounds float, float* %x, i64 19
745 %19 = load float, float* %arrayidx.19, align 4
746 %add.19 = fadd fast float %19, %add.18
747 %arrayidx.20 = getelementptr inbounds float, float* %x, i64 20
748 %20 = load float, float* %arrayidx.20, align 4
749 %add.20 = fadd fast float %20, %add.19
750 %arrayidx.21 = getelementptr inbounds float, float* %x, i64 21
751 %21 = load float, float* %arrayidx.21, align 4
752 %add.21 = fadd fast float %21, %add.20
753 %arrayidx.22 = getelementptr inbounds float, float* %x, i64 22
754 %22 = load float, float* %arrayidx.22, align 4
755 %add.22 = fadd fast float %22, %add.21
756 %arrayidx.23 = getelementptr inbounds float, float* %x, i64 23
757 %23 = load float, float* %arrayidx.23, align 4
758 %add.23 = fadd fast float %23, %add.22
759 %arrayidx.24 = getelementptr inbounds float, float* %x, i64 24
760 %24 = load float, float* %arrayidx.24, align 4
761 %add.24 = fadd fast float %24, %add.23
762 %arrayidx.25 = getelementptr inbounds float, float* %x, i64 25
763 %25 = load float, float* %arrayidx.25, align 4
764 %add.25 = fadd fast float %25, %add.24
765 %arrayidx.26 = getelementptr inbounds float, float* %x, i64 26
766 %26 = load float, float* %arrayidx.26, align 4
767 %add.26 = fadd fast float %26, %add.25
768 %arrayidx.27 = getelementptr inbounds float, float* %x, i64 27
769 %27 = load float, float* %arrayidx.27, align 4
770 %add.27 = fadd fast float %27, %add.26
771 %arrayidx.28 = getelementptr inbounds float, float* %x, i64 28
772 %28 = load float, float* %arrayidx.28, align 4
773 %add.28 = fadd fast float %28, %add.27
774 %arrayidx.29 = getelementptr inbounds float, float* %x, i64 29
775 %29 = load float, float* %arrayidx.29, align 4
776 %add.29 = fadd fast float %29, %add.28
777 %arrayidx.30 = getelementptr inbounds float, float* %x, i64 30
778 %30 = load float, float* %arrayidx.30, align 4
779 %add.30 = fadd fast float %30, %add.29
780 %arrayidx.31 = getelementptr inbounds float, float* %x, i64 31
781 %31 = load float, float* %arrayidx.31, align 4
782 %add.31 = fadd fast float %31, %add.30
783 ret float %add.31
784 }
786 define float @loadadd31(float* nocapture readonly %x) {
787 ; CHECK-LABEL: @loadadd31(
788 ; CHECK-NEXT: entry:
789 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
790 ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4
791 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
792 ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4
793 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
794 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
795 ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
796 ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
797 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>*
798 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
799 ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
800 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8
801 ; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9
802 ; CHECK-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 10
803 ; CHECK-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 11
804 ; CHECK-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 12
805 ; CHECK-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 13
806 ; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14
807 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>*
808 ; CHECK-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
809 ; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
810 ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
811 ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
812 ; CHECK-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 18
813 ; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 19
814 ; CHECK-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 20
815 ; CHECK-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 21
816 ; CHECK-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 22
817 ; CHECK-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 23
818 ; CHECK-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 24
819 ; CHECK-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 25
820 ; CHECK-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 26
821 ; CHECK-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 27
822 ; CHECK-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 28
823 ; CHECK-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 29
824 ; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
825 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>*
826 ; CHECK-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4
827 ; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP7]])
828 ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP5]])
829 ; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]]
830 ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
831 ; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]]
832 ; CHECK-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX1]], [[TMP1]]
833 ; CHECK-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]]
834 ; CHECK-NEXT: ret float [[TMP12]]
836 ; THRESHOLD-LABEL: @loadadd31(
837 ; THRESHOLD-NEXT: entry:
838 ; THRESHOLD-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
839 ; THRESHOLD-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4
840 ; THRESHOLD-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
841 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4
842 ; THRESHOLD-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
843 ; THRESHOLD-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
844 ; THRESHOLD-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
845 ; THRESHOLD-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
846 ; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>*
847 ; THRESHOLD-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
848 ; THRESHOLD-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
849 ; THRESHOLD-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8
850 ; THRESHOLD-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9
851 ; THRESHOLD-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 10
852 ; THRESHOLD-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 11
853 ; THRESHOLD-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 12
854 ; THRESHOLD-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 13
855 ; THRESHOLD-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14
856 ; THRESHOLD-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>*
857 ; THRESHOLD-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
858 ; THRESHOLD-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
859 ; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
860 ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
861 ; THRESHOLD-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 18
862 ; THRESHOLD-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 19
863 ; THRESHOLD-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 20
864 ; THRESHOLD-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 21
865 ; THRESHOLD-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 22
866 ; THRESHOLD-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 23
867 ; THRESHOLD-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 24
868 ; THRESHOLD-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 25
869 ; THRESHOLD-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 26
870 ; THRESHOLD-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 27
871 ; THRESHOLD-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 28
872 ; THRESHOLD-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 29
873 ; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
874 ; THRESHOLD-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>*
875 ; THRESHOLD-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4
876 ; THRESHOLD-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP7]])
877 ; THRESHOLD-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP5]])
878 ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]]
879 ; THRESHOLD-NEXT: [[TMP10:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
880 ; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]]
881 ; THRESHOLD-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX1]], [[TMP1]]
882 ; THRESHOLD-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]]
883 ; THRESHOLD-NEXT: ret float [[TMP12]]
885 entry:
886 %arrayidx = getelementptr inbounds float, float* %x, i64 1
887 %0 = load float, float* %arrayidx, align 4
888 %arrayidx.1 = getelementptr inbounds float, float* %x, i64 2
889 %1 = load float, float* %arrayidx.1, align 4
890 %add.1 = fadd fast float %1, %0
891 %arrayidx.2 = getelementptr inbounds float, float* %x, i64 3
892 %2 = load float, float* %arrayidx.2, align 4
893 %add.2 = fadd fast float %2, %add.1
894 %arrayidx.3 = getelementptr inbounds float, float* %x, i64 4
895 %3 = load float, float* %arrayidx.3, align 4
896 %add.3 = fadd fast float %3, %add.2
897 %arrayidx.4 = getelementptr inbounds float, float* %x, i64 5
898 %4 = load float, float* %arrayidx.4, align 4
899 %add.4 = fadd fast float %4, %add.3
900 %arrayidx.5 = getelementptr inbounds float, float* %x, i64 6
901 %5 = load float, float* %arrayidx.5, align 4
902 %add.5 = fadd fast float %5, %add.4
903 %arrayidx.6 = getelementptr inbounds float, float* %x, i64 7
904 %6 = load float, float* %arrayidx.6, align 4
905 %add.6 = fadd fast float %6, %add.5
906 %arrayidx.7 = getelementptr inbounds float, float* %x, i64 8
907 %7 = load float, float* %arrayidx.7, align 4
908 %add.7 = fadd fast float %7, %add.6
909 %arrayidx.8 = getelementptr inbounds float, float* %x, i64 9
910 %8 = load float, float* %arrayidx.8, align 4
911 %add.8 = fadd fast float %8, %add.7
912 %arrayidx.9 = getelementptr inbounds float, float* %x, i64 10
913 %9 = load float, float* %arrayidx.9, align 4
914 %add.9 = fadd fast float %9, %add.8
915 %arrayidx.10 = getelementptr inbounds float, float* %x, i64 11
916 %10 = load float, float* %arrayidx.10, align 4
917 %add.10 = fadd fast float %10, %add.9
918 %arrayidx.11 = getelementptr inbounds float, float* %x, i64 12
919 %11 = load float, float* %arrayidx.11, align 4
920 %add.11 = fadd fast float %11, %add.10
921 %arrayidx.12 = getelementptr inbounds float, float* %x, i64 13
922 %12 = load float, float* %arrayidx.12, align 4
923 %add.12 = fadd fast float %12, %add.11
924 %arrayidx.13 = getelementptr inbounds float, float* %x, i64 14
925 %13 = load float, float* %arrayidx.13, align 4
926 %add.13 = fadd fast float %13, %add.12
927 %arrayidx.14 = getelementptr inbounds float, float* %x, i64 15
928 %14 = load float, float* %arrayidx.14, align 4
929 %add.14 = fadd fast float %14, %add.13
930 %arrayidx.15 = getelementptr inbounds float, float* %x, i64 16
931 %15 = load float, float* %arrayidx.15, align 4
932 %add.15 = fadd fast float %15, %add.14
933 %arrayidx.16 = getelementptr inbounds float, float* %x, i64 17
934 %16 = load float, float* %arrayidx.16, align 4
935 %add.16 = fadd fast float %16, %add.15
936 %arrayidx.17 = getelementptr inbounds float, float* %x, i64 18
937 %17 = load float, float* %arrayidx.17, align 4
938 %add.17 = fadd fast float %17, %add.16
939 %arrayidx.18 = getelementptr inbounds float, float* %x, i64 19
940 %18 = load float, float* %arrayidx.18, align 4
941 %add.18 = fadd fast float %18, %add.17
942 %arrayidx.19 = getelementptr inbounds float, float* %x, i64 20
943 %19 = load float, float* %arrayidx.19, align 4
944 %add.19 = fadd fast float %19, %add.18
945 %arrayidx.20 = getelementptr inbounds float, float* %x, i64 21
946 %20 = load float, float* %arrayidx.20, align 4
947 %add.20 = fadd fast float %20, %add.19
948 %arrayidx.21 = getelementptr inbounds float, float* %x, i64 22
949 %21 = load float, float* %arrayidx.21, align 4
950 %add.21 = fadd fast float %21, %add.20
951 %arrayidx.22 = getelementptr inbounds float, float* %x, i64 23
952 %22 = load float, float* %arrayidx.22, align 4
953 %add.22 = fadd fast float %22, %add.21
954 %arrayidx.23 = getelementptr inbounds float, float* %x, i64 24
955 %23 = load float, float* %arrayidx.23, align 4
956 %add.23 = fadd fast float %23, %add.22
957 %arrayidx.24 = getelementptr inbounds float, float* %x, i64 25
958 %24 = load float, float* %arrayidx.24, align 4
959 %add.24 = fadd fast float %24, %add.23
960 %arrayidx.25 = getelementptr inbounds float, float* %x, i64 26
961 %25 = load float, float* %arrayidx.25, align 4
962 %add.25 = fadd fast float %25, %add.24
963 %arrayidx.26 = getelementptr inbounds float, float* %x, i64 27
964 %26 = load float, float* %arrayidx.26, align 4
965 %add.26 = fadd fast float %26, %add.25
966 %arrayidx.27 = getelementptr inbounds float, float* %x, i64 28
967 %27 = load float, float* %arrayidx.27, align 4
968 %add.27 = fadd fast float %27, %add.26
969 %arrayidx.28 = getelementptr inbounds float, float* %x, i64 29
970 %28 = load float, float* %arrayidx.28, align 4
971 %add.28 = fadd fast float %28, %add.27
972 %arrayidx.29 = getelementptr inbounds float, float* %x, i64 30
973 %29 = load float, float* %arrayidx.29, align 4
974 %add.29 = fadd fast float %29, %add.28
975 ret float %add.29
976 }
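; Reduction of eight consecutive loads from %x with extra scalar arguments: %conv takes part in the chain twice (via %add and %add5) on top of the constant 3.0, and the checks expect the extra operands to be added back after the vector reduction.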
978 define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) {
979 ; CHECK-LABEL: @extra_args(
980 ; CHECK-NEXT: entry:
981 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
982 ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
983 ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
984 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
985 ; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
986 ; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
987 ; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
988 ; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
989 ; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
990 ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
991 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
992 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
993 ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]])
994 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
995 ; CHECK-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
996 ; CHECK-NEXT: ret float [[OP_EXTRA1]]
998 ; THRESHOLD-LABEL: @extra_args(
999 ; THRESHOLD-NEXT: entry:
1000 ; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
1001 ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
1002 ; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
1003 ; THRESHOLD-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
1004 ; THRESHOLD-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
1005 ; THRESHOLD-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
1006 ; THRESHOLD-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
1007 ; THRESHOLD-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
1008 ; THRESHOLD-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
1009 ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
1010 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
1011 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
1012 ; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]])
1013 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
1014 ; THRESHOLD-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
1015 ; THRESHOLD-NEXT: ret float [[OP_EXTRA1]]
1017 entry:
1018 %mul = mul nsw i32 %b, %a
1019 %conv = sitofp i32 %mul to float
1020 %0 = load float, float* %x, align 4
1021 %add = fadd fast float %conv, 3.000000e+00
1022 %add1 = fadd fast float %0, %add
1023 %arrayidx3 = getelementptr inbounds float, float* %x, i64 1
1024 %1 = load float, float* %arrayidx3, align 4
1025 %add4 = fadd fast float %1, %add1
1026 %add5 = fadd fast float %add4, %conv
1027 %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2
1028 %2 = load float, float* %arrayidx3.1, align 4
1029 %add4.1 = fadd fast float %2, %add5
1030 %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3
1031 %3 = load float, float* %arrayidx3.2, align 4
1032 %add4.2 = fadd fast float %3, %add4.1
1033 %arrayidx3.3 = getelementptr inbounds float, float* %x, i64 4
1034 %4 = load float, float* %arrayidx3.3, align 4
1035 %add4.3 = fadd fast float %4, %add4.2
1036 %arrayidx3.4 = getelementptr inbounds float, float* %x, i64 5
1037 %5 = load float, float* %arrayidx3.4, align 4
1038 %add4.4 = fadd fast float %5, %add4.3
1039 %arrayidx3.5 = getelementptr inbounds float, float* %x, i64 6
1040 %6 = load float, float* %arrayidx3.5, align 4
1041 %add4.5 = fadd fast float %6, %add4.4
1042 %arrayidx3.6 = getelementptr inbounds float, float* %x, i64 7
1043 %7 = load float, float* %arrayidx3.6, align 4
1044 %add4.6 = fadd fast float %7, %add4.5
1045 ret float %add4.6
1046 }
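; Same pattern as @extra_args, but the extra constant operand 5.0 occurs twice in the chain; both occurrences are expected to remain as separate adds after the reduction.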
1048 define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a, i32 %b) {
1049 ; CHECK-LABEL: @extra_args_same_several_times(
1050 ; CHECK-NEXT: entry:
1051 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
1052 ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
1053 ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
1054 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
1055 ; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
1056 ; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
1057 ; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
1058 ; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
1059 ; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
1060 ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
1061 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
1062 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
1063 ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]])
1064 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
1065 ; CHECK-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00
1066 ; CHECK-NEXT: [[OP_EXTRA2:%.*]] = fadd fast float [[OP_EXTRA1]], 5.000000e+00
1067 ; CHECK-NEXT: [[OP_EXTRA3:%.*]] = fadd fast float [[OP_EXTRA2]], [[CONV]]
1068 ; CHECK-NEXT: ret float [[OP_EXTRA3]]
1070 ; THRESHOLD-LABEL: @extra_args_same_several_times(
1071 ; THRESHOLD-NEXT: entry:
1072 ; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
1073 ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
1074 ; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
1075 ; THRESHOLD-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
1076 ; THRESHOLD-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
1077 ; THRESHOLD-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
1078 ; THRESHOLD-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
1079 ; THRESHOLD-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
1080 ; THRESHOLD-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
1081 ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
1082 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
1083 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
1084 ; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]])
1085 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
1086 ; THRESHOLD-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00
1087 ; THRESHOLD-NEXT: [[OP_EXTRA2:%.*]] = fadd fast float [[OP_EXTRA1]], 5.000000e+00
1088 ; THRESHOLD-NEXT: [[OP_EXTRA3:%.*]] = fadd fast float [[OP_EXTRA2]], [[CONV]]
1089 ; THRESHOLD-NEXT: ret float [[OP_EXTRA3]]
1091 entry:
1092 %mul = mul nsw i32 %b, %a
1093 %conv = sitofp i32 %mul to float
1094 %0 = load float, float* %x, align 4
1095 %add = fadd fast float %conv, 3.000000e+00
1096 %add1 = fadd fast float %0, %add
1097 %arrayidx3 = getelementptr inbounds float, float* %x, i64 1
1098 %1 = load float, float* %arrayidx3, align 4
1099 %add4 = fadd fast float %1, %add1
1100 %add41 = fadd fast float %add4, 5.000000e+00
1101 %add5 = fadd fast float %add41, %conv
1102 %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2
1103 %2 = load float, float* %arrayidx3.1, align 4
1104 %add4.1 = fadd fast float %2, %add5
1105 %add4.11 = fadd fast float %add4.1, 5.000000e+00
1106 %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3
1107 %3 = load float, float* %arrayidx3.2, align 4
1108 %add4.2 = fadd fast float %3, %add4.11
1109 %arrayidx3.3 = getelementptr inbounds float, float* %x, i64 4
1110 %4 = load float, float* %arrayidx3.3, align 4
1111 %add4.3 = fadd fast float %4, %add4.2
1112 %arrayidx3.4 = getelementptr inbounds float, float* %x, i64 5
1113 %5 = load float, float* %arrayidx3.4, align 4
1114 %add4.4 = fadd fast float %5, %add4.3
1115 %arrayidx3.5 = getelementptr inbounds float, float* %x, i64 6
1116 %6 = load float, float* %arrayidx3.5, align 4
1117 %add4.5 = fadd fast float %6, %add4.4
1118 %arrayidx3.6 = getelementptr inbounds float, float* %x, i64 7
1119 %7 = load float, float* %arrayidx3.6, align 4
1120 %add4.6 = fadd fast float %7, %add4.5
1121 ret float %add4.6
1122 }
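; The extra argument %conv is consumed in the middle of the reduction chain (%add5, between the loads of x[4] and x[5]); the reduction is still expected to be vectorized, with the extra operands applied after it.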
1124 define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b, i32 %c) {
1125 ; CHECK-LABEL: @extra_args_no_replace(
1126 ; CHECK-NEXT: entry:
1127 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
1128 ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
1129 ; CHECK-NEXT: [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float
1130 ; CHECK-NEXT: [[ADDC:%.*]] = fadd fast float [[CONVC]], 3.000000e+00
1131 ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], [[ADDC]]
1132 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
1133 ; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
1134 ; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
1135 ; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
1136 ; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
1137 ; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
1138 ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
1139 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
1140 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
1141 ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]])
1142 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
1143 ; CHECK-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
1144 ; CHECK-NEXT: ret float [[OP_EXTRA1]]
1146 ; THRESHOLD-LABEL: @extra_args_no_replace(
1147 ; THRESHOLD-NEXT: entry:
1148 ; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
1149 ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
1150 ; THRESHOLD-NEXT: [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float
1151 ; THRESHOLD-NEXT: [[ADDC:%.*]] = fadd fast float [[CONVC]], 3.000000e+00
1152 ; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], [[ADDC]]
1153 ; THRESHOLD-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
1154 ; THRESHOLD-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
1155 ; THRESHOLD-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
1156 ; THRESHOLD-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
1157 ; THRESHOLD-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
1158 ; THRESHOLD-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
1159 ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
1160 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
1161 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
1162 ; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]])
1163 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
1164 ; THRESHOLD-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
1165 ; THRESHOLD-NEXT: ret float [[OP_EXTRA1]]
1167 entry:
1168 %mul = mul nsw i32 %b, %a
1169 %conv = sitofp i32 %mul to float
1170 %0 = load float, float* %x, align 4
1171 %convc = sitofp i32 %c to float
1172 %addc = fadd fast float %convc, 3.000000e+00
1173 %add = fadd fast float %conv, %addc
1174 %add1 = fadd fast float %0, %add
1175 %arrayidx3 = getelementptr inbounds float, float* %x, i64 1
1176 %1 = load float, float* %arrayidx3, align 4
1177 %add4 = fadd fast float %1, %add1
1178 %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2
1179 %2 = load float, float* %arrayidx3.1, align 4
1180 %add4.1 = fadd fast float %2, %add4
1181 %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3
1182 %3 = load float, float* %arrayidx3.2, align 4
1183 %add4.2 = fadd fast float %3, %add4.1
1184 %arrayidx3.3 = getelementptr inbounds float, float* %x, i64 4
1185 %4 = load float, float* %arrayidx3.3, align 4
1186 %add4.3 = fadd fast float %4, %add4.2
1187 %add5 = fadd fast float %add4.3, %conv
1188 %arrayidx3.4 = getelementptr inbounds float, float* %x, i64 5
1189 %5 = load float, float* %arrayidx3.4, align 4
1190 %add4.4 = fadd fast float %5, %add5
1191 %arrayidx3.5 = getelementptr inbounds float, float* %x, i64 6
1192 %6 = load float, float* %arrayidx3.5, align 4
1193 %add4.5 = fadd fast float %6, %add4.4
1194 %arrayidx3.6 = getelementptr inbounds float, float* %x, i64 7
1195 %7 = load float, float* %arrayidx3.6, align 4
1196 %add4.6 = fadd fast float %7, %add4.5
1197 ret float %add4.6
1198 }
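; One add in the chain (%add4.1) lacks the 'fast' flag, so the whole chain must not be turned into a horizontal reduction; the checks expect it to stay scalar.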
1200 define float @extra_args_no_fast(float* %x, float %a, float %b) {
1201 ; CHECK-LABEL: @extra_args_no_fast(
1202 ; CHECK-NEXT: [[ADDC:%.*]] = fadd fast float [[B:%.*]], 3.000000e+00
1203 ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[A:%.*]], [[ADDC]]
1204 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
1205 ; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
1206 ; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
1207 ; CHECK-NEXT: [[T0:%.*]] = load float, float* [[X]], align 4
1208 ; CHECK-NEXT: [[T1:%.*]] = load float, float* [[ARRAYIDX3]], align 4
1209 ; CHECK-NEXT: [[T2:%.*]] = load float, float* [[ARRAYIDX3_1]], align 4
1210 ; CHECK-NEXT: [[T3:%.*]] = load float, float* [[ARRAYIDX3_2]], align 4
1211 ; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[T0]], [[ADD]]
1212 ; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float [[T1]], [[ADD1]]
1213 ; CHECK-NEXT: [[ADD4_1:%.*]] = fadd float [[T2]], [[ADD4]]
1214 ; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float [[T3]], [[ADD4_1]]
1215 ; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_2]], [[A]]
1216 ; CHECK-NEXT: ret float [[ADD5]]
1218 ; THRESHOLD-LABEL: @extra_args_no_fast(
1219 ; THRESHOLD-NEXT: [[ADDC:%.*]] = fadd fast float [[B:%.*]], 3.000000e+00
1220 ; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[A:%.*]], [[ADDC]]
1221 ; THRESHOLD-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
1222 ; THRESHOLD-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
1223 ; THRESHOLD-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
1224 ; THRESHOLD-NEXT: [[T0:%.*]] = load float, float* [[X]], align 4
1225 ; THRESHOLD-NEXT: [[T1:%.*]] = load float, float* [[ARRAYIDX3]], align 4
1226 ; THRESHOLD-NEXT: [[T2:%.*]] = load float, float* [[ARRAYIDX3_1]], align 4
1227 ; THRESHOLD-NEXT: [[T3:%.*]] = load float, float* [[ARRAYIDX3_2]], align 4
1228 ; THRESHOLD-NEXT: [[ADD1:%.*]] = fadd fast float [[T0]], [[ADD]]
1229 ; THRESHOLD-NEXT: [[ADD4:%.*]] = fadd fast float [[T1]], [[ADD1]]
1230 ; THRESHOLD-NEXT: [[ADD4_1:%.*]] = fadd float [[T2]], [[ADD4]]
1231 ; THRESHOLD-NEXT: [[ADD4_2:%.*]] = fadd fast float [[T3]], [[ADD4_1]]
1232 ; THRESHOLD-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_2]], [[A]]
1233 ; THRESHOLD-NEXT: ret float [[ADD5]]
1235 %addc = fadd fast float %b, 3.0
1236 %add = fadd fast float %a, %addc
1237 %arrayidx3 = getelementptr inbounds float, float* %x, i64 1
1238 %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2
1239 %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3
1240 %t0 = load float, float* %x, align 4
1241 %t1 = load float, float* %arrayidx3, align 4
1242 %t2 = load float, float* %arrayidx3.1, align 4
1243 %t3 = load float, float* %arrayidx3.2, align 4
1244 %add1 = fadd fast float %t0, %add
1245 %add4 = fadd fast float %t1, %add1
1246 %add4.1 = fadd float %t2, %add4 ; this is not a reduction candidate
1247 %add4.2 = fadd fast float %t3, %add4.1
1248 %add5 = fadd fast float %add4.2, %a
1249 ret float %add5
1250 }
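; Integer variant: four copies of sext(icmp eq (xor %arg, %bar), 0) are summed together with %arg and the last xor value; the checks expect the xor/icmp/sext to be vectorized and reduced via llvm.vector.reduce.add, with the extra operands added afterwards.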
1252 define i32 @wobble(i32 %arg, i32 %bar) {
1253 ; CHECK-LABEL: @wobble(
1254 ; CHECK-NEXT: bb:
1255 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[ARG:%.*]], i32 0
1256 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
1257 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0
1258 ; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
1259 ; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[SHUFFLE]], [[SHUFFLE1]]
1260 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
1261 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer
1262 ; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32>
1263 ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
1264 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP6]], [[ARG]]
1265 ; CHECK-NEXT: [[OP_EXTRA2:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP3]]
1266 ; CHECK-NEXT: ret i32 [[OP_EXTRA2]]
1268 ; THRESHOLD-LABEL: @wobble(
1269 ; THRESHOLD-NEXT: bb:
1270 ; THRESHOLD-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[ARG:%.*]], i32 0
1271 ; THRESHOLD-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
1272 ; THRESHOLD-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0
1273 ; THRESHOLD-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
1274 ; THRESHOLD-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[SHUFFLE]], [[SHUFFLE1]]
1275 ; THRESHOLD-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
1276 ; THRESHOLD-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer
1277 ; THRESHOLD-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32>
1278 ; THRESHOLD-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
1279 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP6]], [[ARG]]
1280 ; THRESHOLD-NEXT: [[OP_EXTRA2:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP3]]
1281 ; THRESHOLD-NEXT: ret i32 [[OP_EXTRA2]]
1283 bb:
1284 %x1 = xor i32 %arg, %bar
1285 %i1 = icmp eq i32 %x1, 0
1286 %s1 = sext i1 %i1 to i32
1287 %x2 = xor i32 %arg, %bar
1288 %i2 = icmp eq i32 %x2, 0
1289 %s2 = sext i1 %i2 to i32
1290 %x3 = xor i32 %arg, %bar
1291 %i3 = icmp eq i32 %x3, 0
1292 %s3 = sext i1 %i3 to i32
1293 %x4 = xor i32 %arg, %bar
1294 %i4 = icmp eq i32 %x4, 0
1295 %s4 = sext i1 %i4 to i32
1296 %r1 = add nuw i32 %arg, %s1
1297 %r2 = add nsw i32 %r1, %s2
1298 %r3 = add nsw i32 %r2, %s3
1299 %r4 = add nsw i32 %r3, %s4
1300 %r5 = add nsw i32 %r4, %x4