; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 | FileCheck %s
; RUN: opt -passes=slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -slp-threshold=-10 | FileCheck %s --check-prefix=THRESHOLD

@n = external local_unnamed_addr global i32, align 4
@arr = common local_unnamed_addr global [20 x float] zeroinitializer, align 16
@arr1 = common local_unnamed_addr global [20 x float] zeroinitializer, align 16
@res = external local_unnamed_addr global float, align 4
define float @baz() {
; CHECK-LABEL: @baz(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @n, align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], 2.000000e+00
; CHECK-NEXT: [[TMP6:%.*]] = fmul fast float [[CONV]], 2.000000e+00
; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP5]], [[TMP6]]
; CHECK-NEXT: store float [[OP_RDX]], ptr @res, align 4
; CHECK-NEXT: ret float [[OP_RDX]]
;
; THRESHOLD-LABEL: @baz(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[TMP0:%.*]] = load i32, ptr @n, align 4
; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
; THRESHOLD-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i32 0
; THRESHOLD-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[CONV]], i32 1
; THRESHOLD-NEXT: [[TMP7:%.*]] = fmul fast <2 x float> [[TMP6]], <float 2.000000e+00, float 2.000000e+00>
; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0
; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP7]], i32 1
; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]]
; THRESHOLD-NEXT: store float [[OP_RDX]], ptr @res, align 4
; THRESHOLD-NEXT: ret float [[OP_RDX]]
;
entry:
%0 = load i32, ptr @n, align 4
%mul = mul nsw i32 %0, 3
%conv = sitofp i32 %mul to float
%1 = load float, ptr @arr, align 16
%2 = load float, ptr @arr1, align 16
%mul4 = fmul fast float %2, %1
%add = fadd fast float %mul4, %conv
%3 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 1), align 4
%4 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 1), align 4
%mul4.1 = fmul fast float %4, %3
%add.1 = fadd fast float %mul4.1, %add
%5 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
%6 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
%mul4.2 = fmul fast float %6, %5
%add.2 = fadd fast float %mul4.2, %add.1
%7 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
%8 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
%mul4.3 = fmul fast float %8, %7
%add.3 = fadd fast float %mul4.3, %add.2
%add7 = fadd fast float %add.3, %conv
%add19 = fadd fast float %mul4, %add7
%add19.1 = fadd fast float %mul4.1, %add19
%add19.2 = fadd fast float %mul4.2, %add19.1
%add19.3 = fadd fast float %mul4.3, %add19.2
store float %add19.3, ptr @res, align 4
ret float %add19.3
}
define float @bazz() {
; CHECK-LABEL: @bazz(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @n, align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
; CHECK-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr @arr, align 16
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, ptr @arr1, align 16
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP3]])
; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[CONV]]
; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV6]]
; CHECK-NEXT: store float [[OP_RDX1]], ptr @res, align 4
; CHECK-NEXT: ret float [[OP_RDX1]]
;
; THRESHOLD-LABEL: @bazz(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[TMP0:%.*]] = load i32, ptr @n, align 4
; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
; THRESHOLD-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr @arr, align 16
; THRESHOLD-NEXT: [[TMP2:%.*]] = load <8 x float>, ptr @arr1, align 16
; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP3]])
; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[CONV]]
; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV6]]
; THRESHOLD-NEXT: store float [[OP_RDX1]], ptr @res, align 4
; THRESHOLD-NEXT: ret float [[OP_RDX1]]
;
entry:
%0 = load i32, ptr @n, align 4
%mul = mul nsw i32 %0, 3
%conv = sitofp i32 %mul to float
%1 = load float, ptr @arr, align 16
%2 = load float, ptr @arr1, align 16
%mul4 = fmul fast float %2, %1
%add = fadd fast float %mul4, %conv
%3 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 1), align 4
%4 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 1), align 4
%mul4.1 = fmul fast float %4, %3
%add.1 = fadd fast float %mul4.1, %add
%5 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
%6 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
%mul4.2 = fmul fast float %6, %5
%add.2 = fadd fast float %mul4.2, %add.1
%7 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
%8 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
%mul4.3 = fmul fast float %8, %7
%add.3 = fadd fast float %mul4.3, %add.2
%mul5 = shl nsw i32 %0, 2
%conv6 = sitofp i32 %mul5 to float
%add7 = fadd fast float %add.3, %conv6
%9 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 4), align 16
%10 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 4), align 16
%mul18 = fmul fast float %10, %9
%add19 = fadd fast float %mul18, %add7
%11 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 5), align 4
%12 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 5), align 4
%mul18.1 = fmul fast float %12, %11
%add19.1 = fadd fast float %mul18.1, %add19
%13 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 6), align 8
%14 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 6), align 8
%mul18.2 = fmul fast float %14, %13
%add19.2 = fadd fast float %mul18.2, %add19.1
%15 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 7), align 4
%16 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 7), align 4
%mul18.3 = fmul fast float %16, %15
%add19.3 = fadd fast float %mul18.3, %add19.2
store float %add19.3, ptr @res, align 4
ret float %add19.3
}
define float @bazzz() {
; CHECK-LABEL: @bazzz(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @n, align 4
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
; CHECK-NEXT: store float [[TMP5]], ptr @res, align 4
; CHECK-NEXT: ret float [[TMP5]]
;
; THRESHOLD-LABEL: @bazzz(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[TMP0:%.*]] = load i32, ptr @n, align 4
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
; THRESHOLD-NEXT: store float [[TMP5]], ptr @res, align 4
; THRESHOLD-NEXT: ret float [[TMP5]]
;
entry:
%0 = load i32, ptr @n, align 4
%conv = sitofp i32 %0 to float
%1 = load float, ptr @arr, align 16
%2 = load float, ptr @arr1, align 16
%mul = fmul fast float %2, %1
%3 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 1), align 4
%4 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 1), align 4
%mul.1 = fmul fast float %4, %3
%5 = fadd fast float %mul.1, %mul
%6 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
%7 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
%mul.2 = fmul fast float %7, %6
%8 = fadd fast float %mul.2, %5
%9 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
%10 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
%mul.3 = fmul fast float %10, %9
%11 = fadd fast float %mul.3, %8
%12 = fmul fast float %conv, %11
store float %12, ptr @res, align 4
ret float %12
}
define i32 @foo() {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @n, align 4
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
; CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
; CHECK-NEXT: store i32 [[CONV4]], ptr @n, align 4
; CHECK-NEXT: ret i32 [[CONV4]]
;
; THRESHOLD-LABEL: @foo(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[TMP0:%.*]] = load i32, ptr @n, align 4
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
; THRESHOLD-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
; THRESHOLD-NEXT: store i32 [[CONV4]], ptr @n, align 4
; THRESHOLD-NEXT: ret i32 [[CONV4]]
;
entry:
%0 = load i32, ptr @n, align 4
%conv = sitofp i32 %0 to float
%1 = load float, ptr @arr, align 16
%2 = load float, ptr @arr1, align 16
%mul = fmul fast float %2, %1
%3 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 1), align 4
%4 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 1), align 4
%mul.1 = fmul fast float %4, %3
%5 = fadd fast float %mul.1, %mul
%6 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
%7 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
%mul.2 = fmul fast float %7, %6
%8 = fadd fast float %mul.2, %5
%9 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
%10 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
%mul.3 = fmul fast float %10, %9
%11 = fadd fast float %mul.3, %8
%12 = fmul fast float %conv, %11
%conv4 = fptosi float %12 to i32
store i32 %conv4, ptr @n, align 4
ret i32 %conv4
}
; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select
; with fastmath on the select.
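; Illustrative sketch only (not checked by this test, and assuming the FIXME is
; addressed): with 'fast' on the selects, InstCombine would canonicalize each
; fcmp ogt + select pair below into a maxnum call such as
;   %max.0.mul3 = call fast float @llvm.maxnum.f32(float %mul, float %mul3)
; and the whole chain could then be matched as a single
; @llvm.vector.reduce.fmax.v4f32 reduction over the four products.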
define float @bar() {
; CHECK-LABEL: @bar(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr @arr, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr @arr1, align 16
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; CHECK-NEXT: [[CMP4:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float [[TMP3]], float [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
; CHECK-NEXT: [[MUL3_1:%.*]] = fmul fast float [[TMP6]], [[TMP5]]
; CHECK-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], [[MUL3_1]]
; CHECK-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float [[MUL3_1]]
; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
; CHECK-NEXT: [[MUL3_2:%.*]] = fmul fast float [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], [[MUL3_2]]
; CHECK-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float [[MUL3_2]]
; CHECK-NEXT: store float [[MAX_0_MUL3_2]], ptr @res, align 4
; CHECK-NEXT: ret float [[MAX_0_MUL3_2]]
;
; THRESHOLD-LABEL: @bar(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr @arr, align 16
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr @arr1, align 16
; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]]
; THRESHOLD-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; THRESHOLD-NEXT: [[CMP4:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
; THRESHOLD-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float [[TMP3]], float [[TMP4]]
; THRESHOLD-NEXT: [[TMP5:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
; THRESHOLD-NEXT: [[TMP6:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
; THRESHOLD-NEXT: [[MUL3_1:%.*]] = fmul fast float [[TMP6]], [[TMP5]]
; THRESHOLD-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], [[MUL3_1]]
; THRESHOLD-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float [[MUL3_1]]
; THRESHOLD-NEXT: [[TMP7:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
; THRESHOLD-NEXT: [[TMP8:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
; THRESHOLD-NEXT: [[MUL3_2:%.*]] = fmul fast float [[TMP8]], [[TMP7]]
; THRESHOLD-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], [[MUL3_2]]
; THRESHOLD-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float [[MUL3_2]]
; THRESHOLD-NEXT: store float [[MAX_0_MUL3_2]], ptr @res, align 4
; THRESHOLD-NEXT: ret float [[MAX_0_MUL3_2]]
;
entry:
%0 = load float, ptr @arr, align 16
%1 = load float, ptr @arr1, align 16
%mul = fmul fast float %1, %0
%2 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 1), align 4
%3 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 1), align 4
%mul3 = fmul fast float %3, %2
%cmp4 = fcmp fast ogt float %mul, %mul3
%max.0.mul3 = select i1 %cmp4, float %mul, float %mul3
%4 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
%5 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
%mul3.1 = fmul fast float %5, %4
%cmp4.1 = fcmp fast ogt float %max.0.mul3, %mul3.1
%max.0.mul3.1 = select i1 %cmp4.1, float %max.0.mul3, float %mul3.1
%6 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
%7 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
%mul3.2 = fmul fast float %7, %6
%cmp4.2 = fcmp fast ogt float %max.0.mul3.1, %mul3.2
%max.0.mul3.2 = select i1 %cmp4.2, float %max.0.mul3.1, float %mul3.2
store float %max.0.mul3.2, ptr @res, align 4
ret float %max.0.mul3.2
}
define float @f(ptr nocapture readonly %x) {
; CHECK-LABEL: @f(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <32 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, ptr [[X]], i64 32
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[ARRAYIDX_32]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP0]])
; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP1]])
; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret float [[OP_RDX]]
;
; THRESHOLD-LABEL: @f(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[TMP0:%.*]] = load <32 x float>, ptr [[X:%.*]], align 4
; THRESHOLD-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, ptr [[X]], i64 32
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[ARRAYIDX_32]], align 4
; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP0]])
; THRESHOLD-NEXT: [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP1]])
; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[TMP3]]
; THRESHOLD-NEXT: ret float [[OP_RDX]]
;
entry:
%0 = load float, ptr %x, align 4
%arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
%1 = load float, ptr %arrayidx.1, align 4
%add.1 = fadd fast float %1, %0
%arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
%2 = load float, ptr %arrayidx.2, align 4
%add.2 = fadd fast float %2, %add.1
%arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
%3 = load float, ptr %arrayidx.3, align 4
%add.3 = fadd fast float %3, %add.2
%arrayidx.4 = getelementptr inbounds float, ptr %x, i64 4
%4 = load float, ptr %arrayidx.4, align 4
%add.4 = fadd fast float %4, %add.3
%arrayidx.5 = getelementptr inbounds float, ptr %x, i64 5
%5 = load float, ptr %arrayidx.5, align 4
%add.5 = fadd fast float %5, %add.4
%arrayidx.6 = getelementptr inbounds float, ptr %x, i64 6
%6 = load float, ptr %arrayidx.6, align 4
%add.6 = fadd fast float %6, %add.5
%arrayidx.7 = getelementptr inbounds float, ptr %x, i64 7
%7 = load float, ptr %arrayidx.7, align 4
%add.7 = fadd fast float %7, %add.6
%arrayidx.8 = getelementptr inbounds float, ptr %x, i64 8
%8 = load float, ptr %arrayidx.8, align 4
%add.8 = fadd fast float %8, %add.7
%arrayidx.9 = getelementptr inbounds float, ptr %x, i64 9
%9 = load float, ptr %arrayidx.9, align 4
%add.9 = fadd fast float %9, %add.8
%arrayidx.10 = getelementptr inbounds float, ptr %x, i64 10
%10 = load float, ptr %arrayidx.10, align 4
%add.10 = fadd fast float %10, %add.9
%arrayidx.11 = getelementptr inbounds float, ptr %x, i64 11
%11 = load float, ptr %arrayidx.11, align 4
%add.11 = fadd fast float %11, %add.10
%arrayidx.12 = getelementptr inbounds float, ptr %x, i64 12
%12 = load float, ptr %arrayidx.12, align 4
%add.12 = fadd fast float %12, %add.11
%arrayidx.13 = getelementptr inbounds float, ptr %x, i64 13
%13 = load float, ptr %arrayidx.13, align 4
%add.13 = fadd fast float %13, %add.12
%arrayidx.14 = getelementptr inbounds float, ptr %x, i64 14
%14 = load float, ptr %arrayidx.14, align 4
%add.14 = fadd fast float %14, %add.13
%arrayidx.15 = getelementptr inbounds float, ptr %x, i64 15
%15 = load float, ptr %arrayidx.15, align 4
%add.15 = fadd fast float %15, %add.14
%arrayidx.16 = getelementptr inbounds float, ptr %x, i64 16
%16 = load float, ptr %arrayidx.16, align 4
%add.16 = fadd fast float %16, %add.15
%arrayidx.17 = getelementptr inbounds float, ptr %x, i64 17
%17 = load float, ptr %arrayidx.17, align 4
%add.17 = fadd fast float %17, %add.16
%arrayidx.18 = getelementptr inbounds float, ptr %x, i64 18
%18 = load float, ptr %arrayidx.18, align 4
%add.18 = fadd fast float %18, %add.17
%arrayidx.19 = getelementptr inbounds float, ptr %x, i64 19
%19 = load float, ptr %arrayidx.19, align 4
%add.19 = fadd fast float %19, %add.18
%arrayidx.20 = getelementptr inbounds float, ptr %x, i64 20
%20 = load float, ptr %arrayidx.20, align 4
%add.20 = fadd fast float %20, %add.19
%arrayidx.21 = getelementptr inbounds float, ptr %x, i64 21
%21 = load float, ptr %arrayidx.21, align 4
%add.21 = fadd fast float %21, %add.20
%arrayidx.22 = getelementptr inbounds float, ptr %x, i64 22
%22 = load float, ptr %arrayidx.22, align 4
%add.22 = fadd fast float %22, %add.21
%arrayidx.23 = getelementptr inbounds float, ptr %x, i64 23
%23 = load float, ptr %arrayidx.23, align 4
%add.23 = fadd fast float %23, %add.22
%arrayidx.24 = getelementptr inbounds float, ptr %x, i64 24
%24 = load float, ptr %arrayidx.24, align 4
%add.24 = fadd fast float %24, %add.23
%arrayidx.25 = getelementptr inbounds float, ptr %x, i64 25
%25 = load float, ptr %arrayidx.25, align 4
%add.25 = fadd fast float %25, %add.24
%arrayidx.26 = getelementptr inbounds float, ptr %x, i64 26
%26 = load float, ptr %arrayidx.26, align 4
%add.26 = fadd fast float %26, %add.25
%arrayidx.27 = getelementptr inbounds float, ptr %x, i64 27
%27 = load float, ptr %arrayidx.27, align 4
%add.27 = fadd fast float %27, %add.26
%arrayidx.28 = getelementptr inbounds float, ptr %x, i64 28
%28 = load float, ptr %arrayidx.28, align 4
%add.28 = fadd fast float %28, %add.27
%arrayidx.29 = getelementptr inbounds float, ptr %x, i64 29
%29 = load float, ptr %arrayidx.29, align 4
%add.29 = fadd fast float %29, %add.28
%arrayidx.30 = getelementptr inbounds float, ptr %x, i64 30
%30 = load float, ptr %arrayidx.30, align 4
%add.30 = fadd fast float %30, %add.29
%arrayidx.31 = getelementptr inbounds float, ptr %x, i64 31
%31 = load float, ptr %arrayidx.31, align 4
%add.31 = fadd fast float %31, %add.30
%arrayidx.32 = getelementptr inbounds float, ptr %x, i64 32
%32 = load float, ptr %arrayidx.32, align 4
%add.32 = fadd fast float %32, %add.31
%arrayidx.33 = getelementptr inbounds float, ptr %x, i64 33
%33 = load float, ptr %arrayidx.33, align 4
%add.33 = fadd fast float %33, %add.32
%arrayidx.34 = getelementptr inbounds float, ptr %x, i64 34
%34 = load float, ptr %arrayidx.34, align 4
%add.34 = fadd fast float %34, %add.33
%arrayidx.35 = getelementptr inbounds float, ptr %x, i64 35
%35 = load float, ptr %arrayidx.35, align 4
%add.35 = fadd fast float %35, %add.34
%arrayidx.36 = getelementptr inbounds float, ptr %x, i64 36
%36 = load float, ptr %arrayidx.36, align 4
%add.36 = fadd fast float %36, %add.35
%arrayidx.37 = getelementptr inbounds float, ptr %x, i64 37
%37 = load float, ptr %arrayidx.37, align 4
%add.37 = fadd fast float %37, %add.36
%arrayidx.38 = getelementptr inbounds float, ptr %x, i64 38
%38 = load float, ptr %arrayidx.38, align 4
%add.38 = fadd fast float %38, %add.37
%arrayidx.39 = getelementptr inbounds float, ptr %x, i64 39
%39 = load float, ptr %arrayidx.39, align 4
%add.39 = fadd fast float %39, %add.38
%arrayidx.40 = getelementptr inbounds float, ptr %x, i64 40
%40 = load float, ptr %arrayidx.40, align 4
%add.40 = fadd fast float %40, %add.39
%arrayidx.41 = getelementptr inbounds float, ptr %x, i64 41
%41 = load float, ptr %arrayidx.41, align 4
%add.41 = fadd fast float %41, %add.40
%arrayidx.42 = getelementptr inbounds float, ptr %x, i64 42
%42 = load float, ptr %arrayidx.42, align 4
%add.42 = fadd fast float %42, %add.41
%arrayidx.43 = getelementptr inbounds float, ptr %x, i64 43
%43 = load float, ptr %arrayidx.43, align 4
%add.43 = fadd fast float %43, %add.42
%arrayidx.44 = getelementptr inbounds float, ptr %x, i64 44
%44 = load float, ptr %arrayidx.44, align 4
%add.44 = fadd fast float %44, %add.43
%arrayidx.45 = getelementptr inbounds float, ptr %x, i64 45
%45 = load float, ptr %arrayidx.45, align 4
%add.45 = fadd fast float %45, %add.44
%arrayidx.46 = getelementptr inbounds float, ptr %x, i64 46
%46 = load float, ptr %arrayidx.46, align 4
%add.46 = fadd fast float %46, %add.45
%arrayidx.47 = getelementptr inbounds float, ptr %x, i64 47
%47 = load float, ptr %arrayidx.47, align 4
%add.47 = fadd fast float %47, %add.46
ret float %add.47
}
define float @f1(ptr nocapture readonly %x, i32 %a, i32 %b) {
; CHECK-LABEL: @f1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[REM:%.*]] = srem i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[REM]] to float
; CHECK-NEXT: [[TMP0:%.*]] = load <32 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP0]])
; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP1]], [[CONV]]
; CHECK-NEXT: ret float [[OP_RDX]]
;
; THRESHOLD-LABEL: @f1(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[REM:%.*]] = srem i32 [[A:%.*]], [[B:%.*]]
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[REM]] to float
; THRESHOLD-NEXT: [[TMP0:%.*]] = load <32 x float>, ptr [[X:%.*]], align 4
; THRESHOLD-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP0]])
; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP1]], [[CONV]]
; THRESHOLD-NEXT: ret float [[OP_RDX]]
;
entry:
%rem = srem i32 %a, %b
%conv = sitofp i32 %rem to float
%0 = load float, ptr %x, align 4
%add = fadd fast float %0, %conv
%arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
%1 = load float, ptr %arrayidx.1, align 4
%add.1 = fadd fast float %1, %add
%arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
%2 = load float, ptr %arrayidx.2, align 4
%add.2 = fadd fast float %2, %add.1
%arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
%3 = load float, ptr %arrayidx.3, align 4
%add.3 = fadd fast float %3, %add.2
%arrayidx.4 = getelementptr inbounds float, ptr %x, i64 4
%4 = load float, ptr %arrayidx.4, align 4
%add.4 = fadd fast float %4, %add.3
%arrayidx.5 = getelementptr inbounds float, ptr %x, i64 5
%5 = load float, ptr %arrayidx.5, align 4
%add.5 = fadd fast float %5, %add.4
%arrayidx.6 = getelementptr inbounds float, ptr %x, i64 6
%6 = load float, ptr %arrayidx.6, align 4
%add.6 = fadd fast float %6, %add.5
%arrayidx.7 = getelementptr inbounds float, ptr %x, i64 7
%7 = load float, ptr %arrayidx.7, align 4
%add.7 = fadd fast float %7, %add.6
%arrayidx.8 = getelementptr inbounds float, ptr %x, i64 8
%8 = load float, ptr %arrayidx.8, align 4
%add.8 = fadd fast float %8, %add.7
%arrayidx.9 = getelementptr inbounds float, ptr %x, i64 9
%9 = load float, ptr %arrayidx.9, align 4
%add.9 = fadd fast float %9, %add.8
%arrayidx.10 = getelementptr inbounds float, ptr %x, i64 10
%10 = load float, ptr %arrayidx.10, align 4
%add.10 = fadd fast float %10, %add.9
%arrayidx.11 = getelementptr inbounds float, ptr %x, i64 11
%11 = load float, ptr %arrayidx.11, align 4
%add.11 = fadd fast float %11, %add.10
%arrayidx.12 = getelementptr inbounds float, ptr %x, i64 12
%12 = load float, ptr %arrayidx.12, align 4
%add.12 = fadd fast float %12, %add.11
%arrayidx.13 = getelementptr inbounds float, ptr %x, i64 13
%13 = load float, ptr %arrayidx.13, align 4
%add.13 = fadd fast float %13, %add.12
%arrayidx.14 = getelementptr inbounds float, ptr %x, i64 14
%14 = load float, ptr %arrayidx.14, align 4
%add.14 = fadd fast float %14, %add.13
%arrayidx.15 = getelementptr inbounds float, ptr %x, i64 15
%15 = load float, ptr %arrayidx.15, align 4
%add.15 = fadd fast float %15, %add.14
%arrayidx.16 = getelementptr inbounds float, ptr %x, i64 16
%16 = load float, ptr %arrayidx.16, align 4
%add.16 = fadd fast float %16, %add.15
%arrayidx.17 = getelementptr inbounds float, ptr %x, i64 17
%17 = load float, ptr %arrayidx.17, align 4
%add.17 = fadd fast float %17, %add.16
%arrayidx.18 = getelementptr inbounds float, ptr %x, i64 18
%18 = load float, ptr %arrayidx.18, align 4
%add.18 = fadd fast float %18, %add.17
%arrayidx.19 = getelementptr inbounds float, ptr %x, i64 19
%19 = load float, ptr %arrayidx.19, align 4
%add.19 = fadd fast float %19, %add.18
%arrayidx.20 = getelementptr inbounds float, ptr %x, i64 20
%20 = load float, ptr %arrayidx.20, align 4
%add.20 = fadd fast float %20, %add.19
%arrayidx.21 = getelementptr inbounds float, ptr %x, i64 21
%21 = load float, ptr %arrayidx.21, align 4
%add.21 = fadd fast float %21, %add.20
%arrayidx.22 = getelementptr inbounds float, ptr %x, i64 22
%22 = load float, ptr %arrayidx.22, align 4
%add.22 = fadd fast float %22, %add.21
%arrayidx.23 = getelementptr inbounds float, ptr %x, i64 23
%23 = load float, ptr %arrayidx.23, align 4
%add.23 = fadd fast float %23, %add.22
%arrayidx.24 = getelementptr inbounds float, ptr %x, i64 24
%24 = load float, ptr %arrayidx.24, align 4
%add.24 = fadd fast float %24, %add.23
%arrayidx.25 = getelementptr inbounds float, ptr %x, i64 25
%25 = load float, ptr %arrayidx.25, align 4
%add.25 = fadd fast float %25, %add.24
%arrayidx.26 = getelementptr inbounds float, ptr %x, i64 26
%26 = load float, ptr %arrayidx.26, align 4
%add.26 = fadd fast float %26, %add.25
%arrayidx.27 = getelementptr inbounds float, ptr %x, i64 27
%27 = load float, ptr %arrayidx.27, align 4
%add.27 = fadd fast float %27, %add.26
%arrayidx.28 = getelementptr inbounds float, ptr %x, i64 28
%28 = load float, ptr %arrayidx.28, align 4
%add.28 = fadd fast float %28, %add.27
%arrayidx.29 = getelementptr inbounds float, ptr %x, i64 29
%29 = load float, ptr %arrayidx.29, align 4
%add.29 = fadd fast float %29, %add.28
%arrayidx.30 = getelementptr inbounds float, ptr %x, i64 30
%30 = load float, ptr %arrayidx.30, align 4
%add.30 = fadd fast float %30, %add.29
%arrayidx.31 = getelementptr inbounds float, ptr %x, i64 31
%31 = load float, ptr %arrayidx.31, align 4
%add.31 = fadd fast float %31, %add.30
ret float %add.31
}
define float @loadadd31(ptr nocapture readonly %x) {
; CHECK-LABEL: @loadadd31(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, ptr [[X]], i64 17
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[ARRAYIDX_16]], align 4
; CHECK-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, ptr [[X]], i64 25
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[ARRAYIDX_24]], align 4
; CHECK-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, ptr [[X]], i64 29
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX_28]], align 4
; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, ptr [[X]], i64 30
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX_29]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP0]])
; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]])
; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP7]]
; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP3]]
; CHECK-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], [[TMP4]]
; CHECK-NEXT: ret float [[OP_RDX3]]
;
; THRESHOLD-LABEL: @loadadd31(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
; THRESHOLD-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[ARRAYIDX]], align 4
; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, ptr [[X]], i64 17
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[ARRAYIDX_16]], align 4
; THRESHOLD-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, ptr [[X]], i64 25
; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[ARRAYIDX_24]], align 4
; THRESHOLD-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, ptr [[X]], i64 29
; THRESHOLD-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX_28]], align 4
; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, ptr [[X]], i64 30
; THRESHOLD-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX_29]], align 4
; THRESHOLD-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP0]])
; THRESHOLD-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]])
; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP5]], [[TMP6]]
; THRESHOLD-NEXT: [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP7]]
; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP3]]
; THRESHOLD-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], [[TMP4]]
; THRESHOLD-NEXT: ret float [[OP_RDX3]]
;
entry:
%arrayidx = getelementptr inbounds float, ptr %x, i64 1
%0 = load float, ptr %arrayidx, align 4
%arrayidx.1 = getelementptr inbounds float, ptr %x, i64 2
%1 = load float, ptr %arrayidx.1, align 4
%add.1 = fadd fast float %1, %0
%arrayidx.2 = getelementptr inbounds float, ptr %x, i64 3
%2 = load float, ptr %arrayidx.2, align 4
%add.2 = fadd fast float %2, %add.1
%arrayidx.3 = getelementptr inbounds float, ptr %x, i64 4
%3 = load float, ptr %arrayidx.3, align 4
%add.3 = fadd fast float %3, %add.2
%arrayidx.4 = getelementptr inbounds float, ptr %x, i64 5
%4 = load float, ptr %arrayidx.4, align 4
%add.4 = fadd fast float %4, %add.3
%arrayidx.5 = getelementptr inbounds float, ptr %x, i64 6
%5 = load float, ptr %arrayidx.5, align 4
%add.5 = fadd fast float %5, %add.4
%arrayidx.6 = getelementptr inbounds float, ptr %x, i64 7
%6 = load float, ptr %arrayidx.6, align 4
%add.6 = fadd fast float %6, %add.5
%arrayidx.7 = getelementptr inbounds float, ptr %x, i64 8
%7 = load float, ptr %arrayidx.7, align 4
%add.7 = fadd fast float %7, %add.6
%arrayidx.8 = getelementptr inbounds float, ptr %x, i64 9
%8 = load float, ptr %arrayidx.8, align 4
%add.8 = fadd fast float %8, %add.7
%arrayidx.9 = getelementptr inbounds float, ptr %x, i64 10
%9 = load float, ptr %arrayidx.9, align 4
%add.9 = fadd fast float %9, %add.8
%arrayidx.10 = getelementptr inbounds float, ptr %x, i64 11
%10 = load float, ptr %arrayidx.10, align 4
%add.10 = fadd fast float %10, %add.9
%arrayidx.11 = getelementptr inbounds float, ptr %x, i64 12
%11 = load float, ptr %arrayidx.11, align 4
%add.11 = fadd fast float %11, %add.10
%arrayidx.12 = getelementptr inbounds float, ptr %x, i64 13
%12 = load float, ptr %arrayidx.12, align 4
%add.12 = fadd fast float %12, %add.11
%arrayidx.13 = getelementptr inbounds float, ptr %x, i64 14
%13 = load float, ptr %arrayidx.13, align 4
%add.13 = fadd fast float %13, %add.12
%arrayidx.14 = getelementptr inbounds float, ptr %x, i64 15
%14 = load float, ptr %arrayidx.14, align 4
%add.14 = fadd fast float %14, %add.13
%arrayidx.15 = getelementptr inbounds float, ptr %x, i64 16
%15 = load float, ptr %arrayidx.15, align 4
%add.15 = fadd fast float %15, %add.14
%arrayidx.16 = getelementptr inbounds float, ptr %x, i64 17
%16 = load float, ptr %arrayidx.16, align 4
%add.16 = fadd fast float %16, %add.15
%arrayidx.17 = getelementptr inbounds float, ptr %x, i64 18
%17 = load float, ptr %arrayidx.17, align 4
%add.17 = fadd fast float %17, %add.16
%arrayidx.18 = getelementptr inbounds float, ptr %x, i64 19
%18 = load float, ptr %arrayidx.18, align 4
%add.18 = fadd fast float %18, %add.17
%arrayidx.19 = getelementptr inbounds float, ptr %x, i64 20
%19 = load float, ptr %arrayidx.19, align 4
%add.19 = fadd fast float %19, %add.18
%arrayidx.20 = getelementptr inbounds float, ptr %x, i64 21
%20 = load float, ptr %arrayidx.20, align 4
%add.20 = fadd fast float %20, %add.19
%arrayidx.21 = getelementptr inbounds float, ptr %x, i64 22
%21 = load float, ptr %arrayidx.21, align 4
%add.21 = fadd fast float %21, %add.20
%arrayidx.22 = getelementptr inbounds float, ptr %x, i64 23
%22 = load float, ptr %arrayidx.22, align 4
%add.22 = fadd fast float %22, %add.21
%arrayidx.23 = getelementptr inbounds float, ptr %x, i64 24
%23 = load float, ptr %arrayidx.23, align 4
%add.23 = fadd fast float %23, %add.22
%arrayidx.24 = getelementptr inbounds float, ptr %x, i64 25
%24 = load float, ptr %arrayidx.24, align 4
%add.24 = fadd fast float %24, %add.23
%arrayidx.25 = getelementptr inbounds float, ptr %x, i64 26
%25 = load float, ptr %arrayidx.25, align 4
%add.25 = fadd fast float %25, %add.24
%arrayidx.26 = getelementptr inbounds float, ptr %x, i64 27
%26 = load float, ptr %arrayidx.26, align 4
%add.26 = fadd fast float %26, %add.25
%arrayidx.27 = getelementptr inbounds float, ptr %x, i64 28
%27 = load float, ptr %arrayidx.27, align 4
%add.27 = fadd fast float %27, %add.26
%arrayidx.28 = getelementptr inbounds float, ptr %x, i64 29
%28 = load float, ptr %arrayidx.28, align 4
%add.28 = fadd fast float %28, %add.27
%arrayidx.29 = getelementptr inbounds float, ptr %x, i64 30
%29 = load float, ptr %arrayidx.29, align 4
%add.29 = fadd fast float %29, %add.28
ret float %add.29
}
define float @extra_args(ptr nocapture readonly %x, i32 %a, i32 %b) {
; CHECK-LABEL: @extra_args(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00
; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], 3.000000e+00
; CHECK-NEXT: ret float [[OP_RDX1]]
;
; THRESHOLD-LABEL: @extra_args(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
; THRESHOLD-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP0]])
; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00
; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP1]], [[TMP2]]
; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], 3.000000e+00
; THRESHOLD-NEXT: ret float [[OP_RDX1]]
;
entry:
%mul = mul nsw i32 %b, %a
%conv = sitofp i32 %mul to float
%0 = load float, ptr %x, align 4
%add = fadd fast float %conv, 3.000000e+00
%add1 = fadd fast float %0, %add
%arrayidx3 = getelementptr inbounds float, ptr %x, i64 1
%1 = load float, ptr %arrayidx3, align 4
%add4 = fadd fast float %1, %add1
%add5 = fadd fast float %add4, %conv
%arrayidx3.1 = getelementptr inbounds float, ptr %x, i64 2
%2 = load float, ptr %arrayidx3.1, align 4
%add4.1 = fadd fast float %2, %add5
%arrayidx3.2 = getelementptr inbounds float, ptr %x, i64 3
%3 = load float, ptr %arrayidx3.2, align 4
%add4.2 = fadd fast float %3, %add4.1
%arrayidx3.3 = getelementptr inbounds float, ptr %x, i64 4
%4 = load float, ptr %arrayidx3.3, align 4
%add4.3 = fadd fast float %4, %add4.2
%arrayidx3.4 = getelementptr inbounds float, ptr %x, i64 5
%5 = load float, ptr %arrayidx3.4, align 4
%add4.4 = fadd fast float %5, %add4.3
%arrayidx3.5 = getelementptr inbounds float, ptr %x, i64 6
%6 = load float, ptr %arrayidx3.5, align 4
%add4.5 = fadd fast float %6, %add4.4
%arrayidx3.6 = getelementptr inbounds float, ptr %x, i64 7
%7 = load float, ptr %arrayidx3.6, align 4
%add4.6 = fadd fast float %7, %add4.5
ret float %add4.6
}
define float @extra_args_same_several_times(ptr nocapture readonly %x, i32 %a, i32 %b) {
; CHECK-LABEL: @extra_args_same_several_times(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP0]])
; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP1]], 1.300000e+01
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00
; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP2]]
; CHECK-NEXT: ret float [[OP_RDX1]]
;
; THRESHOLD-LABEL: @extra_args_same_several_times(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
; THRESHOLD-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP0]])
; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP1]], 1.300000e+01
; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00
; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP2]]
; THRESHOLD-NEXT: ret float [[OP_RDX1]]
;
entry:
%mul = mul nsw i32 %b, %a
%conv = sitofp i32 %mul to float
%0 = load float, ptr %x, align 4
%add = fadd fast float %conv, 3.000000e+00
%add1 = fadd fast float %0, %add
%arrayidx3 = getelementptr inbounds float, ptr %x, i64 1
%1 = load float, ptr %arrayidx3, align 4
%add4 = fadd fast float %1, %add1
%add41 = fadd fast float %add4, 5.000000e+00
%add5 = fadd fast float %add41, %conv
%arrayidx3.1 = getelementptr inbounds float, ptr %x, i64 2
%2 = load float, ptr %arrayidx3.1, align 4
%add4.1 = fadd fast float %2, %add5
%add4.11 = fadd fast float %add4.1, 5.000000e+00
%arrayidx3.2 = getelementptr inbounds float, ptr %x, i64 3
%3 = load float, ptr %arrayidx3.2, align 4
%add4.2 = fadd fast float %3, %add4.11
%arrayidx3.3 = getelementptr inbounds float, ptr %x, i64 4
%4 = load float, ptr %arrayidx3.3, align 4
%add4.3 = fadd fast float %4, %add4.2
%arrayidx3.4 = getelementptr inbounds float, ptr %x, i64 5
%5 = load float, ptr %arrayidx3.4, align 4
%add4.4 = fadd fast float %5, %add4.3
%arrayidx3.5 = getelementptr inbounds float, ptr %x, i64 6
%6 = load float, ptr %arrayidx3.5, align 4
%add4.5 = fadd fast float %6, %add4.4
%arrayidx3.6 = getelementptr inbounds float, ptr %x, i64 7
%7 = load float, ptr %arrayidx3.6, align 4
%add4.6 = fadd fast float %7, %add4.5
ret float %add4.6
}
define float @extra_args_no_replace(ptr nocapture readonly %x, i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: @extra_args_no_replace(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00
; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], 3.000000e+00
; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[CONVC]]
; CHECK-NEXT: ret float [[OP_RDX2]]
;
; THRESHOLD-LABEL: @extra_args_no_replace(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT: [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float
; THRESHOLD-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
; THRESHOLD-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP0]])
; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00
; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP1]], [[TMP2]]
; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], 3.000000e+00
; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[CONVC]]
; THRESHOLD-NEXT: ret float [[OP_RDX2]]
;
entry:
%mul = mul nsw i32 %b, %a
%conv = sitofp i32 %mul to float
%0 = load float, ptr %x, align 4
%convc = sitofp i32 %c to float
%addc = fadd fast float %convc, 3.000000e+00
%add = fadd fast float %conv, %addc
%add1 = fadd fast float %0, %add
%arrayidx3 = getelementptr inbounds float, ptr %x, i64 1
%1 = load float, ptr %arrayidx3, align 4
%add4 = fadd fast float %1, %add1
%arrayidx3.1 = getelementptr inbounds float, ptr %x, i64 2
%2 = load float, ptr %arrayidx3.1, align 4
%add4.1 = fadd fast float %2, %add4
%arrayidx3.2 = getelementptr inbounds float, ptr %x, i64 3
%3 = load float, ptr %arrayidx3.2, align 4
%add4.2 = fadd fast float %3, %add4.1
%arrayidx3.3 = getelementptr inbounds float, ptr %x, i64 4
%4 = load float, ptr %arrayidx3.3, align 4
%add4.3 = fadd fast float %4, %add4.2
%add5 = fadd fast float %add4.3, %conv
%arrayidx3.4 = getelementptr inbounds float, ptr %x, i64 5
%5 = load float, ptr %arrayidx3.4, align 4
%add4.4 = fadd fast float %5, %add5
%arrayidx3.5 = getelementptr inbounds float, ptr %x, i64 6
%6 = load float, ptr %arrayidx3.5, align 4
%add4.5 = fadd fast float %6, %add4.4
%arrayidx3.6 = getelementptr inbounds float, ptr %x, i64 7
%7 = load float, ptr %arrayidx3.6, align 4
%add4.6 = fadd fast float %7, %add4.5
ret float %add4.6
}
define float @extra_args_no_fast(ptr %x, float %a, float %b) {
; CHECK-LABEL: @extra_args_no_fast(
; CHECK-NEXT: [[ADDC:%.*]] = fadd fast float [[B:%.*]], 3.000000e+00
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[A:%.*]], [[ADDC]]
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
; CHECK-NEXT: [[T0:%.*]] = load float, ptr [[X]], align 4
; CHECK-NEXT: [[T1:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
; CHECK-NEXT: [[T2:%.*]] = load float, ptr [[ARRAYIDX3_1]], align 4
; CHECK-NEXT: [[T3:%.*]] = load float, ptr [[ARRAYIDX3_2]], align 4
; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[T0]], [[ADD]]
; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float [[T1]], [[ADD1]]
; CHECK-NEXT: [[ADD4_1:%.*]] = fadd float [[T2]], [[ADD4]]
; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float [[T3]], [[ADD4_1]]
; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_2]], [[A]]
; CHECK-NEXT: ret float [[ADD5]]
;
; THRESHOLD-LABEL: @extra_args_no_fast(
; THRESHOLD-NEXT: [[ADDC:%.*]] = fadd fast float [[B:%.*]], 3.000000e+00
; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[A:%.*]], [[ADDC]]
; THRESHOLD-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
; THRESHOLD-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
; THRESHOLD-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
; THRESHOLD-NEXT: [[T0:%.*]] = load float, ptr [[X]], align 4
; THRESHOLD-NEXT: [[T1:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
; THRESHOLD-NEXT: [[T2:%.*]] = load float, ptr [[ARRAYIDX3_1]], align 4
; THRESHOLD-NEXT: [[T3:%.*]] = load float, ptr [[ARRAYIDX3_2]], align 4
; THRESHOLD-NEXT: [[ADD1:%.*]] = fadd fast float [[T0]], [[ADD]]
; THRESHOLD-NEXT: [[ADD4:%.*]] = fadd fast float [[T1]], [[ADD1]]
; THRESHOLD-NEXT: [[ADD4_1:%.*]] = fadd float [[T2]], [[ADD4]]
; THRESHOLD-NEXT: [[ADD4_2:%.*]] = fadd fast float [[T3]], [[ADD4_1]]
; THRESHOLD-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_2]], [[A]]
; THRESHOLD-NEXT: ret float [[ADD5]]
;
%addc = fadd fast float %b, 3.0
%add = fadd fast float %a, %addc
%arrayidx3 = getelementptr inbounds float, ptr %x, i64 1
%arrayidx3.1 = getelementptr inbounds float, ptr %x, i64 2
%arrayidx3.2 = getelementptr inbounds float, ptr %x, i64 3
%t0 = load float, ptr %x, align 4
%t1 = load float, ptr %arrayidx3, align 4
%t2 = load float, ptr %arrayidx3.1, align 4
%t3 = load float, ptr %arrayidx3.2, align 4
%add1 = fadd fast float %t0, %add
%add4 = fadd fast float %t1, %add1
%add4.1 = fadd float %t2, %add4 ; this is not a reduction candidate
%add4.2 = fadd fast float %t3, %add4.1
%add5 = fadd fast float %add4.2, %a
ret float %add5
}
define i32 @wobble(i32 %arg, i32 %bar) {
; CHECK-LABEL: @wobble(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[ARG:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i32> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i32> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i1> [[TMP6]] to <4 x i32>
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP8]], [[TMP5]]
; CHECK-NEXT: [[OP_RDX1:%.*]] = add i32 [[OP_RDX]], [[ARG]]
; CHECK-NEXT: ret i32 [[OP_RDX1]]
;
; THRESHOLD-LABEL: @wobble(
; THRESHOLD-NEXT: bb:
; THRESHOLD-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[ARG:%.*]], i32 0
; THRESHOLD-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
; THRESHOLD-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0
; THRESHOLD-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
; THRESHOLD-NEXT: [[TMP4:%.*]] = xor <4 x i32> [[TMP1]], [[TMP3]]
; THRESHOLD-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
; THRESHOLD-NEXT: [[TMP6:%.*]] = icmp eq <4 x i32> [[TMP4]], zeroinitializer
; THRESHOLD-NEXT: [[TMP7:%.*]] = sext <4 x i1> [[TMP6]] to <4 x i32>
; THRESHOLD-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
; THRESHOLD-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP8]], [[TMP5]]
; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = add i32 [[OP_RDX]], [[ARG]]
; THRESHOLD-NEXT: ret i32 [[OP_RDX1]]
;
bb:
%x1 = xor i32 %arg, %bar
%i1 = icmp eq i32 %x1, 0
%s1 = sext i1 %i1 to i32
%x2 = xor i32 %arg, %bar
%i2 = icmp eq i32 %x2, 0
%s2 = sext i1 %i2 to i32
%x3 = xor i32 %arg, %bar
%i3 = icmp eq i32 %x3, 0
%s3 = sext i1 %i3 to i32
%x4 = xor i32 %arg, %bar
%i4 = icmp eq i32 %x4, 0
%s4 = sext i1 %i4 to i32
%r1 = add nuw i32 %arg, %s1
%r2 = add nsw i32 %r1, %s2
%r3 = add nsw i32 %r2, %s3
%r4 = add nsw i32 %r3, %s4
%r5 = add nsw i32 %r4, %x4