1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2 -S -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 < %s | FileCheck --check-prefixes=CHECK,NON-POW2 %s
3 ; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2=false -S -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 < %s | FileCheck --check-prefixes=CHECK,POW2-ONLY %s
; add0: all four consecutive i32 lanes of %src get a constant added
; (1, 1, 2, 3) and are stored to the matching %dst lanes. The shared
; expectations below require a single <4 x i32> load/add/store for both
; opt configurations.
5 define void @add0(ptr noalias %dst, ptr noalias %src) {
6 ; CHECK-LABEL: @add0(
8 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
9 ; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[TMP0]], <i32 1, i32 1, i32 2, i32 3>
10 ; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
11 ; CHECK-NEXT: ret void
14 %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
15 %0 = load i32, ptr %src, align 4
16 %add = add nsw i32 %0, 1
17 %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
18 store i32 %add, ptr %dst, align 4
19 %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
20 %1 = load i32, ptr %incdec.ptr, align 4
21 %add3 = add nsw i32 %1, 1
22 %incdec.ptr4 = getelementptr inbounds i32, ptr %dst, i64 2
23 store i32 %add3, ptr %incdec.ptr1, align 4
24 %incdec.ptr5 = getelementptr inbounds i32, ptr %src, i64 3
25 %2 = load i32, ptr %incdec.ptr2, align 4
26 %add6 = add nsw i32 %2, 2
27 %incdec.ptr7 = getelementptr inbounds i32, ptr %dst, i64 3
28 store i32 %add6, ptr %incdec.ptr4, align 4
29 %3 = load i32, ptr %incdec.ptr5, align 4
30 %add9 = add nsw i32 %3, 3
31 store i32 %add9, ptr %incdec.ptr7, align 4
; add1: lane 0 is a plain copy from %src to %dst; lanes 1-3 add 1, 2, 3.
; With non-power-of-2 vectorization enabled the three adds are expected as
; one <3 x i32> operation; with it disabled only lanes 1-2 form a <2 x i32>
; operation and lane 3 stays scalar.
35 define void @add1(ptr noalias %dst, ptr noalias %src) {
36 ; NON-POW2-LABEL: @add1(
37 ; NON-POW2-NEXT: entry:
38 ; NON-POW2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
39 ; NON-POW2-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
40 ; NON-POW2-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
41 ; NON-POW2-NEXT: store i32 [[TMP0]], ptr [[DST]], align 4
42 ; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[INCDEC_PTR]], align 4
43 ; NON-POW2-NEXT: [[TMP2:%.*]] = add nsw <3 x i32> [[TMP1]], <i32 1, i32 2, i32 3>
44 ; NON-POW2-NEXT: store <3 x i32> [[TMP2]], ptr [[INCDEC_PTR1]], align 4
45 ; NON-POW2-NEXT: ret void
47 ; POW2-ONLY-LABEL: @add1(
48 ; POW2-ONLY-NEXT: entry:
49 ; POW2-ONLY-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
50 ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
51 ; POW2-ONLY-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
52 ; POW2-ONLY-NEXT: store i32 [[TMP0]], ptr [[DST]], align 4
53 ; POW2-ONLY-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
54 ; POW2-ONLY-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
55 ; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR]], align 4
56 ; POW2-ONLY-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 1, i32 2>
57 ; POW2-ONLY-NEXT: store <2 x i32> [[TMP2]], ptr [[INCDEC_PTR1]], align 4
58 ; POW2-ONLY-NEXT: [[TMP3:%.*]] = load i32, ptr [[INCDEC_PTR5]], align 4
59 ; POW2-ONLY-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP3]], 3
60 ; POW2-ONLY-NEXT: store i32 [[ADD9]], ptr [[INCDEC_PTR7]], align 4
61 ; POW2-ONLY-NEXT: ret void
64 %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
65 %0 = load i32, ptr %src, align 4
66 %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
67 store i32 %0, ptr %dst, align 4
68 %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
69 %1 = load i32, ptr %incdec.ptr, align 4
70 %add3 = add nsw i32 %1, 1
71 %incdec.ptr4 = getelementptr inbounds i32, ptr %dst, i64 2
72 store i32 %add3, ptr %incdec.ptr1, align 4
73 %incdec.ptr5 = getelementptr inbounds i32, ptr %src, i64 3
74 %2 = load i32, ptr %incdec.ptr2, align 4
75 %add6 = add nsw i32 %2, 2
76 %incdec.ptr7 = getelementptr inbounds i32, ptr %dst, i64 3
77 store i32 %add6, ptr %incdec.ptr4, align 4
78 %3 = load i32, ptr %incdec.ptr5, align 4
79 %add9 = add nsw i32 %3, 3
80 store i32 %add9, ptr %incdec.ptr7, align 4
; sub0: lane 0 adds -1, lane 1 is a plain copy, lanes 2-3 add -2 and -3.
; The shared expectations keep lanes 0 and 1 scalar and require lanes 2-3
; to become one <2 x i32> load/add/store for both opt configurations.
84 define void @sub0(ptr noalias %dst, ptr noalias %src) {
85 ; CHECK-LABEL: @sub0(
87 ; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
88 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
89 ; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[TMP0]], -1
90 ; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
91 ; CHECK-NEXT: store i32 [[SUB]], ptr [[DST]], align 4
92 ; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 2
93 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[INCDEC_PTR]], align 4
94 ; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 2
95 ; CHECK-NEXT: store i32 [[TMP1]], ptr [[INCDEC_PTR1]], align 4
96 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR2]], align 4
97 ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 -2, i32 -3>
98 ; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr [[INCDEC_PTR3]], align 4
99 ; CHECK-NEXT: ret void
102 %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
103 %0 = load i32, ptr %src, align 4
104 %sub = add nsw i32 %0, -1
105 %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
106 store i32 %sub, ptr %dst, align 4
107 %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
108 %1 = load i32, ptr %incdec.ptr, align 4
109 %incdec.ptr3 = getelementptr inbounds i32, ptr %dst, i64 2
110 store i32 %1, ptr %incdec.ptr1, align 4
111 %incdec.ptr4 = getelementptr inbounds i32, ptr %src, i64 3
112 %2 = load i32, ptr %incdec.ptr2, align 4
113 %sub5 = add nsw i32 %2, -2
114 %incdec.ptr6 = getelementptr inbounds i32, ptr %dst, i64 3
115 store i32 %sub5, ptr %incdec.ptr3, align 4
116 %3 = load i32, ptr %incdec.ptr4, align 4
117 %sub8 = add nsw i32 %3, -3
118 store i32 %sub8, ptr %incdec.ptr6, align 4
; sub1: all four i32 lanes add a constant (4, -1, -2, -3), mixing a positive
; and negative addends. The shared expectations require one <4 x i32>
; load/add/store for both opt configurations.
122 define void @sub1(ptr noalias %dst, ptr noalias %src) {
123 ; CHECK-LABEL: @sub1(
125 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
126 ; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[TMP0]], <i32 4, i32 -1, i32 -2, i32 -3>
127 ; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
128 ; CHECK-NEXT: ret void
131 %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
132 %0 = load i32, ptr %src, align 4
133 %add = add nsw i32 %0, 4
134 %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
135 store i32 %add, ptr %dst, align 4
136 %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
137 %1 = load i32, ptr %incdec.ptr, align 4
138 %sub = add nsw i32 %1, -1
139 %incdec.ptr3 = getelementptr inbounds i32, ptr %dst, i64 2
140 store i32 %sub, ptr %incdec.ptr1, align 4
141 %incdec.ptr4 = getelementptr inbounds i32, ptr %src, i64 3
142 %2 = load i32, ptr %incdec.ptr2, align 4
143 %sub5 = add nsw i32 %2, -2
144 %incdec.ptr6 = getelementptr inbounds i32, ptr %dst, i64 3
145 store i32 %sub5, ptr %incdec.ptr3, align 4
146 %3 = load i32, ptr %incdec.ptr4, align 4
147 %sub8 = add nsw i32 %3, -3
148 store i32 %sub8, ptr %incdec.ptr6, align 4
; sub2: all four i32 lanes add a negative constant (-1, -1, -2, -3). The
; shared expectations require one <4 x i32> load/add/store for both opt
; configurations.
152 define void @sub2(ptr noalias %dst, ptr noalias %src) {
153 ; CHECK-LABEL: @sub2(
155 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
156 ; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[TMP0]], <i32 -1, i32 -1, i32 -2, i32 -3>
157 ; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
158 ; CHECK-NEXT: ret void
161 %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
162 %0 = load i32, ptr %src, align 4
163 %sub = add nsw i32 %0, -1
164 %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
165 store i32 %sub, ptr %dst, align 4
166 %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
167 %1 = load i32, ptr %incdec.ptr, align 4
168 %sub3 = add nsw i32 %1, -1
169 %incdec.ptr4 = getelementptr inbounds i32, ptr %dst, i64 2
170 store i32 %sub3, ptr %incdec.ptr1, align 4
171 %incdec.ptr5 = getelementptr inbounds i32, ptr %src, i64 3
172 %2 = load i32, ptr %incdec.ptr2, align 4
173 %sub6 = add nsw i32 %2, -2
174 %incdec.ptr7 = getelementptr inbounds i32, ptr %dst, i64 3
175 store i32 %sub6, ptr %incdec.ptr4, align 4
176 %3 = load i32, ptr %incdec.ptr5, align 4
177 %sub9 = add nsw i32 %3, -3
178 store i32 %sub9, ptr %incdec.ptr7, align 4
; addsub0: lane 0 adds -1, lane 1 is a plain copy, lane 2 adds -2 and lane 3
; subtracts -3. The shared expectations keep lanes 0-1 scalar and turn lanes
; 2-3 into a <2 x i32> add plus a <2 x i32> sub, blended by a shufflevector
; whose mask <0, 3> takes element 0 of the add and element 1 of the sub.
182 define void @addsub0(ptr noalias %dst, ptr noalias %src) {
183 ; CHECK-LABEL: @addsub0(
185 ; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
186 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
187 ; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[TMP0]], -1
188 ; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
189 ; CHECK-NEXT: store i32 [[SUB]], ptr [[DST]], align 4
190 ; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 2
191 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[INCDEC_PTR]], align 4
192 ; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 2
193 ; CHECK-NEXT: store i32 [[TMP1]], ptr [[INCDEC_PTR1]], align 4
194 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR2]], align 4
195 ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 -2, i32 -3>
196 ; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <2 x i32> [[TMP2]], <i32 -2, i32 -3>
197 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
198 ; CHECK-NEXT: store <2 x i32> [[TMP5]], ptr [[INCDEC_PTR3]], align 4
199 ; CHECK-NEXT: ret void
202 %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
203 %0 = load i32, ptr %src, align 4
204 %sub = add nsw i32 %0, -1
205 %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
206 store i32 %sub, ptr %dst, align 4
207 %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
208 %1 = load i32, ptr %incdec.ptr, align 4
209 %incdec.ptr3 = getelementptr inbounds i32, ptr %dst, i64 2
210 store i32 %1, ptr %incdec.ptr1, align 4
211 %incdec.ptr4 = getelementptr inbounds i32, ptr %src, i64 3
212 %2 = load i32, ptr %incdec.ptr2, align 4
213 %sub5 = add nsw i32 %2, -2
214 %incdec.ptr6 = getelementptr inbounds i32, ptr %dst, i64 3
215 store i32 %sub5, ptr %incdec.ptr3, align 4
216 %3 = load i32, ptr %incdec.ptr4, align 4
217 %sub8 = sub nsw i32 %3, -3
218 store i32 %sub8, ptr %incdec.ptr6, align 4
; addsub1: lane 0 adds -1, lane 1 subtracts -1, lane 2 is a plain copy and
; lane 3 subtracts -3. The shared expectations turn lanes 0-1 into a
; <2 x i32> add plus a <2 x i32> sub blended by a shufflevector with mask
; <0, 3>, while lanes 2 and 3 stay scalar.
222 define void @addsub1(ptr noalias %dst, ptr noalias %src) {
223 ; CHECK-LABEL: @addsub1(
225 ; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 2
226 ; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 2
227 ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[SRC]], align 4
228 ; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[TMP0]], splat (i32 -1)
229 ; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <2 x i32> [[TMP0]], splat (i32 -1)
230 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 0, i32 3>
231 ; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr [[DST]], align 4
232 ; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
233 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[INCDEC_PTR2]], align 4
234 ; CHECK-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
235 ; CHECK-NEXT: store i32 [[TMP4]], ptr [[INCDEC_PTR3]], align 4
236 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[INCDEC_PTR4]], align 4
237 ; CHECK-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP5]], -3
238 ; CHECK-NEXT: store i32 [[SUB8]], ptr [[INCDEC_PTR6]], align 4
239 ; CHECK-NEXT: ret void
242 %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
243 %0 = load i32, ptr %src, align 4
244 %sub = add nsw i32 %0, -1
245 %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
246 store i32 %sub, ptr %dst, align 4
247 %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
248 %1 = load i32, ptr %incdec.ptr, align 4
249 %sub1 = sub nsw i32 %1, -1
250 %incdec.ptr3 = getelementptr inbounds i32, ptr %dst, i64 2
251 store i32 %sub1, ptr %incdec.ptr1, align 4
252 %incdec.ptr4 = getelementptr inbounds i32, ptr %src, i64 3
253 %2 = load i32, ptr %incdec.ptr2, align 4
254 %incdec.ptr6 = getelementptr inbounds i32, ptr %dst, i64 3
255 store i32 %2, ptr %incdec.ptr3, align 4
256 %3 = load i32, ptr %incdec.ptr4, align 4
257 %sub8 = sub nsw i32 %3, -3
258 store i32 %sub8, ptr %incdec.ptr6, align 4
; mul: lanes 0 and 1 multiply by 257 and -3, lane 2 is a plain copy and
; lane 3 multiplies by -9. The shared expectations turn lanes 0-1 into one
; <2 x i32> load/mul/store and keep lanes 2 and 3 scalar for both opt
; configurations.
262 define void @mul(ptr noalias %dst, ptr noalias %src) {
263 ; CHECK-LABEL: @mul(
265 ; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 2
266 ; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 2
267 ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[SRC]], align 4
268 ; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <2 x i32> [[TMP0]], <i32 257, i32 -3>
269 ; CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[DST]], align 4
270 ; CHECK-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
271 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[INCDEC_PTR2]], align 4
272 ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
273 ; CHECK-NEXT: store i32 [[TMP2]], ptr [[INCDEC_PTR4]], align 4
274 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[INCDEC_PTR5]], align 4
275 ; CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP3]], -9
276 ; CHECK-NEXT: store i32 [[MUL9]], ptr [[INCDEC_PTR7]], align 4
277 ; CHECK-NEXT: ret void
280 %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
281 %0 = load i32, ptr %src, align 4
282 %mul = mul nsw i32 %0, 257
283 %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
284 store i32 %mul, ptr %dst, align 4
285 %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
286 %1 = load i32, ptr %incdec.ptr, align 4
287 %mul3 = mul nsw i32 %1, -3
288 %incdec.ptr4 = getelementptr inbounds i32, ptr %dst, i64 2
289 store i32 %mul3, ptr %incdec.ptr1, align 4
290 %incdec.ptr5 = getelementptr inbounds i32, ptr %src, i64 3
291 %2 = load i32, ptr %incdec.ptr2, align 4
292 %incdec.ptr7 = getelementptr inbounds i32, ptr %dst, i64 3
293 store i32 %2, ptr %incdec.ptr4, align 4
294 %3 = load i32, ptr %incdec.ptr5, align 4
295 %mul9 = mul nsw i32 %3, -9
296 store i32 %mul9, ptr %incdec.ptr7, align 4
; shl0: lane 0 is a plain copy from %src to %dst; lanes 1-3 shift left by
; 1, 2, 3. With non-power-of-2 vectorization enabled the three shifts are
; expected as one <3 x i32> operation; with it disabled only lanes 1-2 form
; a <2 x i32> operation and lane 3 stays scalar.
300 define void @shl0(ptr noalias %dst, ptr noalias %src) {
301 ; NON-POW2-LABEL: @shl0(
302 ; NON-POW2-NEXT: entry:
303 ; NON-POW2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
304 ; NON-POW2-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
305 ; NON-POW2-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
306 ; NON-POW2-NEXT: store i32 [[TMP0]], ptr [[DST]], align 4
307 ; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[INCDEC_PTR]], align 4
308 ; NON-POW2-NEXT: [[TMP2:%.*]] = shl <3 x i32> [[TMP1]], <i32 1, i32 2, i32 3>
309 ; NON-POW2-NEXT: store <3 x i32> [[TMP2]], ptr [[INCDEC_PTR1]], align 4
310 ; NON-POW2-NEXT: ret void
312 ; POW2-ONLY-LABEL: @shl0(
313 ; POW2-ONLY-NEXT: entry:
314 ; POW2-ONLY-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
315 ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
316 ; POW2-ONLY-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
317 ; POW2-ONLY-NEXT: store i32 [[TMP0]], ptr [[DST]], align 4
318 ; POW2-ONLY-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
319 ; POW2-ONLY-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
320 ; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR]], align 4
321 ; POW2-ONLY-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 2>
322 ; POW2-ONLY-NEXT: store <2 x i32> [[TMP2]], ptr [[INCDEC_PTR1]], align 4
323 ; POW2-ONLY-NEXT: [[TMP3:%.*]] = load i32, ptr [[INCDEC_PTR4]], align 4
324 ; POW2-ONLY-NEXT: [[SHL8:%.*]] = shl i32 [[TMP3]], 3
325 ; POW2-ONLY-NEXT: store i32 [[SHL8]], ptr [[INCDEC_PTR6]], align 4
326 ; POW2-ONLY-NEXT: ret void
329 %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
330 %0 = load i32, ptr %src, align 4
331 %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
332 store i32 %0, ptr %dst, align 4
333 %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
334 %1 = load i32, ptr %incdec.ptr, align 4
335 %shl = shl i32 %1, 1
336 %incdec.ptr3 = getelementptr inbounds i32, ptr %dst, i64 2
337 store i32 %shl, ptr %incdec.ptr1, align 4
338 %incdec.ptr4 = getelementptr inbounds i32, ptr %src, i64 3
339 %2 = load i32, ptr %incdec.ptr2, align 4
340 %shl5 = shl i32 %2, 2
341 %incdec.ptr6 = getelementptr inbounds i32, ptr %dst, i64 3
342 store i32 %shl5, ptr %incdec.ptr3, align 4
343 %3 = load i32, ptr %incdec.ptr4, align 4
344 %shl8 = shl i32 %3, 3
345 store i32 %shl8, ptr %incdec.ptr6, align 4
; shl1: all four i32 lanes shift left by a constant (7, 1, 2, 3). The shared
; expectations require one <4 x i32> load/shl/store for both opt
; configurations.
349 define void @shl1(ptr noalias %dst, ptr noalias %src) {
350 ; CHECK-LABEL: @shl1(
352 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
353 ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[TMP0]], <i32 7, i32 1, i32 2, i32 3>
354 ; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
355 ; CHECK-NEXT: ret void
358 %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
359 %0 = load i32, ptr %src, align 4
360 %shl = shl i32 %0, 7
361 %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
362 store i32 %shl, ptr %dst, align 4
363 %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
364 %1 = load i32, ptr %incdec.ptr, align 4
365 %shl3 = shl i32 %1, 1
366 %incdec.ptr4 = getelementptr inbounds i32, ptr %dst, i64 2
367 store i32 %shl3, ptr %incdec.ptr1, align 4
368 %incdec.ptr5 = getelementptr inbounds i32, ptr %src, i64 3
369 %2 = load i32, ptr %incdec.ptr2, align 4
370 %shl6 = shl i32 %2, 2
371 %incdec.ptr7 = getelementptr inbounds i32, ptr %dst, i64 3
372 store i32 %shl6, ptr %incdec.ptr4, align 4
373 %3 = load i32, ptr %incdec.ptr5, align 4
374 %shl9 = shl i32 %3, 3
375 store i32 %shl9, ptr %incdec.ptr7, align 4
; add0f: float counterpart of a four-lane constant add. Every lane does a
; `fadd fast` with a constant (1.0, 1.0, 2.0, 3.0); the shared expectations
; require one <4 x float> load/fadd/store for both opt configurations.
379 define void @add0f(ptr noalias %dst, ptr noalias %src) {
380 ; CHECK-LABEL: @add0f(
382 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
383 ; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <4 x float> [[TMP0]], <float 1.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
384 ; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[DST:%.*]], align 4
385 ; CHECK-NEXT: ret void
388 %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
389 %0 = load float, ptr %src, align 4
390 %add = fadd fast float %0, 1.000000e+00
391 %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
392 store float %add, ptr %dst, align 4
393 %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
394 %1 = load float, ptr %incdec.ptr, align 4
395 %add3 = fadd fast float %1, 1.000000e+00
396 %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
397 store float %add3, ptr %incdec.ptr1, align 4
398 %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
399 %2 = load float, ptr %incdec.ptr2, align 4
400 %add6 = fadd fast float %2, 2.000000e+00
401 %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
402 store float %add6, ptr %incdec.ptr4, align 4
403 %3 = load float, ptr %incdec.ptr5, align 4
404 %add9 = fadd fast float %3, 3.000000e+00
405 store float %add9, ptr %incdec.ptr7, align 4
; add1f: lane 0 is a plain float copy; lanes 1-3 do `fadd fast` with 1.0,
; 2.0, 3.0. With non-power-of-2 vectorization enabled the three adds are
; expected as one <3 x float> operation; with it disabled only lanes 1-2
; form a <2 x float> operation and lane 3 stays scalar.
409 define void @add1f(ptr noalias %dst, ptr noalias %src) {
410 ; NON-POW2-LABEL: @add1f(
411 ; NON-POW2-NEXT: entry:
412 ; NON-POW2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
413 ; NON-POW2-NEXT: [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
414 ; NON-POW2-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
415 ; NON-POW2-NEXT: store float [[TMP0]], ptr [[DST]], align 4
416 ; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x float>, ptr [[INCDEC_PTR]], align 4
417 ; NON-POW2-NEXT: [[TMP2:%.*]] = fadd fast <3 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
418 ; NON-POW2-NEXT: store <3 x float> [[TMP2]], ptr [[INCDEC_PTR1]], align 4
419 ; NON-POW2-NEXT: ret void
421 ; POW2-ONLY-LABEL: @add1f(
422 ; POW2-ONLY-NEXT: entry:
423 ; POW2-ONLY-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
424 ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
425 ; POW2-ONLY-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
426 ; POW2-ONLY-NEXT: store float [[TMP0]], ptr [[DST]], align 4
427 ; POW2-ONLY-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
428 ; POW2-ONLY-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
429 ; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[INCDEC_PTR]], align 4
430 ; POW2-ONLY-NEXT: [[TMP2:%.*]] = fadd fast <2 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00>
431 ; POW2-ONLY-NEXT: store <2 x float> [[TMP2]], ptr [[INCDEC_PTR1]], align 4
432 ; POW2-ONLY-NEXT: [[TMP3:%.*]] = load float, ptr [[INCDEC_PTR5]], align 4
433 ; POW2-ONLY-NEXT: [[ADD9:%.*]] = fadd fast float [[TMP3]], 3.000000e+00
434 ; POW2-ONLY-NEXT: store float [[ADD9]], ptr [[INCDEC_PTR7]], align 4
435 ; POW2-ONLY-NEXT: ret void
438 %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
439 %0 = load float, ptr %src, align 4
440 %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
441 store float %0, ptr %dst, align 4
442 %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
443 %1 = load float, ptr %incdec.ptr, align 4
444 %add3 = fadd fast float %1, 1.000000e+00
445 %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
446 store float %add3, ptr %incdec.ptr1, align 4
447 %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
448 %2 = load float, ptr %incdec.ptr2, align 4
449 %add6 = fadd fast float %2, 2.000000e+00
450 %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
451 store float %add6, ptr %incdec.ptr4, align 4
452 %3 = load float, ptr %incdec.ptr5, align 4
453 %add9 = fadd fast float %3, 3.000000e+00
454 store float %add9, ptr %incdec.ptr7, align 4
; sub0f: lane 0 does `fadd fast` with -1.0, lane 1 is a plain copy, lanes
; 2-3 do `fadd fast` with -2.0 and -3.0. The shared expectations keep lanes
; 0 and 1 scalar and require lanes 2-3 to become one <2 x float>
; load/fadd/store for both opt configurations.
458 define void @sub0f(ptr noalias %dst, ptr noalias %src) {
459 ; CHECK-LABEL: @sub0f(
461 ; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
462 ; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
463 ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP0]], -1.000000e+00
464 ; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
465 ; CHECK-NEXT: store float [[ADD]], ptr [[DST]], align 4
466 ; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 2
467 ; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[INCDEC_PTR]], align 4
468 ; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 2
469 ; CHECK-NEXT: store float [[TMP1]], ptr [[INCDEC_PTR1]], align 4
470 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[INCDEC_PTR2]], align 4
471 ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <2 x float> [[TMP2]], <float -2.000000e+00, float -3.000000e+00>
472 ; CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[INCDEC_PTR4]], align 4
473 ; CHECK-NEXT: ret void
476 %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
477 %0 = load float, ptr %src, align 4
478 %add = fadd fast float %0, -1.000000e+00
479 %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
480 store float %add, ptr %dst, align 4
481 %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
482 %1 = load float, ptr %incdec.ptr, align 4
483 %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
484 store float %1, ptr %incdec.ptr1, align 4
485 %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
486 %2 = load float, ptr %incdec.ptr2, align 4
487 %add6 = fadd fast float %2, -2.000000e+00
488 %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
489 store float %add6, ptr %incdec.ptr4, align 4
490 %3 = load float, ptr %incdec.ptr5, align 4
491 %add9 = fadd fast float %3, -3.000000e+00
492 store float %add9, ptr %incdec.ptr7, align 4
; sub1f: all four float lanes do `fadd fast` with a constant (4.0, -1.0,
; -2.0, -3.0). The shared expectations require one <4 x float>
; load/fadd/store for both opt configurations.
496 define void @sub1f(ptr noalias %dst, ptr noalias %src) {
497 ; CHECK-LABEL: @sub1f(
499 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
500 ; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <4 x float> [[TMP0]], <float 4.000000e+00, float -1.000000e+00, float -2.000000e+00, float -3.000000e+00>
501 ; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[DST:%.*]], align 4
502 ; CHECK-NEXT: ret void
505 %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
506 %0 = load float, ptr %src, align 4
507 %add = fadd fast float %0, 4.000000e+00
508 %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
509 store float %add, ptr %dst, align 4
510 %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
511 %1 = load float, ptr %incdec.ptr, align 4
512 %sub = fadd fast float %1, -1.000000e+00
513 %incdec.ptr3 = getelementptr inbounds float, ptr %dst, i64 2
514 store float %sub, ptr %incdec.ptr1, align 4
515 %incdec.ptr4 = getelementptr inbounds float, ptr %src, i64 3
516 %2 = load float, ptr %incdec.ptr2, align 4
517 %sub5 = fadd fast float %2, -2.000000e+00
518 %incdec.ptr6 = getelementptr inbounds float, ptr %dst, i64 3
519 store float %sub5, ptr %incdec.ptr3, align 4
520 %3 = load float, ptr %incdec.ptr4, align 4
521 %sub8 = fadd fast float %3, -3.000000e+00
522 store float %sub8, ptr %incdec.ptr6, align 4
; sub2f: all four float lanes do `fadd fast` with a negative constant
; (-1.0, -1.0, -2.0, -3.0). The shared expectations require one <4 x float>
; load/fadd/store for both opt configurations.
526 define void @sub2f(ptr noalias %dst, ptr noalias %src) {
527 ; CHECK-LABEL: @sub2f(
529 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
530 ; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <4 x float> [[TMP0]], <float -1.000000e+00, float -1.000000e+00, float -2.000000e+00, float -3.000000e+00>
531 ; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[DST:%.*]], align 4
532 ; CHECK-NEXT: ret void
535 %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
536 %0 = load float, ptr %src, align 4
537 %sub = fadd fast float %0, -1.000000e+00
538 %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
539 store float %sub, ptr %dst, align 4
540 %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
541 %1 = load float, ptr %incdec.ptr, align 4
542 %sub3 = fadd fast float %1, -1.000000e+00
543 %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
544 store float %sub3, ptr %incdec.ptr1, align 4
545 %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
546 %2 = load float, ptr %incdec.ptr2, align 4
547 %sub6 = fadd fast float %2, -2.000000e+00
548 %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
549 store float %sub6, ptr %incdec.ptr4, align 4
550 %3 = load float, ptr %incdec.ptr5, align 4
551 %sub9 = fadd fast float %3, -3.000000e+00
552 store float %sub9, ptr %incdec.ptr7, align 4
; addsub0f: lane 0 does `fadd fast` with -1.0, lane 1 is a plain copy, lane
; 2 does `fadd fast` with -2.0 and lane 3 does `fsub fast` with -3.0. The
; shared expectations keep lanes 0-1 scalar and turn lanes 2-3 into a
; <2 x float> fadd plus a <2 x float> fsub, blended by a shufflevector whose
; mask <0, 3> takes element 0 of the fadd and element 1 of the fsub.
556 define void @addsub0f(ptr noalias %dst, ptr noalias %src) {
557 ; CHECK-LABEL: @addsub0f(
559 ; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
560 ; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
561 ; CHECK-NEXT: [[SUB:%.*]] = fadd fast float [[TMP0]], -1.000000e+00
562 ; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
563 ; CHECK-NEXT: store float [[SUB]], ptr [[DST]], align 4
564 ; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 2
565 ; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[INCDEC_PTR]], align 4
566 ; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 2
567 ; CHECK-NEXT: store float [[TMP1]], ptr [[INCDEC_PTR1]], align 4
568 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[INCDEC_PTR2]], align 4
569 ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <2 x float> [[TMP2]], <float -2.000000e+00, float -3.000000e+00>
570 ; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <2 x float> [[TMP2]], <float -2.000000e+00, float -3.000000e+00>
571 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP4]], <2 x i32> <i32 0, i32 3>
572 ; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[INCDEC_PTR3]], align 4
573 ; CHECK-NEXT: ret void
576 %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
577 %0 = load float, ptr %src, align 4
578 %sub = fadd fast float %0, -1.000000e+00
579 %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
580 store float %sub, ptr %dst, align 4
581 %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
582 %1 = load float, ptr %incdec.ptr, align 4
583 %incdec.ptr3 = getelementptr inbounds float, ptr %dst, i64 2
584 store float %1, ptr %incdec.ptr1, align 4
585 %incdec.ptr4 = getelementptr inbounds float, ptr %src, i64 3
586 %2 = load float, ptr %incdec.ptr2, align 4
587 %sub5 = fadd fast float %2, -2.000000e+00
588 %incdec.ptr6 = getelementptr inbounds float, ptr %dst, i64 3
589 store float %sub5, ptr %incdec.ptr3, align 4
590 %3 = load float, ptr %incdec.ptr4, align 4
591 %sub8 = fsub fast float %3, -3.000000e+00
592 store float %sub8, ptr %incdec.ptr6, align 4
; addsub1f: lane 0 does `fadd fast` with -1.0, lane 1 does `fsub fast` with
; -1.0, lane 2 is a plain copy and lane 3 does `fsub fast` with -3.0. The
; shared expectations turn lanes 0-1 into a <2 x float> fadd plus a
; <2 x float> fsub blended by a shufflevector with mask <0, 3>, while lanes
; 2 and 3 stay scalar.
596 define void @addsub1f(ptr noalias %dst, ptr noalias %src) {
597 ; CHECK-LABEL: @addsub1f(
599 ; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 2
600 ; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 2
601 ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[SRC]], align 4
602 ; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <2 x float> [[TMP0]], splat (float -1.000000e+00)
603 ; CHECK-NEXT: [[TMP2:%.*]] = fsub fast <2 x float> [[TMP0]], splat (float -1.000000e+00)
604 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
605 ; CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[DST]], align 4
606 ; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
607 ; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[INCDEC_PTR2]], align 4
608 ; CHECK-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
609 ; CHECK-NEXT: store float [[TMP4]], ptr [[INCDEC_PTR3]], align 4
610 ; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[INCDEC_PTR4]], align 4
611 ; CHECK-NEXT: [[SUB8:%.*]] = fsub fast float [[TMP5]], -3.000000e+00
612 ; CHECK-NEXT: store float [[SUB8]], ptr [[INCDEC_PTR6]], align 4
613 ; CHECK-NEXT: ret void
616 %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
617 %0 = load float, ptr %src, align 4
618 %sub = fadd fast float %0, -1.000000e+00
619 %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
620 store float %sub, ptr %dst, align 4
621 %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
622 %1 = load float, ptr %incdec.ptr, align 4
623 %sub1 = fsub fast float %1, -1.000000e+00
624 %incdec.ptr3 = getelementptr inbounds float, ptr %dst, i64 2
625 store float %sub1, ptr %incdec.ptr1, align 4
626 %incdec.ptr4 = getelementptr inbounds float, ptr %src, i64 3
627 %2 = load float, ptr %incdec.ptr2, align 4
628 %incdec.ptr6 = getelementptr inbounds float, ptr %dst, i64 3
629 store float %2, ptr %incdec.ptr3, align 4
630 %3 = load float, ptr %incdec.ptr4, align 4
631 %sub8 = fsub fast float %3, -3.000000e+00
632 store float %sub8, ptr %incdec.ptr6, align 4
; mulf: lanes 0 and 1 do `fmul fast` by 257.0 and -3.0, lane 2 is a plain
; copy and lane 3 does `fmul fast` by -9.0. The shared expectations turn
; lanes 0-1 into one <2 x float> load/fmul/store and keep lanes 2 and 3
; scalar. The value names say "sub" although the operations are fmul.
636 define void @mulf(ptr noalias %dst, ptr noalias %src) {
637 ; CHECK-LABEL: @mulf(
639 ; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 2
640 ; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 2
641 ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[SRC]], align 4
642 ; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x float> [[TMP0]], <float 2.570000e+02, float -3.000000e+00>
643 ; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[DST]], align 4
644 ; CHECK-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
645 ; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[INCDEC_PTR2]], align 4
646 ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
647 ; CHECK-NEXT: store float [[TMP2]], ptr [[INCDEC_PTR4]], align 4
648 ; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[INCDEC_PTR5]], align 4
649 ; CHECK-NEXT: [[SUB9:%.*]] = fmul fast float [[TMP3]], -9.000000e+00
650 ; CHECK-NEXT: store float [[SUB9]], ptr [[INCDEC_PTR7]], align 4
651 ; CHECK-NEXT: ret void
654 %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
655 %0 = load float, ptr %src, align 4
656 %sub = fmul fast float %0, 2.570000e+02
657 %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
658 store float %sub, ptr %dst, align 4
659 %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
660 %1 = load float, ptr %incdec.ptr, align 4
661 %sub3 = fmul fast float %1, -3.000000e+00
662 %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
663 store float %sub3, ptr %incdec.ptr1, align 4
664 %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
665 %2 = load float, ptr %incdec.ptr2, align 4
666 %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
667 store float %2, ptr %incdec.ptr4, align 4
668 %3 = load float, ptr %incdec.ptr5, align 4
669 %sub9 = fmul fast float %3, -9.000000e+00
670 store float %sub9, ptr %incdec.ptr7, align 4
; @add0fn: reads four consecutive floats starting at %src, adds a constant to
; each with a plain fadd (no fast-math flags), and stores the results to four
; consecutive floats starting at %dst. The addends are 1.0, 1.0, 2.0, 3.0.
; The expected output below is a single <4 x float> load/fadd/store.
674 define void @add0fn(ptr noalias %dst, ptr noalias %src) {
675 ; CHECK-LABEL: @add0fn(
677 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
678 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[TMP0]], <float 1.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
679 ; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[DST:%.*]], align 4
680 ; CHECK-NEXT: ret void
683 %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
684 %0 = load float, ptr %src, align 4
685 %add = fadd float %0, 1.000000e+00
686 %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
687 store float %add, ptr %dst, align 4
688 %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
689 %1 = load float, ptr %incdec.ptr, align 4
690 %add3 = fadd float %1, 1.000000e+00
691 %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
692 store float %add3, ptr %incdec.ptr1, align 4
693 %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
694 %2 = load float, ptr %incdec.ptr2, align 4
695 %add6 = fadd float %2, 2.000000e+00
696 %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
697 store float %add6, ptr %incdec.ptr4, align 4
698 %3 = load float, ptr %incdec.ptr5, align 4
699 %add9 = fadd float %3, 3.000000e+00
700 store float %add9, ptr %incdec.ptr7, align 4
; @add1fn: reads four consecutive floats starting at %src and writes four
; consecutive floats starting at %dst.
;   element 0: copied unchanged (no arithmetic)
;   elements 1-3: plain fadd (no fast-math flags) of 1.0, 2.0, 3.0
; Expected output differs between the two RUN configurations:
;   - with -slp-vectorize-non-power-of-2, elements 1-3 become one
;     <3 x float> load/fadd/store and element 0 stays scalar;
;   - with -slp-vectorize-non-power-of-2=false, only elements 1-2 become a
;     <2 x float> load/fadd/store; elements 0 and 3 stay scalar.
704 define void @add1fn(ptr noalias %dst, ptr noalias %src) {
705 ; NON-POW2-LABEL: @add1fn(
706 ; NON-POW2-NEXT: entry:
707 ; NON-POW2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
708 ; NON-POW2-NEXT: [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
709 ; NON-POW2-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
710 ; NON-POW2-NEXT: store float [[TMP0]], ptr [[DST]], align 4
711 ; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x float>, ptr [[INCDEC_PTR]], align 4
712 ; NON-POW2-NEXT: [[TMP2:%.*]] = fadd <3 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
713 ; NON-POW2-NEXT: store <3 x float> [[TMP2]], ptr [[INCDEC_PTR1]], align 4
714 ; NON-POW2-NEXT: ret void
716 ; POW2-ONLY-LABEL: @add1fn(
717 ; POW2-ONLY-NEXT: entry:
718 ; POW2-ONLY-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
719 ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
720 ; POW2-ONLY-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
721 ; POW2-ONLY-NEXT: store float [[TMP0]], ptr [[DST]], align 4
722 ; POW2-ONLY-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
723 ; POW2-ONLY-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
724 ; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[INCDEC_PTR]], align 4
725 ; POW2-ONLY-NEXT: [[TMP2:%.*]] = fadd <2 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00>
726 ; POW2-ONLY-NEXT: store <2 x float> [[TMP2]], ptr [[INCDEC_PTR1]], align 4
727 ; POW2-ONLY-NEXT: [[TMP3:%.*]] = load float, ptr [[INCDEC_PTR5]], align 4
728 ; POW2-ONLY-NEXT: [[ADD9:%.*]] = fadd float [[TMP3]], 3.000000e+00
729 ; POW2-ONLY-NEXT: store float [[ADD9]], ptr [[INCDEC_PTR7]], align 4
730 ; POW2-ONLY-NEXT: ret void
733 %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
734 %0 = load float, ptr %src, align 4
735 %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
736 store float %0, ptr %dst, align 4
737 %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
738 %1 = load float, ptr %incdec.ptr, align 4
739 %add3 = fadd float %1, 1.000000e+00
740 %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
741 store float %add3, ptr %incdec.ptr1, align 4
742 %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
743 %2 = load float, ptr %incdec.ptr2, align 4
744 %add6 = fadd float %2, 2.000000e+00
745 %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
746 store float %add6, ptr %incdec.ptr4, align 4
747 %3 = load float, ptr %incdec.ptr5, align 4
748 %add9 = fadd float %3, 3.000000e+00
749 store float %add9, ptr %incdec.ptr7, align 4
; @sub0fn: reads four consecutive floats starting at %src and writes four
; consecutive floats starting at %dst.
;   element 0: fadd fast of -1.0 (the only operation carrying the fast flag)
;   element 1: copied unchanged (no arithmetic)
;   elements 2-3: plain fadd (no fast-math flags) of -2.0 and -3.0
; The expected output below keeps elements 0 and 1 scalar and combines
; elements 2-3 into a <2 x float> load/fadd/store.
753 define void @sub0fn(ptr noalias %dst, ptr noalias %src) {
754 ; CHECK-LABEL: @sub0fn(
756 ; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
757 ; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
758 ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP0]], -1.000000e+00
759 ; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
760 ; CHECK-NEXT: store float [[ADD]], ptr [[DST]], align 4
761 ; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 2
762 ; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[INCDEC_PTR]], align 4
763 ; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 2
764 ; CHECK-NEXT: store float [[TMP1]], ptr [[INCDEC_PTR1]], align 4
765 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[INCDEC_PTR2]], align 4
766 ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP2]], <float -2.000000e+00, float -3.000000e+00>
767 ; CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[INCDEC_PTR4]], align 4
768 ; CHECK-NEXT: ret void
771 %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
772 %0 = load float, ptr %src, align 4
773 %add = fadd fast float %0, -1.000000e+00
774 %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
775 store float %add, ptr %dst, align 4
776 %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
777 %1 = load float, ptr %incdec.ptr, align 4
778 %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
779 store float %1, ptr %incdec.ptr1, align 4
780 %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
781 %2 = load float, ptr %incdec.ptr2, align 4
782 %add6 = fadd float %2, -2.000000e+00
783 %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
784 store float %add6, ptr %incdec.ptr4, align 4
785 %3 = load float, ptr %incdec.ptr5, align 4
786 %add9 = fadd float %3, -3.000000e+00
787 store float %add9, ptr %incdec.ptr7, align 4
; @sub1fn: reads four consecutive floats starting at %src, adds a constant to
; each with a plain fadd (no fast-math flags), and stores the results to four
; consecutive floats starting at %dst. The addends are 4.0, -1.0, -2.0, -3.0
; (one positive followed by three negative constants).
; The expected output below is a single <4 x float> load/fadd/store.
791 define void @sub1fn(ptr noalias %dst, ptr noalias %src) {
792 ; CHECK-LABEL: @sub1fn(
794 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
795 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[TMP0]], <float 4.000000e+00, float -1.000000e+00, float -2.000000e+00, float -3.000000e+00>
796 ; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[DST:%.*]], align 4
797 ; CHECK-NEXT: ret void
800 %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
801 %0 = load float, ptr %src, align 4
802 %add = fadd float %0, 4.000000e+00
803 %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
804 store float %add, ptr %dst, align 4
805 %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
806 %1 = load float, ptr %incdec.ptr, align 4
807 %sub = fadd float %1, -1.000000e+00
808 %incdec.ptr3 = getelementptr inbounds float, ptr %dst, i64 2
809 store float %sub, ptr %incdec.ptr1, align 4
810 %incdec.ptr4 = getelementptr inbounds float, ptr %src, i64 3
811 %2 = load float, ptr %incdec.ptr2, align 4
812 %sub5 = fadd float %2, -2.000000e+00
813 %incdec.ptr6 = getelementptr inbounds float, ptr %dst, i64 3
814 store float %sub5, ptr %incdec.ptr3, align 4
815 %3 = load float, ptr %incdec.ptr4, align 4
816 %sub8 = fadd float %3, -3.000000e+00
817 store float %sub8, ptr %incdec.ptr6, align 4
; @sub2fn: reads four consecutive floats starting at %src, adds a negative
; constant to each with a plain fadd (no fast-math flags), and stores the
; results to four consecutive floats starting at %dst. The addends are
; -1.0, -1.0, -2.0, -3.0.
; The expected output below is a single <4 x float> load/fadd/store.
821 define void @sub2fn(ptr noalias %dst, ptr noalias %src) {
822 ; CHECK-LABEL: @sub2fn(
824 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
825 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[TMP0]], <float -1.000000e+00, float -1.000000e+00, float -2.000000e+00, float -3.000000e+00>
826 ; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[DST:%.*]], align 4
827 ; CHECK-NEXT: ret void
830 %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
831 %0 = load float, ptr %src, align 4
832 %sub = fadd float %0, -1.000000e+00
833 %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
834 store float %sub, ptr %dst, align 4
835 %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
836 %1 = load float, ptr %incdec.ptr, align 4
837 %sub3 = fadd float %1, -1.000000e+00
838 %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
839 store float %sub3, ptr %incdec.ptr1, align 4
840 %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
841 %2 = load float, ptr %incdec.ptr2, align 4
842 %sub6 = fadd float %2, -2.000000e+00
843 %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
844 store float %sub6, ptr %incdec.ptr4, align 4
845 %3 = load float, ptr %incdec.ptr5, align 4
846 %sub9 = fadd float %3, -3.000000e+00
847 store float %sub9, ptr %incdec.ptr7, align 4
; @mulfn: reads four consecutive floats starting at %src and writes four
; consecutive floats starting at %dst.
;   element 0: plain fmul (no fast-math flags) by 257.0
;   element 1: plain fmul (no fast-math flags) by -3.0
;   element 2: copied unchanged (no arithmetic)
;   element 3: fmul fast by -9.0 (the only operation carrying the fast flag)
; The expected output below has only elements 0-1 combined into a single
; <2 x float> load/fmul/store; elements 2 and 3 remain scalar, and the
; scalar multiply for element 3 keeps its fast flag.
851 define void @mulfn(ptr noalias %dst, ptr noalias %src) {
852 ; CHECK-LABEL: @mulfn(
854 ; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 2
855 ; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 2
856 ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[SRC]], align 4
857 ; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x float> [[TMP0]], <float 2.570000e+02, float -3.000000e+00>
858 ; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[DST]], align 4
859 ; CHECK-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
860 ; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[INCDEC_PTR2]], align 4
861 ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
862 ; CHECK-NEXT: store float [[TMP2]], ptr [[INCDEC_PTR4]], align 4
863 ; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[INCDEC_PTR5]], align 4
864 ; CHECK-NEXT: [[SUB9:%.*]] = fmul fast float [[TMP3]], -9.000000e+00
865 ; CHECK-NEXT: store float [[SUB9]], ptr [[INCDEC_PTR7]], align 4
866 ; CHECK-NEXT: ret void
869 %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
870 %0 = load float, ptr %src, align 4
871 %sub = fmul float %0, 2.570000e+02
872 %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
873 store float %sub, ptr %dst, align 4
874 %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
875 %1 = load float, ptr %incdec.ptr, align 4
876 %sub3 = fmul float %1, -3.000000e+00
877 %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
878 store float %sub3, ptr %incdec.ptr1, align 4
879 %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
880 %2 = load float, ptr %incdec.ptr2, align 4
881 %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
882 store float %2, ptr %incdec.ptr4, align 4
883 %3 = load float, ptr %incdec.ptr5, align 4
884 %sub9 = fmul fast float %3, -9.000000e+00
885 store float %sub9, ptr %incdec.ptr7, align 4