; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 < %s | FileCheck %s
; add0: four consecutive i32 lanes are loaded from %src, each gets an
; "add nsw" with the constants 1, 1, 2, 3, and the results are stored to %dst.
; The assertions expect a single <4 x i32> load/add/store.
define void @add0(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @add0(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], <i32 1, i32 1, i32 2, i32 3>
; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
  %0 = load i32, ptr %src, align 4
  %add = add nsw i32 %0, 1
  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %add, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
  %1 = load i32, ptr %incdec.ptr, align 4
  %add3 = add nsw i32 %1, 1
  %incdec.ptr4 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %add3, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds i32, ptr %src, i64 3
  %2 = load i32, ptr %incdec.ptr2, align 4
  %add6 = add nsw i32 %2, 2
  %incdec.ptr7 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %add6, ptr %incdec.ptr4, align 4
  %3 = load i32, ptr %incdec.ptr5, align 4
  %add9 = add nsw i32 %3, 3
  store i32 %add9, ptr %incdec.ptr7, align 4
  ret void
}
; add1: lane 0 is a plain copy from %src to %dst; lanes 1, 2, 3 get an
; "add nsw" with 1, 2, 3. The assertions expect lanes 1-2 to become a
; <2 x i32> add while lanes 0 and 3 stay scalar.
define void @add1(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @add1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
; CHECK-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 1, i32 2>
; CHECK-NEXT:    store <2 x i32> [[TMP3]], ptr [[INCDEC_PTR1]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[INCDEC_PTR5]], align 4
; CHECK-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP5]], 3
; CHECK-NEXT:    store i32 [[ADD9]], ptr [[INCDEC_PTR7]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
  %0 = load i32, ptr %src, align 4
  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %0, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
  %1 = load i32, ptr %incdec.ptr, align 4
  %add3 = add nsw i32 %1, 1
  %incdec.ptr4 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %add3, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds i32, ptr %src, i64 3
  %2 = load i32, ptr %incdec.ptr2, align 4
  %add6 = add nsw i32 %2, 2
  %incdec.ptr7 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %add6, ptr %incdec.ptr4, align 4
  %3 = load i32, ptr %incdec.ptr5, align 4
  %add9 = add nsw i32 %3, 3
  store i32 %add9, ptr %incdec.ptr7, align 4
  ret void
}
; sub0: lane 0 adds -1, lane 1 is a plain copy, lanes 2 and 3 add -2 and -3.
; The assertions expect lanes 2-3 to become a <2 x i32> add while lanes 0
; and 1 stay scalar.
define void @sub0(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @sub0(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[TMP0]], -1
; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
; CHECK-NEXT:    store i32 [[SUB]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[INCDEC_PTR]], align 4
; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 2
; CHECK-NEXT:    store i32 [[TMP1]], ptr [[INCDEC_PTR1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <2 x i32> [[TMP3]], <i32 -2, i32 -3>
; CHECK-NEXT:    store <2 x i32> [[TMP4]], ptr [[INCDEC_PTR3]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
  %0 = load i32, ptr %src, align 4
  %sub = add nsw i32 %0, -1
  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %sub, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
  %1 = load i32, ptr %incdec.ptr, align 4
  %incdec.ptr3 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %1, ptr %incdec.ptr1, align 4
  %incdec.ptr4 = getelementptr inbounds i32, ptr %src, i64 3
  %2 = load i32, ptr %incdec.ptr2, align 4
  %sub5 = add nsw i32 %2, -2
  %incdec.ptr6 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %sub5, ptr %incdec.ptr3, align 4
  %3 = load i32, ptr %incdec.ptr4, align 4
  %sub8 = add nsw i32 %3, -3
  store i32 %sub8, ptr %incdec.ptr6, align 4
  ret void
}
; sub1: four consecutive i32 lanes each get an "add nsw" with the constants
; 4, -1, -2, -3. The assertions expect a single <4 x i32> load/add/store.
define void @sub1(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @sub1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], <i32 4, i32 -1, i32 -2, i32 -3>
; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
  %0 = load i32, ptr %src, align 4
  %add = add nsw i32 %0, 4
  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %add, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
  %1 = load i32, ptr %incdec.ptr, align 4
  %sub = add nsw i32 %1, -1
  %incdec.ptr3 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %sub, ptr %incdec.ptr1, align 4
  %incdec.ptr4 = getelementptr inbounds i32, ptr %src, i64 3
  %2 = load i32, ptr %incdec.ptr2, align 4
  %sub5 = add nsw i32 %2, -2
  %incdec.ptr6 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %sub5, ptr %incdec.ptr3, align 4
  %3 = load i32, ptr %incdec.ptr4, align 4
  %sub8 = add nsw i32 %3, -3
  store i32 %sub8, ptr %incdec.ptr6, align 4
  ret void
}
; sub2: four consecutive i32 lanes each get an "add nsw" with the constants
; -1, -1, -2, -3. The assertions expect a single <4 x i32> load/add/store.
define void @sub2(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @sub2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], <i32 -1, i32 -1, i32 -2, i32 -3>
; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
  %0 = load i32, ptr %src, align 4
  %sub = add nsw i32 %0, -1
  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %sub, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
  %1 = load i32, ptr %incdec.ptr, align 4
  %sub3 = add nsw i32 %1, -1
  %incdec.ptr4 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %sub3, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds i32, ptr %src, i64 3
  %2 = load i32, ptr %incdec.ptr2, align 4
  %sub6 = add nsw i32 %2, -2
  %incdec.ptr7 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %sub6, ptr %incdec.ptr4, align 4
  %3 = load i32, ptr %incdec.ptr5, align 4
  %sub9 = add nsw i32 %3, -3
  store i32 %sub9, ptr %incdec.ptr7, align 4
  ret void
}
; addsub0: lane 0 adds -1, lane 1 is a plain copy, lane 2 adds -2 and lane 3
; subtracts -3. The assertions expect lanes 2-3 to become a <2 x i32> add and
; a <2 x i32> sub blended by a shufflevector; lanes 0 and 1 stay scalar.
define void @addsub0(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @addsub0(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[TMP0]], -1
; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
; CHECK-NEXT:    store i32 [[SUB]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[INCDEC_PTR]], align 4
; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 2
; CHECK-NEXT:    store i32 [[TMP1]], ptr [[INCDEC_PTR1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <2 x i32> [[TMP3]], <i32 -2, i32 -3>
; CHECK-NEXT:    [[TMP5:%.*]] = sub nsw <2 x i32> [[TMP3]], <i32 -2, i32 -3>
; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT:    store <2 x i32> [[TMP6]], ptr [[INCDEC_PTR3]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
  %0 = load i32, ptr %src, align 4
  %sub = add nsw i32 %0, -1
  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %sub, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
  %1 = load i32, ptr %incdec.ptr, align 4
  %incdec.ptr3 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %1, ptr %incdec.ptr1, align 4
  %incdec.ptr4 = getelementptr inbounds i32, ptr %src, i64 3
  %2 = load i32, ptr %incdec.ptr2, align 4
  %sub5 = add nsw i32 %2, -2
  %incdec.ptr6 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %sub5, ptr %incdec.ptr3, align 4
  %3 = load i32, ptr %incdec.ptr4, align 4
  %sub8 = sub nsw i32 %3, -3
  store i32 %sub8, ptr %incdec.ptr6, align 4
  ret void
}
; addsub1: lane 0 adds -1, lane 1 subtracts -1, lane 2 is a plain copy and
; lane 3 subtracts -3. The assertions expect lanes 0-1 to become a <2 x i32>
; add and sub blended by a shufflevector; lanes 2 and 3 stay scalar.
define void @addsub1(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @addsub1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 2
; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[SRC]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 -1, i32 -1>
; CHECK-NEXT:    [[TMP3:%.*]] = sub nsw <2 x i32> [[TMP1]], <i32 -1, i32 -1>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT:    store <2 x i32> [[TMP4]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[INCDEC_PTR2]], align 4
; CHECK-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
; CHECK-NEXT:    store i32 [[TMP6]], ptr [[INCDEC_PTR3]], align 4
; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[INCDEC_PTR4]], align 4
; CHECK-NEXT:    [[SUB8:%.*]] = sub nsw i32 [[TMP7]], -3
; CHECK-NEXT:    store i32 [[SUB8]], ptr [[INCDEC_PTR6]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
  %0 = load i32, ptr %src, align 4
  %sub = add nsw i32 %0, -1
  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %sub, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
  %1 = load i32, ptr %incdec.ptr, align 4
  %sub1 = sub nsw i32 %1, -1
  %incdec.ptr3 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %sub1, ptr %incdec.ptr1, align 4
  %incdec.ptr4 = getelementptr inbounds i32, ptr %src, i64 3
  %2 = load i32, ptr %incdec.ptr2, align 4
  %incdec.ptr6 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %2, ptr %incdec.ptr3, align 4
  %3 = load i32, ptr %incdec.ptr4, align 4
  %sub8 = sub nsw i32 %3, -3
  store i32 %sub8, ptr %incdec.ptr6, align 4
  ret void
}
; mul: lanes 0 and 1 get a "mul nsw" by 257 and -3, lane 2 is a plain copy and
; lane 3 is multiplied by -9. The assertions expect lanes 0-1 to become a
; <2 x i32> mul while lanes 2 and 3 stay scalar.
define void @mul(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @mul(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 2
; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[SRC]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = mul nsw <2 x i32> [[TMP1]], <i32 257, i32 -3>
; CHECK-NEXT:    store <2 x i32> [[TMP2]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[INCDEC_PTR2]], align 4
; CHECK-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
; CHECK-NEXT:    store i32 [[TMP4]], ptr [[INCDEC_PTR4]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[INCDEC_PTR5]], align 4
; CHECK-NEXT:    [[MUL9:%.*]] = mul nsw i32 [[TMP5]], -9
; CHECK-NEXT:    store i32 [[MUL9]], ptr [[INCDEC_PTR7]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
  %0 = load i32, ptr %src, align 4
  %mul = mul nsw i32 %0, 257
  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %mul, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
  %1 = load i32, ptr %incdec.ptr, align 4
  %mul3 = mul nsw i32 %1, -3
  %incdec.ptr4 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %mul3, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds i32, ptr %src, i64 3
  %2 = load i32, ptr %incdec.ptr2, align 4
  %incdec.ptr7 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %2, ptr %incdec.ptr4, align 4
  %3 = load i32, ptr %incdec.ptr5, align 4
  %mul9 = mul nsw i32 %3, -9
  store i32 %mul9, ptr %incdec.ptr7, align 4
  ret void
}
; shl0: lane 0 is a plain copy; lanes 1, 2, 3 are shifted left by 1, 2, 3.
; The assertions expect lanes 1-2 to become a <2 x i32> shl by <1, 2> while
; lanes 0 and 3 stay scalar.
define void @shl0(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @shl0(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
; CHECK-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i32> [[TMP2]], <i32 1, i32 2>
; CHECK-NEXT:    store <2 x i32> [[TMP3]], ptr [[INCDEC_PTR1]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[INCDEC_PTR4]], align 4
; CHECK-NEXT:    [[SHL8:%.*]] = shl i32 [[TMP5]], 3
; CHECK-NEXT:    store i32 [[SHL8]], ptr [[INCDEC_PTR6]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
  %0 = load i32, ptr %src, align 4
  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %0, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
  %1 = load i32, ptr %incdec.ptr, align 4
  ; Lane 1 shift: the value stored below must be defined here. The shift
  ; amount 1 is the first element of the expected <i32 1, i32 2> vector.
  %shl = shl i32 %1, 1
  %incdec.ptr3 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %shl, ptr %incdec.ptr1, align 4
  %incdec.ptr4 = getelementptr inbounds i32, ptr %src, i64 3
  %2 = load i32, ptr %incdec.ptr2, align 4
  %shl5 = shl i32 %2, 2
  %incdec.ptr6 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %shl5, ptr %incdec.ptr3, align 4
  %3 = load i32, ptr %incdec.ptr4, align 4
  %shl8 = shl i32 %3, 3
  store i32 %shl8, ptr %incdec.ptr6, align 4
  ret void
}
; shl1: four consecutive i32 lanes are shifted left by 7, 1, 2, 3.
; The assertions expect a single <4 x i32> load/shl/store.
define void @shl1(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @shl1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[TMP1]], <i32 7, i32 1, i32 2, i32 3>
; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
  %0 = load i32, ptr %src, align 4
  ; Lane 0 shift: the value stored below must be defined here. The shift
  ; amount 7 is the first element of the expected <4 x i32> constant.
  %shl = shl i32 %0, 7
  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %shl, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
  %1 = load i32, ptr %incdec.ptr, align 4
  %shl3 = shl i32 %1, 1
  %incdec.ptr4 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %shl3, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds i32, ptr %src, i64 3
  %2 = load i32, ptr %incdec.ptr2, align 4
  %shl6 = shl i32 %2, 2
  %incdec.ptr7 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %shl6, ptr %incdec.ptr4, align 4
  %3 = load i32, ptr %incdec.ptr5, align 4
  %shl9 = shl i32 %3, 3
  store i32 %shl9, ptr %incdec.ptr7, align 4
  ret void
}
; add0f: four consecutive float lanes each get an "fadd fast" with the
; constants 1.0, 1.0, 2.0, 3.0. The assertions expect a single <4 x float>
; load/fadd/store.
define void @add0f(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @add0f(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]], <float 1.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %add = fadd fast float %0, 1.000000e+00
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %add, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %add3 = fadd fast float %1, 1.000000e+00
  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
  store float %add3, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %add6 = fadd fast float %2, 2.000000e+00
  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
  store float %add6, ptr %incdec.ptr4, align 4
  %3 = load float, ptr %incdec.ptr5, align 4
  %add9 = fadd fast float %3, 3.000000e+00
  store float %add9, ptr %incdec.ptr7, align 4
  ret void
}
; add1f: lane 0 is a plain copy; lanes 1, 2, 3 get an "fadd fast" with
; 1.0, 2.0, 3.0. The assertions expect lanes 1-2 to become a <2 x float> fadd
; while lanes 0 and 3 stay scalar.
define void @add1f(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @add1f(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
; CHECK-NEXT:    store float [[TMP0]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
; CHECK-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[INCDEC_PTR]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <2 x float> [[TMP2]], <float 1.000000e+00, float 2.000000e+00>
; CHECK-NEXT:    store <2 x float> [[TMP3]], ptr [[INCDEC_PTR1]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[INCDEC_PTR5]], align 4
; CHECK-NEXT:    [[ADD9:%.*]] = fadd fast float [[TMP5]], 3.000000e+00
; CHECK-NEXT:    store float [[ADD9]], ptr [[INCDEC_PTR7]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %0, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %add3 = fadd fast float %1, 1.000000e+00
  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
  store float %add3, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %add6 = fadd fast float %2, 2.000000e+00
  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
  store float %add6, ptr %incdec.ptr4, align 4
  %3 = load float, ptr %incdec.ptr5, align 4
  %add9 = fadd fast float %3, 3.000000e+00
  store float %add9, ptr %incdec.ptr7, align 4
  ret void
}
; sub0f: lane 0 gets "fadd fast" -1.0, lane 1 is a plain copy, lanes 2 and 3
; get "fadd fast" -2.0 and -3.0. The assertions expect lanes 2-3 to become a
; <2 x float> fadd while lanes 0 and 1 stay scalar.
define void @sub0f(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @sub0f(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP0]], -1.000000e+00
; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
; CHECK-NEXT:    store float [[ADD]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[INCDEC_PTR]], align 4
; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 2
; CHECK-NEXT:    store float [[TMP1]], ptr [[INCDEC_PTR1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, ptr [[INCDEC_PTR2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast <2 x float> [[TMP3]], <float -2.000000e+00, float -3.000000e+00>
; CHECK-NEXT:    store <2 x float> [[TMP4]], ptr [[INCDEC_PTR4]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %add = fadd fast float %0, -1.000000e+00
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %add, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
  store float %1, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %add6 = fadd fast float %2, -2.000000e+00
  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
  store float %add6, ptr %incdec.ptr4, align 4
  %3 = load float, ptr %incdec.ptr5, align 4
  %add9 = fadd fast float %3, -3.000000e+00
  store float %add9, ptr %incdec.ptr7, align 4
  ret void
}
; sub1f: four consecutive float lanes each get an "fadd fast" with the
; constants 4.0, -1.0, -2.0, -3.0. The assertions expect a single
; <4 x float> load/fadd/store.
define void @sub1f(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @sub1f(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]], <float 4.000000e+00, float -1.000000e+00, float -2.000000e+00, float -3.000000e+00>
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %add = fadd fast float %0, 4.000000e+00
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %add, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %sub = fadd fast float %1, -1.000000e+00
  %incdec.ptr3 = getelementptr inbounds float, ptr %dst, i64 2
  store float %sub, ptr %incdec.ptr1, align 4
  %incdec.ptr4 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %sub5 = fadd fast float %2, -2.000000e+00
  %incdec.ptr6 = getelementptr inbounds float, ptr %dst, i64 3
  store float %sub5, ptr %incdec.ptr3, align 4
  %3 = load float, ptr %incdec.ptr4, align 4
  %sub8 = fadd fast float %3, -3.000000e+00
  store float %sub8, ptr %incdec.ptr6, align 4
  ret void
}
; sub2f: four consecutive float lanes each get an "fadd fast" with the
; constants -1.0, -1.0, -2.0, -3.0. The assertions expect a single
; <4 x float> load/fadd/store.
define void @sub2f(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @sub2f(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]], <float -1.000000e+00, float -1.000000e+00, float -2.000000e+00, float -3.000000e+00>
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %sub = fadd fast float %0, -1.000000e+00
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %sub, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %sub3 = fadd fast float %1, -1.000000e+00
  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
  store float %sub3, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %sub6 = fadd fast float %2, -2.000000e+00
  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
  store float %sub6, ptr %incdec.ptr4, align 4
  %3 = load float, ptr %incdec.ptr5, align 4
  %sub9 = fadd fast float %3, -3.000000e+00
  store float %sub9, ptr %incdec.ptr7, align 4
  ret void
}
; addsub0f: lane 0 gets "fadd fast" -1.0, lane 1 is a plain copy, lane 2 gets
; "fadd fast" -2.0 and lane 3 gets "fsub fast" -3.0. The assertions expect
; lanes 2-3 to become a <2 x float> fadd and fsub blended by a shufflevector;
; lanes 0 and 1 stay scalar.
define void @addsub0f(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @addsub0f(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
; CHECK-NEXT:    [[SUB:%.*]] = fadd fast float [[TMP0]], -1.000000e+00
; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
; CHECK-NEXT:    store float [[SUB]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[INCDEC_PTR]], align 4
; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 2
; CHECK-NEXT:    store float [[TMP1]], ptr [[INCDEC_PTR1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, ptr [[INCDEC_PTR2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast <2 x float> [[TMP3]], <float -2.000000e+00, float -3.000000e+00>
; CHECK-NEXT:    [[TMP5:%.*]] = fsub fast <2 x float> [[TMP3]], <float -2.000000e+00, float -3.000000e+00>
; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT:    store <2 x float> [[TMP6]], ptr [[INCDEC_PTR3]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %sub = fadd fast float %0, -1.000000e+00
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %sub, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %incdec.ptr3 = getelementptr inbounds float, ptr %dst, i64 2
  store float %1, ptr %incdec.ptr1, align 4
  %incdec.ptr4 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %sub5 = fadd fast float %2, -2.000000e+00
  %incdec.ptr6 = getelementptr inbounds float, ptr %dst, i64 3
  store float %sub5, ptr %incdec.ptr3, align 4
  %3 = load float, ptr %incdec.ptr4, align 4
  %sub8 = fsub fast float %3, -3.000000e+00
  store float %sub8, ptr %incdec.ptr6, align 4
  ret void
}
; addsub1f: lane 0 gets "fadd fast" -1.0, lane 1 gets "fsub fast" -1.0, lane 2
; is a plain copy and lane 3 gets "fsub fast" -3.0. The assertions expect
; lanes 0-1 to become a <2 x float> fadd and fsub blended by a shufflevector;
; lanes 2 and 3 stay scalar.
define void @addsub1f(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @addsub1f(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 2
; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[SRC]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <2 x float> [[TMP1]], <float -1.000000e+00, float -1.000000e+00>
; CHECK-NEXT:    [[TMP3:%.*]] = fsub fast <2 x float> [[TMP1]], <float -1.000000e+00, float -1.000000e+00>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP3]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT:    store <2 x float> [[TMP4]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
; CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[INCDEC_PTR2]], align 4
; CHECK-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
; CHECK-NEXT:    store float [[TMP6]], ptr [[INCDEC_PTR3]], align 4
; CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr [[INCDEC_PTR4]], align 4
; CHECK-NEXT:    [[SUB8:%.*]] = fsub fast float [[TMP7]], -3.000000e+00
; CHECK-NEXT:    store float [[SUB8]], ptr [[INCDEC_PTR6]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %sub = fadd fast float %0, -1.000000e+00
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %sub, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %sub1 = fsub fast float %1, -1.000000e+00
  %incdec.ptr3 = getelementptr inbounds float, ptr %dst, i64 2
  store float %sub1, ptr %incdec.ptr1, align 4
  %incdec.ptr4 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %incdec.ptr6 = getelementptr inbounds float, ptr %dst, i64 3
  store float %2, ptr %incdec.ptr3, align 4
  %3 = load float, ptr %incdec.ptr4, align 4
  %sub8 = fsub fast float %3, -3.000000e+00
  store float %sub8, ptr %incdec.ptr6, align 4
  ret void
}
; mulf: lanes 0 and 1 get an "fmul fast" by 257.0 and -3.0, lane 2 is a plain
; copy and lane 3 is multiplied by -9.0. The assertions expect lanes 0-1 to
; become a <2 x float> fmul while lanes 2 and 3 stay scalar.
define void @mulf(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @mulf(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 2
; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[SRC]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], <float 2.570000e+02, float -3.000000e+00>
; CHECK-NEXT:    store <2 x float> [[TMP2]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[INCDEC_PTR2]], align 4
; CHECK-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
; CHECK-NEXT:    store float [[TMP4]], ptr [[INCDEC_PTR4]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[INCDEC_PTR5]], align 4
; CHECK-NEXT:    [[SUB9:%.*]] = fmul fast float [[TMP5]], -9.000000e+00
; CHECK-NEXT:    store float [[SUB9]], ptr [[INCDEC_PTR7]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %sub = fmul fast float %0, 2.570000e+02
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %sub, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %sub3 = fmul fast float %1, -3.000000e+00
  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
  store float %sub3, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
  store float %2, ptr %incdec.ptr4, align 4
  %3 = load float, ptr %incdec.ptr5, align 4
  %sub9 = fmul fast float %3, -9.000000e+00
  store float %sub9, ptr %incdec.ptr7, align 4
  ret void
}
; add0fn: same lane pattern as an fadd by 1.0, 1.0, 2.0, 3.0 over four
; consecutive floats, but written without fast-math flags. The assertions
; expect a single <4 x float> load/fadd/store.
define void @add0fn(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @add0fn(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], <float 1.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %add = fadd float %0, 1.000000e+00
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %add, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %add3 = fadd float %1, 1.000000e+00
  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
  store float %add3, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %add6 = fadd float %2, 2.000000e+00
  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
  store float %add6, ptr %incdec.ptr4, align 4
  %3 = load float, ptr %incdec.ptr5, align 4
  %add9 = fadd float %3, 3.000000e+00
  store float %add9, ptr %incdec.ptr7, align 4
  ret void
}
; add1fn: lane 0 is a plain copy; lanes 1, 2, 3 get an fadd (no fast-math
; flags) with 1.0, 2.0, 3.0. The assertions expect lanes 1-2 to become a
; <2 x float> fadd while lanes 0 and 3 stay scalar.
define void @add1fn(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @add1fn(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
; CHECK-NEXT:    store float [[TMP0]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
; CHECK-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[INCDEC_PTR]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x float> [[TMP2]], <float 1.000000e+00, float 2.000000e+00>
; CHECK-NEXT:    store <2 x float> [[TMP3]], ptr [[INCDEC_PTR1]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[INCDEC_PTR5]], align 4
; CHECK-NEXT:    [[ADD9:%.*]] = fadd float [[TMP5]], 3.000000e+00
; CHECK-NEXT:    store float [[ADD9]], ptr [[INCDEC_PTR7]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %0, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %add3 = fadd float %1, 1.000000e+00
  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
  store float %add3, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %add6 = fadd float %2, 2.000000e+00
  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
  store float %add6, ptr %incdec.ptr4, align 4
  %3 = load float, ptr %incdec.ptr5, align 4
  %add9 = fadd float %3, 3.000000e+00
  store float %add9, ptr %incdec.ptr7, align 4
  ret void
}
; sub0fn: lane 0 adds -1.0 with the 'fast' flag, lane 1 is a plain copy, and
; lanes 2 and 3 add -2.0 and -3.0 with a plain fadd (no fast-math flags).
; The expected output below leaves lanes 0 and 1 scalar (lane 0 keeps its
; 'fast' flag) and combines lanes 2 and 3 into one <2 x float>
; load / fadd / store with <-2.0, -3.0>.
708 define void @sub0fn(ptr noalias %dst, ptr noalias %src) {
709 ; CHECK-LABEL: @sub0fn(
711 ; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
712 ; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
713 ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP0]], -1.000000e+00
714 ; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
715 ; CHECK-NEXT: store float [[ADD]], ptr [[DST]], align 4
716 ; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 2
717 ; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[INCDEC_PTR]], align 4
718 ; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 2
719 ; CHECK-NEXT: store float [[TMP1]], ptr [[INCDEC_PTR1]], align 4
720 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, ptr [[INCDEC_PTR2]], align 4
721 ; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x float> [[TMP3]], <float -2.000000e+00, float -3.000000e+00>
722 ; CHECK-NEXT: store <2 x float> [[TMP4]], ptr [[INCDEC_PTR4]], align 4
723 ; CHECK-NEXT: ret void
; Scalar input: dst[0] = src[0] + -1.0 (fast); dst[1] = src[1];
; dst[2] = src[2] + -2.0; dst[3] = src[3] + -3.0.
726 %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
727 %0 = load float, ptr %src, align 4
728 %add = fadd fast float %0, -1.000000e+00
729 %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
730 store float %add, ptr %dst, align 4
731 %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
732 %1 = load float, ptr %incdec.ptr, align 4
733 %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
734 store float %1, ptr %incdec.ptr1, align 4
735 %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
736 %2 = load float, ptr %incdec.ptr2, align 4
737 %add6 = fadd float %2, -2.000000e+00
738 %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
739 store float %add6, ptr %incdec.ptr4, align 4
740 %3 = load float, ptr %incdec.ptr5, align 4
741 %add9 = fadd float %3, -3.000000e+00
742 store float %add9, ptr %incdec.ptr7, align 4
; sub1fn: all four lanes use a plain fadd (no fast-math flags) with the
; constants 4.0, -1.0, -2.0 and -3.0, i.e. one positive and three negative
; addends. The expected output below is a single <4 x float>
; load / fadd / store with the constant vector <4.0, -1.0, -2.0, -3.0>.
746 define void @sub1fn(ptr noalias %dst, ptr noalias %src) {
747 ; CHECK-LABEL: @sub1fn(
749 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
750 ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], <float 4.000000e+00, float -1.000000e+00, float -2.000000e+00, float -3.000000e+00>
751 ; CHECK-NEXT: store <4 x float> [[TMP2]], ptr [[DST:%.*]], align 4
752 ; CHECK-NEXT: ret void
; Scalar input: dst[i] = src[i] + {4.0, -1.0, -2.0, -3.0}[i] for i = 0..3.
755 %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
756 %0 = load float, ptr %src, align 4
757 %add = fadd float %0, 4.000000e+00
758 %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
759 store float %add, ptr %dst, align 4
760 %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
761 %1 = load float, ptr %incdec.ptr, align 4
762 %sub = fadd float %1, -1.000000e+00
763 %incdec.ptr3 = getelementptr inbounds float, ptr %dst, i64 2
764 store float %sub, ptr %incdec.ptr1, align 4
765 %incdec.ptr4 = getelementptr inbounds float, ptr %src, i64 3
766 %2 = load float, ptr %incdec.ptr2, align 4
767 %sub5 = fadd float %2, -2.000000e+00
768 %incdec.ptr6 = getelementptr inbounds float, ptr %dst, i64 3
769 store float %sub5, ptr %incdec.ptr3, align 4
770 %3 = load float, ptr %incdec.ptr4, align 4
771 %sub8 = fadd float %3, -3.000000e+00
772 store float %sub8, ptr %incdec.ptr6, align 4
; sub2fn: all four lanes use a plain fadd (no fast-math flags) with the
; negative constants -1.0, -1.0, -2.0 and -3.0. The expected output below is
; a single <4 x float> load / fadd / store with the constant vector
; <-1.0, -1.0, -2.0, -3.0>.
776 define void @sub2fn(ptr noalias %dst, ptr noalias %src) {
777 ; CHECK-LABEL: @sub2fn(
779 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
780 ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], <float -1.000000e+00, float -1.000000e+00, float -2.000000e+00, float -3.000000e+00>
781 ; CHECK-NEXT: store <4 x float> [[TMP2]], ptr [[DST:%.*]], align 4
782 ; CHECK-NEXT: ret void
; Scalar input: dst[i] = src[i] + {-1.0, -1.0, -2.0, -3.0}[i] for i = 0..3.
785 %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
786 %0 = load float, ptr %src, align 4
787 %sub = fadd float %0, -1.000000e+00
788 %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
789 store float %sub, ptr %dst, align 4
790 %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
791 %1 = load float, ptr %incdec.ptr, align 4
792 %sub3 = fadd float %1, -1.000000e+00
793 %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
794 store float %sub3, ptr %incdec.ptr1, align 4
795 %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
796 %2 = load float, ptr %incdec.ptr2, align 4
797 %sub6 = fadd float %2, -2.000000e+00
798 %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
799 store float %sub6, ptr %incdec.ptr4, align 4
800 %3 = load float, ptr %incdec.ptr5, align 4
801 %sub9 = fadd float %3, -3.000000e+00
802 store float %sub9, ptr %incdec.ptr7, align 4
; mulfn: lanes 0 and 1 multiply by 257.0 and -3.0 with a plain fmul (no
; fast-math flags), lane 2 is a plain copy, and lane 3 multiplies by -9.0
; with the 'fast' flag. The expected output below combines lanes 0 and 1 into
; one <2 x float> load / fmul / store with <257.0, -3.0>, keeps lane 2 as a
; scalar copy, and keeps lane 3 as a scalar 'fmul fast'.
806 define void @mulfn(ptr noalias %dst, ptr noalias %src) {
807 ; CHECK-LABEL: @mulfn(
809 ; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 2
810 ; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 2
811 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[SRC]], align 4
812 ; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 2.570000e+02, float -3.000000e+00>
813 ; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[DST]], align 4
814 ; CHECK-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
815 ; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[INCDEC_PTR2]], align 4
816 ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
817 ; CHECK-NEXT: store float [[TMP4]], ptr [[INCDEC_PTR4]], align 4
818 ; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[INCDEC_PTR5]], align 4
819 ; CHECK-NEXT: [[SUB9:%.*]] = fmul fast float [[TMP5]], -9.000000e+00
820 ; CHECK-NEXT: store float [[SUB9]], ptr [[INCDEC_PTR7]], align 4
821 ; CHECK-NEXT: ret void
; Scalar input: dst[0] = src[0] * 257.0; dst[1] = src[1] * -3.0;
; dst[2] = src[2]; dst[3] = src[3] * -9.0 (fast).
824 %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
825 %0 = load float, ptr %src, align 4
826 %sub = fmul float %0, 2.570000e+02
827 %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
828 store float %sub, ptr %dst, align 4
829 %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
830 %1 = load float, ptr %incdec.ptr, align 4
831 %sub3 = fmul float %1, -3.000000e+00
832 %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
833 store float %sub3, ptr %incdec.ptr1, align 4
834 %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
835 %2 = load float, ptr %incdec.ptr2, align 4
836 %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
837 store float %2, ptr %incdec.ptr4, align 4
838 %3 = load float, ptr %incdec.ptr5, align 4
839 %sub9 = fmul fast float %3, -9.000000e+00
840 store float %sub9, ptr %incdec.ptr7, align 4