; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s

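; Both load groups step through %p with constant strides (+16 and -16 bytes), so each
; group should be vectorized into an @llvm.experimental.vp.strided.load call.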
define void @test(ptr %p, ptr noalias %s) {
; CHECK-LABEL: @test(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast <8 x float> [[TMP1]], [[TMP0]]
; CHECK-NEXT:    store <8 x float> [[TMP2]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0
  %i = load float, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30
  %i1 = load float, ptr %arrayidx1, align 4
  %add = fsub fast float %i1, %i
  %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0
  store float %add, ptr %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 4
  %i2 = load float, ptr %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26
  %i3 = load float, ptr %arrayidx6, align 4
  %add7 = fsub fast float %i3, %i2
  %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1
  store float %add7, ptr %arrayidx9, align 4
  %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 8
  %i4 = load float, ptr %arrayidx11, align 4
  %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 22
  %i5 = load float, ptr %arrayidx13, align 4
  %add14 = fsub fast float %i5, %i4
  %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2
  store float %add14, ptr %arrayidx16, align 4
  %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 12
  %i6 = load float, ptr %arrayidx18, align 4
  %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 18
  %i7 = load float, ptr %arrayidx20, align 4
  %add21 = fsub fast float %i7, %i6
  %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3
  store float %add21, ptr %arrayidx23, align 4
  %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 16
  %i8 = load float, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 14
  %i9 = load float, ptr %arrayidx27, align 4
  %add28 = fsub fast float %i9, %i8
  %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4
  store float %add28, ptr %arrayidx30, align 4
  %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 20
  %i10 = load float, ptr %arrayidx32, align 4
  %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 10
  %i11 = load float, ptr %arrayidx34, align 4
  %add35 = fsub fast float %i11, %i10
  %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5
  store float %add35, ptr %arrayidx37, align 4
  %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 24
  %i12 = load float, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 6
  %i13 = load float, ptr %arrayidx41, align 4
  %add42 = fsub fast float %i13, %i12
  %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6
  store float %add42, ptr %arrayidx44, align 4
  %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 28
  %i14 = load float, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 2
  %i15 = load float, ptr %arrayidx48, align 4
  %add49 = fsub fast float %i15, %i14
  %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7
  store float %add49, ptr %arrayidx51, align 4
  ret void
}

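; Same pattern, but the first load group walks %p with a runtime stride of %stride elements,
; so its strided load should get a runtime byte stride of %str * 4; the other group keeps -16.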
define void @test1(ptr %p, ptr noalias %s, i32 %stride) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = mul i64 [[STR]], 4
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 [[TMP0]], <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
; CHECK-NEXT:    [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT:    store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %str = zext i32 %stride to i64
  %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0
  %i = load float, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30
  %i1 = load float, ptr %arrayidx1, align 4
  %add = fsub fast float %i1, %i
  %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0
  store float %add, ptr %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %str
  %i2 = load float, ptr %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26
  %i3 = load float, ptr %arrayidx6, align 4
  %add7 = fsub fast float %i3, %i2
  %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1
  store float %add7, ptr %arrayidx9, align 4
  %st1 = mul i64 %str, 2
  %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st1
  %i4 = load float, ptr %arrayidx11, align 4
  %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 22
  %i5 = load float, ptr %arrayidx13, align 4
  %add14 = fsub fast float %i5, %i4
  %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2
  store float %add14, ptr %arrayidx16, align 4
  %st2 = mul i64 %str, 3
  %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st2
  %i6 = load float, ptr %arrayidx18, align 4
  %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 18
  %i7 = load float, ptr %arrayidx20, align 4
  %add21 = fsub fast float %i7, %i6
  %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3
  store float %add21, ptr %arrayidx23, align 4
  %st3 = mul i64 %str, 4
  %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st3
  %i8 = load float, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 14
  %i9 = load float, ptr %arrayidx27, align 4
  %add28 = fsub fast float %i9, %i8
  %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4
  store float %add28, ptr %arrayidx30, align 4
  %st4 = mul i64 %str, 5
  %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st4
  %i10 = load float, ptr %arrayidx32, align 4
  %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 10
  %i11 = load float, ptr %arrayidx34, align 4
  %add35 = fsub fast float %i11, %i10
  %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5
  store float %add35, ptr %arrayidx37, align 4
  %st5 = mul i64 %str, 6
  %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st5
  %i12 = load float, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 6
  %i13 = load float, ptr %arrayidx41, align 4
  %add42 = fsub fast float %i13, %i12
  %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6
  store float %add42, ptr %arrayidx44, align 4
  %st6 = mul i64 %str, 7
  %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st6
  %i14 = load float, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 2
  %i15 = load float, ptr %arrayidx48, align 4
  %add49 = fsub fast float %i15, %i14
  %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7
  store float %add49, ptr %arrayidx51, align 4
  ret void
}

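; Here the runtime-stride group runs in reverse (indices 7*%str down to 0), so its strided
; load should get a negative runtime byte stride of %str * -4; the other group keeps +16.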
define void @test2(ptr %p, ptr noalias %s, i32 %stride) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 2
; CHECK-NEXT:    [[ST6:%.*]] = mul i64 [[STR]], 7
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST6]]
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[STR]], -4
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 [[TMP1]], <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
; CHECK-NEXT:    [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]]
; CHECK-NEXT:    store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %str = zext i32 %stride to i64
  %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 2
  %i = load float, ptr %arrayidx, align 4
  %st6 = mul i64 %str, 7
  %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st6
  %i1 = load float, ptr %arrayidx1, align 4
  %add = fsub fast float %i1, %i
  %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0
  store float %add, ptr %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 6
  %i2 = load float, ptr %arrayidx4, align 4
  %st5 = mul i64 %str, 6
  %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st5
  %i3 = load float, ptr %arrayidx6, align 4
  %add7 = fsub fast float %i3, %i2
  %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1
  store float %add7, ptr %arrayidx9, align 4
  %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 10
  %i4 = load float, ptr %arrayidx11, align 4
  %st4 = mul i64 %str, 5
  %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st4
  %i5 = load float, ptr %arrayidx13, align 4
  %add14 = fsub fast float %i5, %i4
  %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2
  store float %add14, ptr %arrayidx16, align 4
  %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 14
  %i6 = load float, ptr %arrayidx18, align 4
  %st3 = mul i64 %str, 4
  %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st3
  %i7 = load float, ptr %arrayidx20, align 4
  %add21 = fsub fast float %i7, %i6
  %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3
  store float %add21, ptr %arrayidx23, align 4
  %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 18
  %st2 = mul i64 %str, 3
  %i8 = load float, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st2
  %i9 = load float, ptr %arrayidx27, align 4
  %add28 = fsub fast float %i9, %i8
  %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4
  store float %add28, ptr %arrayidx30, align 4
  %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 22
  %i10 = load float, ptr %arrayidx32, align 4
  %st1 = mul i64 %str, 2
  %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st1
  %i11 = load float, ptr %arrayidx34, align 4
  %add35 = fsub fast float %i11, %i10
  %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5
  store float %add35, ptr %arrayidx37, align 4
  %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26
  %i12 = load float, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %str
  %i13 = load float, ptr %arrayidx41, align 4
  %add42 = fsub fast float %i13, %i12
  %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6
  store float %add42, ptr %arrayidx44, align 4
  %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30
  %i14 = load float, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0
  %i15 = load float, ptr %arrayidx48, align 4
  %add49 = fsub fast float %i15, %i14
  %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7
  store float %add49, ptr %arrayidx51, align 4
  ret void
}

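; The second load group steps down one element at a time (indices 30 through 23), giving a
; constant stride of -4 bytes; the first group keeps the +16-byte stride.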
define void @test3(ptr %p, ptr noalias %s) {
; CHECK-LABEL: @test3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
; CHECK-NEXT:    [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]]
; CHECK-NEXT:    store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0
  %i = load float, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30
  %i1 = load float, ptr %arrayidx1, align 4
  %add = fsub fast float %i1, %i
  %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0
  store float %add, ptr %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 4
  %i2 = load float, ptr %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 29
  %i3 = load float, ptr %arrayidx6, align 4
  %add7 = fsub fast float %i3, %i2
  %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1
  store float %add7, ptr %arrayidx9, align 4
  %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 8
  %i4 = load float, ptr %arrayidx11, align 4
  %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 28
  %i5 = load float, ptr %arrayidx13, align 4
  %add14 = fsub fast float %i5, %i4
  %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2
  store float %add14, ptr %arrayidx16, align 4
  %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 12
  %i6 = load float, ptr %arrayidx18, align 4
  %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 27
  %i7 = load float, ptr %arrayidx20, align 4
  %add21 = fsub fast float %i7, %i6
  %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3
  store float %add21, ptr %arrayidx23, align 4
  %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 16
  %i8 = load float, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26
  %i9 = load float, ptr %arrayidx27, align 4
  %add28 = fsub fast float %i9, %i8
  %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4
  store float %add28, ptr %arrayidx30, align 4
  %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 20
  %i10 = load float, ptr %arrayidx32, align 4
  %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 25
  %i11 = load float, ptr %arrayidx34, align 4
  %add35 = fsub fast float %i11, %i10
  %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5
  store float %add35, ptr %arrayidx37, align 4
  %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 24
  %i12 = load float, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 24
  %i13 = load float, ptr %arrayidx41, align 4
  %add42 = fsub fast float %i13, %i12
  %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6
  store float %add42, ptr %arrayidx44, align 4
  %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 28
  %i14 = load float, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 23
  %i15 = load float, ptr %arrayidx48, align 4
  %add49 = fsub fast float %i15, %i14
  %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7
  store float %add49, ptr %arrayidx51, align 4
  ret void
}