1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -S | FileCheck --check-prefixes=CHECK,NOSTRIDED %s
3 ; RUN: opt < %s -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -lv-strided-pointer-ivs=true -laa-speculate-unit-stride=false -S | FileCheck --check-prefixes=CHECK,STRIDED %s
; Scaled constant stride: the scalar loop computes offset = i * 8 and
; loads/stores p[i*8] (i32 elements) for i in [0, 1024). Both RUN
; configurations vectorize this identically (shared CHECK prefix): a
; <vscale x 4 x i64> vector IV is multiplied by splat(8) to feed a
; masked gather + masked scatter of <vscale x 4 x i32>.
6 define void @single_constant_stride_int_scaled(ptr %p) {
7 ; CHECK-LABEL: @single_constant_stride_int_scaled(
9 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
10 ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
11 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 1024, [[TMP1]]
12 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
14 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
15 ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
16 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
17 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
18 ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]]
19 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[TMP5]]
20 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
21 ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
22 ; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
23 ; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i64> [[TMP8]], zeroinitializer
24 ; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP9]], splat (i64 1)
25 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
26 ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 1, [[TMP7]]
27 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0
28 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
29 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
31 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
32 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
33 ; CHECK-NEXT: [[TMP14:%.*]] = mul nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 8)
34 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 4 x i64> [[TMP14]]
35 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP15]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison)
36 ; CHECK-NEXT: [[TMP16:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1)
37 ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP16]], <vscale x 4 x ptr> [[TMP15]], i32 4, <vscale x 4 x i1> splat (i1 true))
38 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
39 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
40 ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
41 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
42 ; CHECK: middle.block:
43 ; CHECK-NEXT: br label [[SCALAR_PH]]
45 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
46 ; CHECK-NEXT: br label [[LOOP:%.*]]
48 ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
49 ; CHECK-NEXT: [[OFFSET:%.*]] = mul nuw nsw i64 [[I]], 8
50 ; CHECK-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
51 ; CHECK-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
52 ; CHECK-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
53 ; CHECK-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4
54 ; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1
55 ; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
56 ; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
58 ; CHECK-NEXT: ret void
; Scalar source loop (partial in this view): p[i*8] += 1 pattern.
63 %i = phi i64 [0, %entry], [%nexti, %loop]
65 %offset = mul nsw nuw i64 %i, 8
66 %q0 = getelementptr i32, ptr %p, i64 %offset
67 %x0 = load i32, ptr %q0
69 store i32 %y0, ptr %q0
71 %nexti = add i64 %i, 1
72 %done = icmp eq i64 %nexti, 1024
73 br i1 %done, label %exit, label %loop
; Constant-stride integer IV: here the offset is its own induction variable
; (offset += 64 each iteration) rather than i * constant. Both RUN lines
; vectorize it (shared CHECK prefix): the vector IV steps by splat(64) and
; the memory accesses become a masked gather + masked scatter.
78 define void @single_constant_stride_int_iv(ptr %p) {
79 ; CHECK-LABEL: @single_constant_stride_int_iv(
81 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
82 ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
83 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
84 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
86 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
87 ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
88 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
89 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
90 ; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 64
91 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
92 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
93 ; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
94 ; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i64> [[TMP6]], zeroinitializer
95 ; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> [[TMP7]], splat (i64 64)
96 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]]
97 ; CHECK-NEXT: [[TMP11:%.*]] = mul i64 64, [[TMP5]]
98 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
99 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
100 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
101 ; CHECK: vector.body:
102 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
103 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
104 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 4 x i64> [[VEC_IND]]
105 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP12]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison)
106 ; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1)
107 ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP13]], <vscale x 4 x ptr> [[TMP12]], i32 4, <vscale x 4 x i1> splat (i1 true))
108 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
109 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
110 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
111 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
112 ; CHECK: middle.block:
113 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
114 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
116 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
117 ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
118 ; CHECK-NEXT: br label [[LOOP:%.*]]
120 ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
121 ; CHECK-NEXT: [[OFFSET:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[OFFSET_NEXT:%.*]], [[LOOP]] ]
122 ; CHECK-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
123 ; CHECK-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
124 ; CHECK-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
125 ; CHECK-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4
126 ; CHECK-NEXT: [[OFFSET_NEXT]] = add nuw nsw i64 [[OFFSET]], 64
127 ; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1
128 ; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
129 ; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
131 ; CHECK-NEXT: ret void
; Scalar source loop (partial in this view): second IV %offset advances by 64.
136 %i = phi i64 [0, %entry], [%nexti, %loop]
137 %offset = phi i64 [0, %entry], [%offset.next, %loop]
139 %q0 = getelementptr i32, ptr %p, i64 %offset
140 %x0 = load i32, ptr %q0
142 store i32 %y0, ptr %q0
144 %offset.next = add nsw nuw i64 %offset, 64
145 %nexti = add i64 %i, 1
146 %done = icmp eq i64 %nexti, 1024
147 br i1 %done, label %exit, label %loop
; Constant-stride pointer IV: the scalar loop advances a pointer by 8 bytes
; per iteration (gep inbounds i8, 8). Both RUN lines vectorize it (shared
; CHECK prefix) with a pointer phi expanded to a vector GEP; the load side
; is done as one wide <vscale x 8 x i32> load followed by deinterleave2
; (taking field 0), while the store side remains a masked scatter.
153 define void @single_constant_stride_ptr_iv(ptr %p) {
154 ; CHECK-LABEL: @single_constant_stride_ptr_iv(
156 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
157 ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
158 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 1024, [[TMP1]]
159 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
161 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
162 ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
163 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
164 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
165 ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]]
166 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[TMP5]]
167 ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[N_VEC]], 8
168 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP6]]
169 ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
170 ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
171 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
172 ; CHECK: vector.body:
173 ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
174 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
175 ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
176 ; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
177 ; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 1
178 ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 8, [[TMP11]]
179 ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP10]], 0
180 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0
181 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
182 ; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
183 ; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT]], [[TMP14]]
184 ; CHECK-NEXT: [[TMP16:%.*]] = mul <vscale x 4 x i64> [[TMP15]], splat (i64 8)
185 ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[TMP16]]
186 ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 4 x ptr> [[VECTOR_GEP]], i32 0
187 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP17]], align 4
188 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
189 ; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
190 ; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 4 x i32> [[TMP19]], splat (i32 1)
191 ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x ptr> [[VECTOR_GEP]], i32 4, <vscale x 4 x i1> splat (i1 true))
192 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
193 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP12]]
194 ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
195 ; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
196 ; CHECK: middle.block:
197 ; CHECK-NEXT: br label [[SCALAR_PH]]
199 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
200 ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY]] ]
201 ; CHECK-NEXT: br label [[LOOP:%.*]]
203 ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
204 ; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[PTR_NEXT:%.*]], [[LOOP]] ]
205 ; CHECK-NEXT: [[X0:%.*]] = load i32, ptr [[PTR]], align 4
206 ; CHECK-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
207 ; CHECK-NEXT: store i32 [[Y0]], ptr [[PTR]], align 4
208 ; CHECK-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 8
209 ; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1
210 ; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
211 ; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
213 ; CHECK-NEXT: ret void
; Scalar source loop (partial in this view): pointer advances 8 bytes/iter.
218 %i = phi i64 [0, %entry], [%nexti, %loop]
219 %ptr = phi ptr [%p, %entry], [%ptr.next, %loop]
221 %x0 = load i32, ptr %ptr
223 store i32 %y0, ptr %ptr
225 %ptr.next = getelementptr inbounds i8, ptr %ptr, i64 8
226 %nexti = add i64 %i, 1
227 %done = icmp eq i64 %nexti, 1024
228 br i1 %done, label %exit, label %loop
; Runtime stride, scaled index: offset = i * %stride where %stride is an
; argument. The two RUN lines diverge here:
;  - NOSTRIDED: versions the loop with a SCEV check (%stride == 1) and, when
;    it holds, emits ordinary unit-stride wide loads/stores.
;  - STRIDED (with -laa-speculate-unit-stride=false): the checks show only
;    the scalar loop, i.e. no vectorized body is produced in this config.
234 define void @single_stride_int_scaled(ptr %p, i64 %stride) {
235 ; NOSTRIDED-LABEL: @single_stride_int_scaled(
236 ; NOSTRIDED-NEXT: entry:
237 ; NOSTRIDED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
238 ; NOSTRIDED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
239 ; NOSTRIDED-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP1]])
240 ; NOSTRIDED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP2]]
241 ; NOSTRIDED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
242 ; NOSTRIDED: vector.scevcheck:
243 ; NOSTRIDED-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[STRIDE:%.*]], 1
244 ; NOSTRIDED-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
245 ; NOSTRIDED: vector.ph:
246 ; NOSTRIDED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
247 ; NOSTRIDED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
248 ; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
249 ; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
250 ; NOSTRIDED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
251 ; NOSTRIDED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
252 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
253 ; NOSTRIDED: vector.body:
254 ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
255 ; NOSTRIDED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
256 ; NOSTRIDED-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]]
257 ; NOSTRIDED-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0
258 ; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP9]], align 4
259 ; NOSTRIDED-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
260 ; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP10]], ptr [[TMP9]], align 4
261 ; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
262 ; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
263 ; NOSTRIDED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
264 ; NOSTRIDED: middle.block:
265 ; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
266 ; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
267 ; NOSTRIDED: scalar.ph:
268 ; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
269 ; NOSTRIDED-NEXT: br label [[LOOP:%.*]]
271 ; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
272 ; NOSTRIDED-NEXT: [[OFFSET:%.*]] = mul nuw nsw i64 [[I]], [[STRIDE]]
273 ; NOSTRIDED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
274 ; NOSTRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
275 ; NOSTRIDED-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
276 ; NOSTRIDED-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4
277 ; NOSTRIDED-NEXT: [[NEXTI]] = add i64 [[I]], 1
278 ; NOSTRIDED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
279 ; NOSTRIDED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
281 ; NOSTRIDED-NEXT: ret void
283 ; STRIDED-LABEL: @single_stride_int_scaled(
284 ; STRIDED-NEXT: entry:
285 ; STRIDED-NEXT: br label [[LOOP:%.*]]
287 ; STRIDED-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
288 ; STRIDED-NEXT: [[OFFSET:%.*]] = mul nuw nsw i64 [[I]], [[STRIDE:%.*]]
289 ; STRIDED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET]]
290 ; STRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
291 ; STRIDED-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
292 ; STRIDED-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4
293 ; STRIDED-NEXT: [[NEXTI]] = add i64 [[I]], 1
294 ; STRIDED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
295 ; STRIDED-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]]
297 ; STRIDED-NEXT: ret void
; Scalar source loop (partial in this view): offset = i * %stride.
302 %i = phi i64 [0, %entry], [%nexti, %loop]
304 %offset = mul nsw nuw i64 %i, %stride
305 %q0 = getelementptr i32, ptr %p, i64 %offset
306 %x0 = load i32, ptr %q0
308 store i32 %y0, ptr %q0
310 %nexti = add i64 %i, 1
311 %done = icmp eq i64 %nexti, 1024
312 br i1 %done, label %exit, label %loop
; Runtime stride as a secondary induction variable (offset += %stride).
; Mirrors @single_stride_int_scaled:
;  - NOSTRIDED: loop versioned on %stride == 1 (vector.scevcheck); the
;    vector body then uses unit-stride wide loads/stores.
;  - STRIDED: only the scalar loop appears in the checks for this config.
317 define void @single_stride_int_iv(ptr %p, i64 %stride) {
318 ; NOSTRIDED-LABEL: @single_stride_int_iv(
319 ; NOSTRIDED-NEXT: entry:
320 ; NOSTRIDED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
321 ; NOSTRIDED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
322 ; NOSTRIDED-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP1]])
323 ; NOSTRIDED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP2]]
324 ; NOSTRIDED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
325 ; NOSTRIDED: vector.scevcheck:
326 ; NOSTRIDED-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[STRIDE:%.*]], 1
327 ; NOSTRIDED-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
328 ; NOSTRIDED: vector.ph:
329 ; NOSTRIDED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
330 ; NOSTRIDED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
331 ; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
332 ; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
333 ; NOSTRIDED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[STRIDE]]
334 ; NOSTRIDED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
335 ; NOSTRIDED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
336 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
337 ; NOSTRIDED: vector.body:
338 ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
339 ; NOSTRIDED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
340 ; NOSTRIDED-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]]
341 ; NOSTRIDED-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0
342 ; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP9]], align 4
343 ; NOSTRIDED-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
344 ; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP10]], ptr [[TMP9]], align 4
345 ; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
346 ; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
347 ; NOSTRIDED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
348 ; NOSTRIDED: middle.block:
349 ; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
350 ; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
351 ; NOSTRIDED: scalar.ph:
352 ; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
353 ; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
354 ; NOSTRIDED-NEXT: br label [[LOOP:%.*]]
356 ; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
357 ; NOSTRIDED-NEXT: [[OFFSET:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[OFFSET_NEXT:%.*]], [[LOOP]] ]
358 ; NOSTRIDED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
359 ; NOSTRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
360 ; NOSTRIDED-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
361 ; NOSTRIDED-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4
362 ; NOSTRIDED-NEXT: [[OFFSET_NEXT]] = add nuw nsw i64 [[OFFSET]], [[STRIDE]]
363 ; NOSTRIDED-NEXT: [[NEXTI]] = add i64 [[I]], 1
364 ; NOSTRIDED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
365 ; NOSTRIDED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
367 ; NOSTRIDED-NEXT: ret void
369 ; STRIDED-LABEL: @single_stride_int_iv(
370 ; STRIDED-NEXT: entry:
371 ; STRIDED-NEXT: br label [[LOOP:%.*]]
373 ; STRIDED-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
374 ; STRIDED-NEXT: [[OFFSET:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[OFFSET_NEXT:%.*]], [[LOOP]] ]
375 ; STRIDED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET]]
376 ; STRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
377 ; STRIDED-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
378 ; STRIDED-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4
379 ; STRIDED-NEXT: [[OFFSET_NEXT]] = add nuw nsw i64 [[OFFSET]], [[STRIDE:%.*]]
380 ; STRIDED-NEXT: [[NEXTI]] = add i64 [[I]], 1
381 ; STRIDED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
382 ; STRIDED-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]]
384 ; STRIDED-NEXT: ret void
; Scalar source loop (partial in this view): offset IV stepping by %stride.
389 %i = phi i64 [0, %entry], [%nexti, %loop]
390 %offset = phi i64 [0, %entry], [%offset.next, %loop]
392 %q0 = getelementptr i32, ptr %p, i64 %offset
393 %x0 = load i32, ptr %q0
395 store i32 %y0, ptr %q0
397 %offset.next = add nsw nuw i64 %offset, %stride
398 %nexti = add i64 %i, 1
399 %done = icmp eq i64 %nexti, 1024
400 br i1 %done, label %exit, label %loop
; Runtime-stride pointer IV: the pointer advances by %stride bytes each
; iteration. The shared CHECK prefix (i.e. both RUN configurations) shows
; only the scalar loop — neither config vectorizes this form.
406 define void @single_stride_ptr_iv(ptr %p, i64 %stride) {
407 ; CHECK-LABEL: @single_stride_ptr_iv(
409 ; CHECK-NEXT: br label [[LOOP:%.*]]
411 ; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
412 ; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[P:%.*]], [[ENTRY]] ], [ [[PTR_NEXT:%.*]], [[LOOP]] ]
413 ; CHECK-NEXT: [[X0:%.*]] = load i32, ptr [[PTR]], align 4
414 ; CHECK-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
415 ; CHECK-NEXT: store i32 [[Y0]], ptr [[PTR]], align 4
416 ; CHECK-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[STRIDE:%.*]]
417 ; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1
418 ; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
419 ; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]]
421 ; CHECK-NEXT: ret void
; Scalar source loop (partial in this view): pointer stepped by %stride bytes.
426 %i = phi i64 [0, %entry], [%nexti, %loop]
427 %ptr = phi ptr [%p, %entry], [%ptr.next, %loop]
429 %x0 = load i32, ptr %ptr
431 store i32 %y0, ptr %ptr
433 %ptr.next = getelementptr inbounds i8, ptr %ptr, i64 %stride
434 %nexti = add i64 %i, 1
435 %done = icmp eq i64 %nexti, 1024
436 br i1 %done, label %exit, label %loop
; Two base pointers, runtime stride: load from p[i*%stride], store to
; p2[i*%stride]. The two configurations diverge:
;  - NOSTRIDED: SCEV check (%stride == 1) plus a pointer-difference
;    memcheck between p2 and p; on success, unit-stride wide load/store.
;  - STRIDED: full runtime alias memcheck over the strided ranges (umin/umax
;    of the end offsets), then a vector body that keeps the strided form:
;    masked gather from p and masked scatter to p2 using a splat of %stride,
;    with !alias.scope / !noalias metadata from the memcheck.
441 define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
442 ; NOSTRIDED-LABEL: @double_stride_int_scaled(
443 ; NOSTRIDED-NEXT: entry:
444 ; NOSTRIDED-NEXT: [[P3:%.*]] = ptrtoint ptr [[P:%.*]] to i64
445 ; NOSTRIDED-NEXT: [[P21:%.*]] = ptrtoint ptr [[P2:%.*]] to i64
446 ; NOSTRIDED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
447 ; NOSTRIDED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
448 ; NOSTRIDED-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]])
449 ; NOSTRIDED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP2]]
450 ; NOSTRIDED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
451 ; NOSTRIDED: vector.scevcheck:
452 ; NOSTRIDED-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[STRIDE:%.*]], 1
453 ; NOSTRIDED-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
454 ; NOSTRIDED: vector.memcheck:
455 ; NOSTRIDED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
456 ; NOSTRIDED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
457 ; NOSTRIDED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
458 ; NOSTRIDED-NEXT: [[TMP6:%.*]] = sub i64 [[P21]], [[P3]]
459 ; NOSTRIDED-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
460 ; NOSTRIDED-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
461 ; NOSTRIDED: vector.ph:
462 ; NOSTRIDED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
463 ; NOSTRIDED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
464 ; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP8]]
465 ; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
466 ; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
467 ; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
468 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
469 ; NOSTRIDED: vector.body:
470 ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
471 ; NOSTRIDED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0
472 ; NOSTRIDED-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP11]]
473 ; NOSTRIDED-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0
474 ; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP13]], align 4
475 ; NOSTRIDED-NEXT: [[TMP14:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
476 ; NOSTRIDED-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[P2]], i64 [[TMP11]]
477 ; NOSTRIDED-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP15]], i32 0
478 ; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP14]], ptr [[TMP16]], align 4
479 ; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
480 ; NOSTRIDED-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
481 ; NOSTRIDED-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
482 ; NOSTRIDED: middle.block:
483 ; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
484 ; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
485 ; NOSTRIDED: scalar.ph:
486 ; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
487 ; NOSTRIDED-NEXT: br label [[LOOP:%.*]]
489 ; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
490 ; NOSTRIDED-NEXT: [[OFFSET:%.*]] = mul nuw nsw i64 [[I]], [[STRIDE]]
491 ; NOSTRIDED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
492 ; NOSTRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
493 ; NOSTRIDED-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
494 ; NOSTRIDED-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P2]], i64 [[OFFSET]]
495 ; NOSTRIDED-NEXT: store i32 [[Y0]], ptr [[Q1]], align 4
496 ; NOSTRIDED-NEXT: [[NEXTI]] = add i64 [[I]], 1
497 ; NOSTRIDED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
498 ; NOSTRIDED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
500 ; NOSTRIDED-NEXT: ret void
502 ; STRIDED-LABEL: @double_stride_int_scaled(
503 ; STRIDED-NEXT: entry:
504 ; STRIDED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
505 ; STRIDED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
506 ; STRIDED-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 24, i64 [[TMP1]])
507 ; STRIDED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP2]]
508 ; STRIDED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
509 ; STRIDED: vector.memcheck:
510 ; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 [[STRIDE:%.*]], 4092
511 ; STRIDED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 [[TMP3]]
512 ; STRIDED-NEXT: [[TMP4:%.*]] = icmp ult ptr [[P2]], [[SCEVGEP]]
513 ; STRIDED-NEXT: [[UMIN:%.*]] = select i1 [[TMP4]], ptr [[P2]], ptr [[SCEVGEP]]
514 ; STRIDED-NEXT: [[TMP5:%.*]] = icmp ugt ptr [[P2]], [[SCEVGEP]]
515 ; STRIDED-NEXT: [[UMAX:%.*]] = select i1 [[TMP5]], ptr [[P2]], ptr [[SCEVGEP]]
516 ; STRIDED-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[UMAX]], i64 4
517 ; STRIDED-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP3]]
518 ; STRIDED-NEXT: [[TMP6:%.*]] = icmp ult ptr [[P]], [[SCEVGEP2]]
519 ; STRIDED-NEXT: [[UMIN3:%.*]] = select i1 [[TMP6]], ptr [[P]], ptr [[SCEVGEP2]]
520 ; STRIDED-NEXT: [[TMP7:%.*]] = icmp ugt ptr [[P]], [[SCEVGEP2]]
521 ; STRIDED-NEXT: [[UMAX4:%.*]] = select i1 [[TMP7]], ptr [[P]], ptr [[SCEVGEP2]]
522 ; STRIDED-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[UMAX4]], i64 4
523 ; STRIDED-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[UMIN]], [[SCEVGEP5]]
524 ; STRIDED-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[UMIN3]], [[SCEVGEP1]]
525 ; STRIDED-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
526 ; STRIDED-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
527 ; STRIDED: vector.ph:
528 ; STRIDED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
529 ; STRIDED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4
530 ; STRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP9]]
531 ; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
532 ; STRIDED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
533 ; STRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
534 ; STRIDED-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
535 ; STRIDED-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i64> [[TMP12]], zeroinitializer
536 ; STRIDED-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i64> [[TMP13]], splat (i64 1)
537 ; STRIDED-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP14]]
538 ; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 1, [[TMP11]]
539 ; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP17]], i64 0
540 ; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
541 ; STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
542 ; STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
543 ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
544 ; STRIDED: vector.body:
545 ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
546 ; STRIDED-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
547 ; STRIDED-NEXT: [[TMP18:%.*]] = mul nuw nsw <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
548 ; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[P]], <vscale x 4 x i64> [[TMP18]]
549 ; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP19]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison), !alias.scope [[META8:![0-9]+]]
550 ; STRIDED-NEXT: [[TMP20:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1)
551 ; STRIDED-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[P2]], <vscale x 4 x i64> [[TMP18]]
552 ; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x ptr> [[TMP21]], i32 4, <vscale x 4 x i1> splat (i1 true)), !alias.scope [[META11:![0-9]+]], !noalias [[META8]]
553 ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
554 ; STRIDED-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
555 ; STRIDED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
556 ; STRIDED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
557 ; STRIDED: middle.block:
558 ; STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
559 ; STRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
560 ; STRIDED: scalar.ph:
561 ; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
562 ; STRIDED-NEXT: br label [[LOOP:%.*]]
564 ; STRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
565 ; STRIDED-NEXT: [[OFFSET:%.*]] = mul nuw nsw i64 [[I]], [[STRIDE]]
566 ; STRIDED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
567 ; STRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
568 ; STRIDED-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
569 ; STRIDED-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P2]], i64 [[OFFSET]]
570 ; STRIDED-NEXT: store i32 [[Y0]], ptr [[Q1]], align 4
571 ; STRIDED-NEXT: [[NEXTI]] = add i64 [[I]], 1
572 ; STRIDED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
573 ; STRIDED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
575 ; STRIDED-NEXT: ret void
; Scalar source loop (partial in this view): p2[i*%stride] = p[i*%stride] + 1.
580 %i = phi i64 [0, %entry], [%nexti, %loop]
582 %offset = mul nsw nuw i64 %i, %stride
583 %q0 = getelementptr i32, ptr %p, i64 %offset
584 %x0 = load i32, ptr %q0
586 %q1 = getelementptr i32, ptr %p2, i64 %offset
587 store i32 %y0, ptr %q1
589 %nexti = add i64 %i, 1
590 %done = icmp eq i64 %nexti, 1024
591 br i1 %done, label %exit, label %loop
596 define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) {
597 ; NOSTRIDED-LABEL: @double_stride_int_iv(
598 ; NOSTRIDED-NEXT: entry:
599 ; NOSTRIDED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
600 ; NOSTRIDED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
601 ; NOSTRIDED-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP1]])
602 ; NOSTRIDED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP2]]
603 ; NOSTRIDED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
604 ; NOSTRIDED: vector.scevcheck:
605 ; NOSTRIDED-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[STRIDE:%.*]], 1
606 ; NOSTRIDED-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
607 ; NOSTRIDED: vector.ph:
608 ; NOSTRIDED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
609 ; NOSTRIDED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
610 ; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
611 ; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
612 ; NOSTRIDED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[STRIDE]]
613 ; NOSTRIDED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
614 ; NOSTRIDED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
615 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
616 ; NOSTRIDED: vector.body:
617 ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
618 ; NOSTRIDED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
619 ; NOSTRIDED-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]]
620 ; NOSTRIDED-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0
621 ; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP9]], align 4
622 ; NOSTRIDED-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
623 ; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP10]], ptr [[TMP9]], align 4
624 ; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
625 ; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
626 ; NOSTRIDED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
627 ; NOSTRIDED: middle.block:
628 ; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
629 ; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
630 ; NOSTRIDED: scalar.ph:
631 ; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
632 ; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
633 ; NOSTRIDED-NEXT: br label [[LOOP:%.*]]
635 ; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
636 ; NOSTRIDED-NEXT: [[OFFSET:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[OFFSET_NEXT:%.*]], [[LOOP]] ]
637 ; NOSTRIDED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
638 ; NOSTRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
639 ; NOSTRIDED-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
640 ; NOSTRIDED-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
641 ; NOSTRIDED-NEXT: store i32 [[Y0]], ptr [[Q1]], align 4
642 ; NOSTRIDED-NEXT: [[OFFSET_NEXT]] = add nuw nsw i64 [[OFFSET]], [[STRIDE]]
643 ; NOSTRIDED-NEXT: [[NEXTI]] = add i64 [[I]], 1
644 ; NOSTRIDED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
645 ; NOSTRIDED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
647 ; NOSTRIDED-NEXT: ret void
649 ; STRIDED-LABEL: @double_stride_int_iv(
650 ; STRIDED-NEXT: entry:
651 ; STRIDED-NEXT: br label [[LOOP:%.*]]
653 ; STRIDED-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
654 ; STRIDED-NEXT: [[OFFSET:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[OFFSET_NEXT:%.*]], [[LOOP]] ]
655 ; STRIDED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET]]
656 ; STRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
657 ; STRIDED-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
658 ; STRIDED-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
659 ; STRIDED-NEXT: store i32 [[Y0]], ptr [[Q1]], align 4
660 ; STRIDED-NEXT: [[OFFSET_NEXT]] = add nuw nsw i64 [[OFFSET]], [[STRIDE:%.*]]
661 ; STRIDED-NEXT: [[NEXTI]] = add i64 [[I]], 1
662 ; STRIDED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
663 ; STRIDED-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]]
665 ; STRIDED-NEXT: ret void
670 %i = phi i64 [0, %entry], [%nexti, %loop]
671 %offset = phi i64 [0, %entry], [%offset.next, %loop]
673 %q0 = getelementptr i32, ptr %p, i64 %offset
674 %x0 = load i32, ptr %q0
676 %q1 = getelementptr i32, ptr %p, i64 %offset
677 store i32 %y0, ptr %q1
679 %offset.next = add nsw nuw i64 %offset, %stride
680 %nexti = add i64 %i, 1
681 %done = icmp eq i64 %nexti, 1024
682 br i1 %done, label %exit, label %loop
688 define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
689 ; NOSTRIDED-LABEL: @double_stride_ptr_iv(
690 ; NOSTRIDED-NEXT: entry:
691 ; NOSTRIDED-NEXT: br label [[LOOP:%.*]]
693 ; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
694 ; NOSTRIDED-NEXT: [[PTR:%.*]] = phi ptr [ [[P:%.*]], [[ENTRY]] ], [ [[PTR_NEXT:%.*]], [[LOOP]] ]
695 ; NOSTRIDED-NEXT: [[PTR2:%.*]] = phi ptr [ [[P2:%.*]], [[ENTRY]] ], [ [[PTR2_NEXT:%.*]], [[LOOP]] ]
696 ; NOSTRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[PTR]], align 4
697 ; NOSTRIDED-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
698 ; NOSTRIDED-NEXT: store i32 [[Y0]], ptr [[PTR2]], align 4
699 ; NOSTRIDED-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[STRIDE:%.*]]
700 ; NOSTRIDED-NEXT: [[PTR2_NEXT]] = getelementptr inbounds i8, ptr [[PTR2]], i64 [[STRIDE]]
701 ; NOSTRIDED-NEXT: [[NEXTI]] = add i64 [[I]], 1
702 ; NOSTRIDED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
703 ; NOSTRIDED-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]]
705 ; NOSTRIDED-NEXT: ret void
707 ; STRIDED-LABEL: @double_stride_ptr_iv(
708 ; STRIDED-NEXT: entry:
709 ; STRIDED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
710 ; STRIDED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
711 ; STRIDED-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
712 ; STRIDED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP2]]
713 ; STRIDED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
714 ; STRIDED: vector.memcheck:
715 ; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 [[STRIDE:%.*]], 1023
716 ; STRIDED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 [[TMP3]]
717 ; STRIDED-NEXT: [[TMP4:%.*]] = icmp ult ptr [[P2]], [[SCEVGEP]]
718 ; STRIDED-NEXT: [[UMIN:%.*]] = select i1 [[TMP4]], ptr [[P2]], ptr [[SCEVGEP]]
719 ; STRIDED-NEXT: [[TMP5:%.*]] = icmp ugt ptr [[P2]], [[SCEVGEP]]
720 ; STRIDED-NEXT: [[UMAX:%.*]] = select i1 [[TMP5]], ptr [[P2]], ptr [[SCEVGEP]]
721 ; STRIDED-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[UMAX]], i64 4
722 ; STRIDED-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP3]]
723 ; STRIDED-NEXT: [[TMP6:%.*]] = icmp ult ptr [[P]], [[SCEVGEP2]]
724 ; STRIDED-NEXT: [[UMIN3:%.*]] = select i1 [[TMP6]], ptr [[P]], ptr [[SCEVGEP2]]
725 ; STRIDED-NEXT: [[TMP7:%.*]] = icmp ugt ptr [[P]], [[SCEVGEP2]]
726 ; STRIDED-NEXT: [[UMAX4:%.*]] = select i1 [[TMP7]], ptr [[P]], ptr [[SCEVGEP2]]
727 ; STRIDED-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[UMAX4]], i64 4
728 ; STRIDED-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[UMIN]], [[SCEVGEP5]]
729 ; STRIDED-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[UMIN3]], [[SCEVGEP1]]
730 ; STRIDED-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
731 ; STRIDED-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
732 ; STRIDED: vector.ph:
733 ; STRIDED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
734 ; STRIDED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4
735 ; STRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP9]]
736 ; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
737 ; STRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[N_VEC]], [[STRIDE]]
738 ; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP10]]
739 ; STRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[N_VEC]], [[STRIDE]]
740 ; STRIDED-NEXT: [[IND_END7:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP11]]
741 ; STRIDED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
742 ; STRIDED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4
743 ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
744 ; STRIDED: vector.body:
745 ; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
746 ; STRIDED-NEXT: [[POINTER_PHI11:%.*]] = phi ptr [ [[P2]], [[VECTOR_PH]] ], [ [[PTR_IND12:%.*]], [[VECTOR_BODY]] ]
747 ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
748 ; STRIDED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
749 ; STRIDED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 4
750 ; STRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 1
751 ; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 [[STRIDE]], [[TMP16]]
752 ; STRIDED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP15]], 0
753 ; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP18]], i64 0
754 ; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
755 ; STRIDED-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
756 ; STRIDED-NEXT: [[TMP20:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT]], [[TMP19]]
757 ; STRIDED-NEXT: [[DOTSPLATINSERT9:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
758 ; STRIDED-NEXT: [[DOTSPLAT10:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT9]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
759 ; STRIDED-NEXT: [[TMP21:%.*]] = mul <vscale x 4 x i64> [[TMP20]], [[DOTSPLAT10]]
760 ; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[TMP21]]
761 ; STRIDED-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
762 ; STRIDED-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4
763 ; STRIDED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 1
764 ; STRIDED-NEXT: [[TMP25:%.*]] = mul i64 [[STRIDE]], [[TMP24]]
765 ; STRIDED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP23]], 0
766 ; STRIDED-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP26]], i64 0
767 ; STRIDED-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT13]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
768 ; STRIDED-NEXT: [[TMP27:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
769 ; STRIDED-NEXT: [[TMP28:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT14]], [[TMP27]]
770 ; STRIDED-NEXT: [[TMP29:%.*]] = mul <vscale x 4 x i64> [[TMP28]], [[DOTSPLAT10]]
771 ; STRIDED-NEXT: [[VECTOR_GEP17:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], <vscale x 4 x i64> [[TMP29]]
772 ; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[VECTOR_GEP]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison), !alias.scope [[META15:![0-9]+]]
773 ; STRIDED-NEXT: [[TMP30:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1)
774 ; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP30]], <vscale x 4 x ptr> [[VECTOR_GEP17]], i32 4, <vscale x 4 x i1> splat (i1 true)), !alias.scope [[META18:![0-9]+]], !noalias [[META15]]
775 ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]]
776 ; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP17]]
777 ; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP25]]
778 ; STRIDED-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
779 ; STRIDED-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
780 ; STRIDED: middle.block:
781 ; STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
782 ; STRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
783 ; STRIDED: scalar.ph:
784 ; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
785 ; STRIDED-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY]] ], [ [[P]], [[VECTOR_MEMCHECK]] ]
786 ; STRIDED-NEXT: [[BC_RESUME_VAL8:%.*]] = phi ptr [ [[IND_END7]], [[MIDDLE_BLOCK]] ], [ [[P2]], [[ENTRY]] ], [ [[P2]], [[VECTOR_MEMCHECK]] ]
787 ; STRIDED-NEXT: br label [[LOOP:%.*]]
789 ; STRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
790 ; STRIDED-NEXT: [[PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ], [ [[PTR_NEXT:%.*]], [[LOOP]] ]
791 ; STRIDED-NEXT: [[PTR2:%.*]] = phi ptr [ [[BC_RESUME_VAL8]], [[SCALAR_PH]] ], [ [[PTR2_NEXT:%.*]], [[LOOP]] ]
792 ; STRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[PTR]], align 4
793 ; STRIDED-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
794 ; STRIDED-NEXT: store i32 [[Y0]], ptr [[PTR2]], align 4
795 ; STRIDED-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[STRIDE]]
796 ; STRIDED-NEXT: [[PTR2_NEXT]] = getelementptr inbounds i8, ptr [[PTR2]], i64 [[STRIDE]]
797 ; STRIDED-NEXT: [[NEXTI]] = add i64 [[I]], 1
798 ; STRIDED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
799 ; STRIDED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
801 ; STRIDED-NEXT: ret void
806 %i = phi i64 [0, %entry], [%nexti, %loop]
807 %ptr = phi ptr [%p, %entry], [%ptr.next, %loop]
808 %ptr2 = phi ptr [%p2, %entry], [%ptr2.next, %loop]
810 %x0 = load i32, ptr %ptr
812 store i32 %y0, ptr %ptr2
814 %ptr.next = getelementptr inbounds i8, ptr %ptr, i64 %stride
815 %ptr2.next = getelementptr inbounds i8, ptr %ptr2, i64 %stride
816 %nexti = add i64 %i, 1
817 %done = icmp eq i64 %nexti, 1024
818 br i1 %done, label %exit, label %loop