; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+v | FileCheck %s --check-prefixes=CHECK

%struct.foo = type { i32, i32, i32, i32 }

declare <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
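
; The gather below indexes field 3 of %struct.foo with a vector induction, so
; consecutive lanes are 16 bytes apart; it should be rewritten into
; @llvm.experimental.vp.strided.load with a 16-byte stride.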
define <vscale x 1 x i64> @gather(ptr %a, i32 %len) {
; CHECK-LABEL: @gather(
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]], i32 3
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]])
; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> undef, i32 [[TMP2]])
; CHECK-NEXT: [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP0]]
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[TMP0]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[TMP4]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %0, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  %gather = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %2, i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> undef)
  %accum.next = add <vscale x 1 x i64> %accum, %gather
  %index.next = add nuw i64 %index, %0
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret <vscale x 1 x i64> %accum.next
}

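; Same pattern as @gather, but the odd element indices are formed with
; "or disjoint" on a doubled step vector; the access should still be strided
; by 16 bytes, starting at element 1.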
define <vscale x 1 x i64> @gather_disjoint_or(ptr %a, i64 %len) {
; CHECK-LABEL: @gather_disjoint_or(
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 1, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]]
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]])
; CHECK-NEXT: [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[VSCALE]]
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 2
; CHECK-NEXT: [[EXIT:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[LEN:%.*]]
; CHECK-NEXT: br i1 [[EXIT]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
vector.ph:
  %vscale = call i64 @llvm.vscale.i64()
  %step = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %step.mul2 = shl <vscale x 1 x i64> %step, splat (i64 1)
  br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %step.mul2, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]
  %vec.ind.or = or disjoint <vscale x 1 x i64> %vec.ind, splat (i64 1)
  %gep = getelementptr i64, ptr %a, <vscale x 1 x i64> %vec.ind.or
  %gather = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %gep,
    i32 8,
    <vscale x 1 x i1> splat (i1 true),
    <vscale x 1 x i64> poison
  )
  %accum.next = add <vscale x 1 x i64> %accum, %gather
  %index.next = add nuw i64 %index, %vscale
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, splat (i64 2)
  %exit = icmp ne i64 %index.next, %len
  br i1 %exit, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret <vscale x 1 x i64> %accum.next
}

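; The step is loaded from memory and changes every iteration. Within one
; iteration the access is still strided by 16 bytes, and the scalarized
; induction variable should be advanced by the loaded step.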
define <vscale x 1 x i64> @gather_non_invariant_step(ptr %a, ptr %b, i32 %len) {
; CHECK-LABEL: @gather_non_invariant_step(
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> undef, i32 [[TMP1]])
; CHECK-NEXT: [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT: [[B:%.*]] = getelementptr i64, ptr [[B1:%.*]], i64 [[VEC_IND_SCALAR]]
; CHECK-NEXT: [[STEP:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[STEP]]
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[STEP]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  %gather = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %2, i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> undef)
  %accum.next = add <vscale x 1 x i64> %accum, %gather
  %b.gep = getelementptr i64, ptr %b, i64 %index
  %step = load i64, ptr %b.gep
  %index.next = add nuw i64 %index, %step
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %step, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret <vscale x 1 x i64> %accum.next
}

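; As above, but the vector index is adjusted by "add 42" and "shl 2" before
; the GEP, so the rewritten access should start at scalar index 168 (42 << 2)
; with a 64-byte stride, and the loaded step is shifted left by 2.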
define <vscale x 1 x i64> @gather_non_invariant_step_shl(ptr %a, ptr %b, i32 %len) {
; CHECK-LABEL: @gather_non_invariant_step_shl(
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 168, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 64, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> undef, i32 [[TMP1]])
; CHECK-NEXT: [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT: [[B:%.*]] = getelementptr i64, ptr [[B1:%.*]], i64 [[VEC_IND_SCALAR]]
; CHECK-NEXT: [[STEP:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT: [[STEP1:%.*]] = shl i64 [[STEP]], 2
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[STEP]]
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[STEP1]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]
  %vec.ind.add = add <vscale x 1 x i64> %vec.ind, splat (i64 42)
  %vec.ind.shl = shl <vscale x 1 x i64> %vec.ind.add, splat (i64 2)
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind.shl, i32 3
  %gather = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %2, i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> undef)
  %accum.next = add <vscale x 1 x i64> %accum, %gather
  %b.gep = getelementptr i64, ptr %b, i64 %index
  %step = load i64, ptr %b.gep
  %index.next = add nuw i64 %index, %step
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %step, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret <vscale x 1 x i64> %accum.next
}

; Check that the operand of the binary op (%scale.splat in shl) always dominates
; the existing step value when we're adjusting it.
define <vscale x 1 x i64> @gather_splat_op_after_step(ptr %a, ptr %b, i32 %len) {
; CHECK-LABEL: @gather_splat_op_after_step(
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[SCALE:%.*]] = load i64, ptr [[B:%.*]], align 8
; CHECK-NEXT: [[STRIDE:%.*]] = shl i64 1, [[SCALE]]
; CHECK-NEXT: [[STEP:%.*]] = shl i64 [[TMP0]], [[SCALE]]
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[STRIDE]], 16
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]], i32 3
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP2]], i64 [[TMP1]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP4]], <vscale x 1 x i64> undef, i32 [[TMP3]])
; CHECK-NEXT: [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP0]]
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[STEP]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[TMP5]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %0, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %scale = load i64, ptr %b
  %scale.head = insertelement <vscale x 1 x i64> poison, i64 %scale, i64 0
  %scale.splat = shufflevector <vscale x 1 x i64> %scale.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]
  %vec.ind.shl = shl <vscale x 1 x i64> %vec.ind, %scale.splat
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind.shl, i32 3
  %gather = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %2, i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> undef)
  %accum.next = add <vscale x 1 x i64> %accum, %gather
  %index.next = add nuw i64 %index, %0
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret <vscale x 1 x i64> %accum.next
}

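; Scatter counterpart of @gather: the masked scatter to field 3 of %struct.foo
; should become @llvm.experimental.vp.strided.store with a 16-byte stride.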
define void @scatter(ptr %a, i32 %len) {
; CHECK-LABEL: @scatter(
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]], i32 3
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP1]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP0]]
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %0, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  tail call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, i32 8, <vscale x 1 x i1> splat (i1 true))
  %index.next = add nuw i64 %index, %0
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

define void @scatter_non_invariant_step(ptr %a, ptr %b, i32 %len) {
; CHECK-LABEL: @scatter_non_invariant_step(
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP0]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT: [[B:%.*]] = getelementptr i64, ptr [[B1:%.*]], i64 [[VEC_IND_SCALAR]]
; CHECK-NEXT: [[STEP:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[STEP]]
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[STEP]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  tail call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, i32 8, <vscale x 1 x i1> splat (i1 true))
  %b.gep = getelementptr i64, ptr %b, i64 %index
  %step = load i64, ptr %b.gep
  %index.next = add nuw i64 %index, %step
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %step, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

define void @scatter_non_invariant_step_add_shl(ptr %a, ptr %b, i32 %len) {
; CHECK-LABEL: @scatter_non_invariant_step_add_shl(
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 168, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP0]], i64 64, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT: [[B:%.*]] = getelementptr i64, ptr [[B1:%.*]], i64 [[VEC_IND_SCALAR]]
; CHECK-NEXT: [[STEP:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT: [[STEP1:%.*]] = shl i64 [[STEP]], 2
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[STEP]]
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[STEP1]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %vec.ind.add = add <vscale x 1 x i64> %vec.ind, splat (i64 42)
  %vec.ind.shl = shl <vscale x 1 x i64> %vec.ind.add, splat (i64 2)
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind.shl, i32 3
  tail call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, i32 8, <vscale x 1 x i1> splat (i1 true))
  %b.gep = getelementptr i64, ptr %b, i64 %index
  %step = load i64, ptr %b.gep
  %index.next = add nuw i64 %index, %step
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %step, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

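; The rewrite does not need a loop: a stepvector-based offset scaled by a
; runtime stride should become a strided load with a byte stride of %stride * 4.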
define <vscale x 1 x i64> @gather_loopless(ptr %p, i64 %stride) {
; CHECK-LABEL: @gather_loopless(
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[STRIDE:%.*]], 4
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 [[TMP1]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]])
; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> poison, i32 [[TMP2]])
; CHECK-NEXT: ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %splat.insert = insertelement <vscale x 1 x i64> poison, i64 %stride, i64 0
  %splat = shufflevector <vscale x 1 x i64> %splat.insert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %offsets = mul <vscale x 1 x i64> %step, %splat
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offsets
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %ptrs,
    i32 8,
    <vscale x 1 x i1> splat (i1 1),
    <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

define <vscale x 1 x i64> @straightline_offset_add(ptr %p, i64 %offset) {
; CHECK-LABEL: @straightline_offset_add(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 4, <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]])
; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> poison, i32 [[TMP2]])
; CHECK-NEXT: ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %splat.insert = insertelement <vscale x 1 x i64> poison, i64 %offset, i64 0
  %splat = shufflevector <vscale x 1 x i64> %splat.insert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %offsetv = add <vscale x 1 x i64> %step, %splat
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offsetv
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %ptrs,
    i32 8,
    <vscale x 1 x i1> splat (i1 1),
    <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

define <vscale x 1 x i64> @straightline_offset_disjoint_or(ptr %p, i64 %offset) {
; CHECK-LABEL: @straightline_offset_disjoint_or(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 8, <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]])
; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> poison, i32 [[TMP2]])
; CHECK-NEXT: ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %step.shl = shl <vscale x 1 x i64> %step, splat (i64 1)
  %offsetv = or disjoint <vscale x 1 x i64> %step.shl, splat (i64 1)
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offsetv
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %ptrs,
    i32 8,
    <vscale x 1 x i1> splat (i1 true),
    <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

define <vscale x 1 x i64> @straightline_offset_shl(ptr %p) {
; CHECK-LABEL: @straightline_offset_shl(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 32, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]])
; CHECK-NEXT: ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %offset = shl <vscale x 1 x i64> %step, splat (i64 3)
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offset
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %ptrs,
    i32 8,
    <vscale x 1 x i1> splat (i1 1),
    <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

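; Negative test: shl is not commutative, so a splat shifted left by the step
; vector is not a strided index and the masked gather must be kept.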
define <vscale x 1 x i64> @neg_shl_is_not_commutative(ptr %p) {
; CHECK-LABEL: @neg_shl_is_not_commutative(
; CHECK-NEXT: [[STEP:%.*]] = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
; CHECK-NEXT: [[SPLAT_INSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 3, i64 0
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[SPLAT_INSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; CHECK-NEXT: [[OFFSET:%.*]] = shl <vscale x 1 x i64> [[SPLAT]], [[STEP]]
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 1 x i64> [[OFFSET]]
; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[PTRS]], i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> poison)
; CHECK-NEXT: ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %splat.insert = insertelement <vscale x 1 x i64> poison, i64 3, i64 0
  %splat = shufflevector <vscale x 1 x i64> %splat.insert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %offset = shl <vscale x 1 x i64> %splat, %step
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offset
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %ptrs,
    i32 8,
    <vscale x 1 x i1> splat (i1 1),
    <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

define <vscale x 1 x i64> @straightline_offset_shl_nonc(ptr %p, i64 %shift) {
; CHECK-LABEL: @straightline_offset_shl_nonc(
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 1, [[SHIFT:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 [[TMP2]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP4]], <vscale x 1 x i64> poison, i32 [[TMP3]])
; CHECK-NEXT: ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %splat.insert = insertelement <vscale x 1 x i64> poison, i64 %shift, i64 0
  %splat = shufflevector <vscale x 1 x i64> %splat.insert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %offset = shl <vscale x 1 x i64> %step, %splat
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offset
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %ptrs,
    i32 8,
    <vscale x 1 x i1> splat (i1 1),
    <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

define void @scatter_loopless(<vscale x 1 x i64> %x, ptr %p, i64 %stride) {
; CHECK-LABEL: @scatter_loopless(
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[STRIDE:%.*]], 4
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> [[X:%.*]], ptr [[P:%.*]], i64 [[TMP1]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]])
; CHECK-NEXT: ret void
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %splat.insert = insertelement <vscale x 1 x i64> poison, i64 %stride, i64 0
  %splat = shufflevector <vscale x 1 x i64> %splat.insert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %offsets = mul <vscale x 1 x i64> %step, %splat
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offsets
  call void @llvm.masked.scatter.nxv1i64.nxv1p0(
    <vscale x 1 x i64> %x,
    <vscale x 1 x ptr> %ptrs,
    i32 8,
    <vscale x 1 x i1> splat (i1 1)
  )
  ret void
}

; We previously crashed expecting a constant to be fixed length.
define void @constant_stride(<vscale x 1 x i64> %x, ptr %p, i64 %stride) {
; CHECK-LABEL: @constant_stride(
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 1 x i64> zeroinitializer
; CHECK-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x ptr> [[PTRS]], i32 8, <vscale x 1 x i1> splat (i1 true))
; CHECK-NEXT: ret void
;
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> zeroinitializer
  call void @llvm.masked.scatter.nxv1i64.nxv1p0(
    <vscale x 1 x i64> %x,
    <vscale x 1 x ptr> %ptrs,
    i32 8,
    <vscale x 1 x i1> splat (i1 1)
  )
  ret void
}

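; A GEP of a scalar base over the step vector followed by a scalar-offset GEP
; on the resulting vector of pointers is still a strided (here unit-stride,
; 8-byte) access and should be folded to a single scalar base.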
define <vscale x 1 x i64> @vector_base_scalar_offset(ptr %p, i64 %offset) {
; CHECK-LABEL: @vector_base_scalar_offset(
; CHECK-NEXT: [[PTRS2OFFSET:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[PTRS2OFFSET]], i64 8, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]])
; CHECK-NEXT: ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %ptrs1 = getelementptr i64, ptr %p, <vscale x 1 x i64> %step
  %ptrs2 = getelementptr i64, <vscale x 1 x ptr> %ptrs1, i64 %offset
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %ptrs2,
    i32 8,
    <vscale x 1 x i1> splat (i1 1),
    <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

define <vscale x 1 x i64> @splat_base_scalar_offset(ptr %p, i64 %offset) {
; CHECK-LABEL: @splat_base_scalar_offset(
; CHECK-NEXT: [[PTRSOFFSET:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[PTRSOFFSET]], i64 0, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]])
; CHECK-NEXT: ret <vscale x 1 x i64> [[X]]
;
  %head = insertelement <vscale x 1 x ptr> poison, ptr %p, i32 0
  %splat = shufflevector <vscale x 1 x ptr> %head, <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
  %ptrs = getelementptr i64, <vscale x 1 x ptr> %splat, i64 %offset
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %ptrs,
    i32 8,
    <vscale x 1 x i1> splat (i1 1),
    <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

; We shouldn't be able to determine a stride here.
define <vscale x 1 x i64> @nonstrided_base_scalar_offset(ptr %p, <vscale x 1 x i64> %v, i64 %offset) {
; CHECK-LABEL: @nonstrided_base_scalar_offset(
; CHECK-NEXT: [[PTRS1:%.*]] = getelementptr i64, ptr [[P:%.*]], <vscale x 1 x i64> [[V:%.*]]
; CHECK-NEXT: [[PTRS2:%.*]] = getelementptr i64, <vscale x 1 x ptr> [[PTRS1]], i64 [[OFFSET:%.*]]
; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[PTRS2]], i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> poison)
; CHECK-NEXT: ret <vscale x 1 x i64> [[X]]
;
  %ptrs1 = getelementptr i64, ptr %p, <vscale x 1 x i64> %v
  %ptrs2 = getelementptr i64, <vscale x 1 x ptr> %ptrs1, i64 %offset
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %ptrs2,
    i32 8,
    <vscale x 1 x i1> splat (i1 1),
    <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

; We shouldn't be able to determine a scalar base here.
define <vscale x 1 x i64> @vector_base_vector_offset(ptr %p, <vscale x 1 x i64> %offset) {
; CHECK-LABEL: @vector_base_vector_offset(
; CHECK-NEXT: [[STEP:%.*]] = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
; CHECK-NEXT: [[PTRS1:%.*]] = getelementptr i64, ptr [[P:%.*]], <vscale x 1 x i64> [[STEP]]
; CHECK-NEXT: [[PTRS2:%.*]] = getelementptr i64, <vscale x 1 x ptr> [[PTRS1]], <vscale x 1 x i64> [[OFFSET:%.*]]
; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[PTRS2]], i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> poison)
; CHECK-NEXT: ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %ptrs1 = getelementptr i64, ptr %p, <vscale x 1 x i64> %step
  %ptrs2 = getelementptr i64, <vscale x 1 x ptr> %ptrs1, <vscale x 1 x i64> %offset
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %ptrs2,
    i32 8,
    <vscale x 1 x i1> splat (i1 1),
    <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

declare i64 @llvm.vscale.i64()
declare void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
declare <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
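
; vp.gather/vp.scatter variants: the explicit vector length of the VP intrinsic
; (constant 42 here) should be reused as the VL of the strided access.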
define <vscale x 1 x i64> @vp_gather(ptr %a, i32 %len) {
; CHECK-LABEL: @vp_gather(
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP2]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 42)
; CHECK-NEXT: [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[TMP0]]
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %0, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  %gather = call <vscale x 1 x i64> @llvm.vp.gather(<vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 42)
  %accum.next = add <vscale x 1 x i64> %accum, %gather
  %index.next = add nuw i64 %index, %0
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret <vscale x 1 x i64> %accum.next
}

define void @vp_scatter(ptr %a, i32 %len) {
; CHECK-LABEL: @vp_scatter(
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP2]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 42)
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[TMP0]]
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %0, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  tail call void @llvm.vp.scatter(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 42)
  %index.next = add nuw i64 %index, %0
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

; Test that reflects what the loop vectorizer will generate for an EVL
; tail-folded loop.
define <vscale x 1 x i64> @evl_gather(ptr %a, i32 %len) {
; CHECK-LABEL: @evl_gather(
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[ELEMS:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[VEC_IND_SCALAR]]
; CHECK-NEXT: [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 1, i1 true)
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[EVL]])
; CHECK-NEXT: [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT: [[EVL_ZEXT:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[EVL_ZEXT]]
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[EVL_ZEXT]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]
  %elems = sub i64 %wide.trip.count, %index
  %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %elems, i32 1, i1 true)
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  %gather = call <vscale x 1 x i64> @llvm.vp.gather(<vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  %accum.next = add <vscale x 1 x i64> %accum, %gather
  %evl.zext = zext i32 %evl to i64
  %index.next = add nuw i64 %index, %evl.zext
  %evl.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %evl.zext, i64 0
  %evl.splat = shufflevector <vscale x 1 x i64> %evl.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %evl.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret <vscale x 1 x i64> %accum.next
}

; Test that reflects what the loop vectorizer will generate for an EVL
; tail-folded loop.
define void @evl_scatter(ptr %a, i32 %len) {
; CHECK-LABEL: @evl_scatter(
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[ELEMS:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[VEC_IND_SCALAR1]]
; CHECK-NEXT: [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 1, i1 true)
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]], i32 3
; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP0]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[EVL]])
; CHECK-NEXT: [[EVL_ZEXT:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add nuw i64 [[VEC_IND_SCALAR1]], [[EVL_ZEXT]]
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[EVL_ZEXT]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR1]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %elems = sub i64 %wide.trip.count, %index
  %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %elems, i32 1, i1 true)
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  tail call void @llvm.vp.scatter(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  %evl.zext = zext i32 %evl to i64
  %index.next = add nuw i64 %index, %evl.zext
  %evl.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %evl.zext, i64 0
  %evl.splat = shufflevector <vscale x 1 x i64> %evl.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %evl.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}