1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -mcpu=skx -S -loop-vectorize -instcombine -simplifycfg -licm -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck %s -check-prefix=DISABLED_MASKED_STRIDED
3 ; RUN: opt -mcpu=skx -S -loop-vectorize -instcombine -simplifycfg -licm -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck %s -check-prefix=ENABLED_MASKED_STRIDED
5 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
6 target triple = "x86_64-unknown-linux-gnu"
8 ; (1) Interleave-group with factor 4, storing only 2 members out of the 4.
9 ; Check that when we allow masked-memops to support interleave-group with gaps,
10 ; the store is vectorized using a wide masked store, with a 1,1,0,0,1,1,0,0,... mask.
11 ; Check that when we don't allow masked-memops to support interleave-group with gaps,
12 ; the store is scalarized.
13 ; The input IR was generated from this source:
14 ; for(i=0;i<1024;i++){
16 ; points[i*4 + 1] = y[i];
18 ; (relates to the testcase in PR50566)
20 ; Function Attrs: nofree norecurse nosync nounwind uwtable
21 define dso_local void @test1(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) local_unnamed_addr {
22 ; DISABLED_MASKED_STRIDED-LABEL: @test1(
23 ; DISABLED_MASKED_STRIDED-NEXT: entry:
24 ; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
25 ; DISABLED_MASKED_STRIDED: vector.body:
26 ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
27 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
28 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i64 [[INDEX]]
29 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <4 x i16>*
30 ; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 2
31 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
32 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0
33 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[POINTS:%.*]], i64 [[TMP3]]
34 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP2]], i32 1
35 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP5]]
36 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP2]], i32 2
37 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP7]]
38 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP2]], i32 3
39 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP9]]
40 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 0
41 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP11]], i16* [[TMP4]], align 2
42 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 1
43 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP12]], i16* [[TMP6]], align 2
44 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
45 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP13]], i16* [[TMP8]], align 2
46 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
47 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP14]], i16* [[TMP10]], align 2
48 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i64 [[INDEX]]
49 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = bitcast i16* [[TMP15]] to <4 x i16>*
50 ; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP16]], align 2
51 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = or <4 x i64> [[TMP2]], <i64 1, i64 1, i64 1, i64 1>
52 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = extractelement <4 x i64> [[TMP17]], i32 0
53 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP18]]
54 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP17]], i32 1
55 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP20]]
56 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP17]], i32 2
57 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP22]]
58 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP17]], i32 3
59 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP24]]
60 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 0
61 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP26]], i16* [[TMP19]], align 2
62 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 1
63 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP27]], i16* [[TMP21]], align 2
64 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 2
65 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP28]], i16* [[TMP23]], align 2
66 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 3
67 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP29]], i16* [[TMP25]], align 2
68 ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
69 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
70 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
71 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP30]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
72 ; DISABLED_MASKED_STRIDED: for.end:
73 ; DISABLED_MASKED_STRIDED-NEXT: ret void
75 ; ENABLED_MASKED_STRIDED-LABEL: @test1(
76 ; ENABLED_MASKED_STRIDED-NEXT: entry:
77 ; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
78 ; ENABLED_MASKED_STRIDED: vector.body:
79 ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
80 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i64 [[INDEX]]
81 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <4 x i16>*
82 ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 2
83 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[INDEX]], 2
84 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i64 [[INDEX]]
85 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <4 x i16>*
86 ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP4]], align 2
87 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[POINTS:%.*]], i64 [[TMP2]]
88 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <16 x i16>*
89 ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <16 x i32> <i32 0, i32 4, i32 undef, i32 undef, i32 1, i32 5, i32 undef, i32 undef, i32 2, i32 6, i32 undef, i32 undef, i32 3, i32 7, i32 undef, i32 undef>
90 ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> [[INTERLEAVED_VEC]], <16 x i16>* [[TMP6]], i32 2, <16 x i1> <i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false>)
91 ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
92 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
93 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
94 ; ENABLED_MASKED_STRIDED: for.end:
95 ; ENABLED_MASKED_STRIDED-NEXT: ret void
101 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
102 %arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv
103 %0 = load i16, i16* %arrayidx, align 2
104 %1 = shl nuw nsw i64 %indvars.iv, 2
105 %arrayidx2 = getelementptr inbounds i16, i16* %points, i64 %1
106 store i16 %0, i16* %arrayidx2, align 2
107 %arrayidx4 = getelementptr inbounds i16, i16* %y, i64 %indvars.iv
108 %2 = load i16, i16* %arrayidx4, align 2
110 %arrayidx7 = getelementptr inbounds i16, i16* %points, i64 %3
111 store i16 %2, i16* %arrayidx7, align 2
112 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
113 %exitcond.not = icmp eq i64 %indvars.iv.next, 1024
114 br i1 %exitcond.not, label %for.end, label %for.body
120 ; (2) Same as above, but this time the gaps mask of the store is also And-ed with the
121 ; fold-tail mask. If using masked memops to vectorize interleaved-group with gaps is
122 ; not allowed, the store is scalarized and predicated.
123 ; The input IR was generated from this source:
124 ; for(i=0;i<numPoints;i++){
125 ; points[i*4] = x[i];
126 ; points[i*4 + 1] = y[i];
129 ; Function Attrs: nofree norecurse nosync nounwind uwtable
130 define dso_local void @test2(i16* noalias nocapture %points, i32 %numPoints, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) local_unnamed_addr {
131 ; DISABLED_MASKED_STRIDED-LABEL: @test2(
132 ; DISABLED_MASKED_STRIDED-NEXT: entry:
133 ; DISABLED_MASKED_STRIDED-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[NUMPOINTS:%.*]], 0
134 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[CMP15]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
135 ; DISABLED_MASKED_STRIDED: for.body.preheader:
136 ; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[NUMPOINTS]] to i64
137 ; DISABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add nuw nsw i64 [[WIDE_TRIP_COUNT]], 3
138 ; DISABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], 8589934588
139 ; DISABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
140 ; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i32 0
141 ; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
142 ; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
143 ; DISABLED_MASKED_STRIDED: vector.body:
144 ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE15:%.*]] ]
145 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[FOR_BODY_PREHEADER]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE15]] ]
146 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
147 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i64 [[INDEX]]
148 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = bitcast i16* [[TMP1]] to <4 x i16>*
149 ; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP2]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison)
150 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = shl nsw <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
151 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
152 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
153 ; DISABLED_MASKED_STRIDED: pred.store.if:
154 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
155 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[POINTS:%.*]], i64 [[TMP5]]
156 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD]], i32 0
157 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP7]], i16* [[TMP6]], align 2
158 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]]
159 ; DISABLED_MASKED_STRIDED: pred.store.continue:
160 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
161 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
162 ; DISABLED_MASKED_STRIDED: pred.store.if1:
163 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
164 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP9]]
165 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD]], i32 1
166 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP11]], i16* [[TMP10]], align 2
167 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE2]]
168 ; DISABLED_MASKED_STRIDED: pred.store.continue2:
169 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
170 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
171 ; DISABLED_MASKED_STRIDED: pred.store.if3:
172 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
173 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP13]]
174 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD]], i32 2
175 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP15]], i16* [[TMP14]], align 2
176 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE4]]
177 ; DISABLED_MASKED_STRIDED: pred.store.continue4:
178 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
179 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
180 ; DISABLED_MASKED_STRIDED: pred.store.if5:
181 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
182 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP17]]
183 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD]], i32 3
184 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP19]], i16* [[TMP18]], align 2
185 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE6]]
186 ; DISABLED_MASKED_STRIDED: pred.store.continue6:
187 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i64 [[INDEX]]
188 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = bitcast i16* [[TMP20]] to <4 x i16>*
189 ; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_LOAD7:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP21]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison)
190 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = or <4 x i64> [[TMP3]], <i64 1, i64 1, i64 1, i64 1>
191 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
192 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
193 ; DISABLED_MASKED_STRIDED: pred.store.if8:
194 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i32 0
195 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP24]]
196 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD7]], i32 0
197 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP26]], i16* [[TMP25]], align 2
198 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE9]]
199 ; DISABLED_MASKED_STRIDED: pred.store.continue9:
200 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
201 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
202 ; DISABLED_MASKED_STRIDED: pred.store.if10:
203 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP22]], i32 1
204 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP28]]
205 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD7]], i32 1
206 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP30]], i16* [[TMP29]], align 2
207 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE11]]
208 ; DISABLED_MASKED_STRIDED: pred.store.continue11:
209 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
210 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
211 ; DISABLED_MASKED_STRIDED: pred.store.if12:
212 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP22]], i32 2
213 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP32]]
214 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD7]], i32 2
215 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP34]], i16* [[TMP33]], align 2
216 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE13]]
217 ; DISABLED_MASKED_STRIDED: pred.store.continue13:
218 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
219 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15]]
220 ; DISABLED_MASKED_STRIDED: pred.store.if14:
221 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP36:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3
222 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP36]]
223 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP38:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD7]], i32 3
224 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP38]], i16* [[TMP37]], align 2
225 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE15]]
226 ; DISABLED_MASKED_STRIDED: pred.store.continue15:
227 ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
228 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
229 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP39:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
230 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP39]], label [[FOR_END_LOOPEXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
231 ; DISABLED_MASKED_STRIDED: for.end.loopexit:
232 ; DISABLED_MASKED_STRIDED-NEXT: br label [[FOR_END]]
233 ; DISABLED_MASKED_STRIDED: for.end:
234 ; DISABLED_MASKED_STRIDED-NEXT: ret void
236 ; ENABLED_MASKED_STRIDED-LABEL: @test2(
237 ; ENABLED_MASKED_STRIDED-NEXT: entry:
238 ; ENABLED_MASKED_STRIDED-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[NUMPOINTS:%.*]], 0
239 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[CMP15]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
240 ; ENABLED_MASKED_STRIDED: for.body.preheader:
241 ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[NUMPOINTS]] to i64
242 ; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add nuw nsw i64 [[WIDE_TRIP_COUNT]], 3
243 ; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], 8589934588
244 ; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
245 ; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i32 0
246 ; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
247 ; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
248 ; ENABLED_MASKED_STRIDED: vector.body:
249 ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
250 ; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0
251 ; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
252 ; ENABLED_MASKED_STRIDED-NEXT: [[INDUCTION:%.*]] = or <4 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3>
253 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[INDUCTION]], [[BROADCAST_SPLAT]]
254 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i64 [[INDEX]]
255 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = bitcast i16* [[TMP1]] to <4 x i16>*
256 ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP2]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison)
257 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = shl nsw i64 [[INDEX]], 2
258 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i64 [[INDEX]]
259 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>*
260 ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP5]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison)
261 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[POINTS:%.*]], i64 [[TMP3]]
262 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <16 x i16>*
263 ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i16> [[WIDE_MASKED_LOAD]], <4 x i16> [[WIDE_MASKED_LOAD3]], <16 x i32> <i32 0, i32 4, i32 undef, i32 undef, i32 1, i32 5, i32 undef, i32 undef, i32 2, i32 6, i32 undef, i32 undef, i32 3, i32 7, i32 undef, i32 undef>
264 ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <4 x i1> [[TMP0]], <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
265 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false>
266 ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> [[INTERLEAVED_VEC]], <16 x i16>* [[TMP7]], i32 2, <16 x i1> [[TMP8]])
267 ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
268 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
269 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP9]], label [[FOR_END_LOOPEXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
270 ; ENABLED_MASKED_STRIDED: for.end.loopexit:
271 ; ENABLED_MASKED_STRIDED-NEXT: br label [[FOR_END]]
272 ; ENABLED_MASKED_STRIDED: for.end:
273 ; ENABLED_MASKED_STRIDED-NEXT: ret void
276 %cmp15 = icmp sgt i32 %numPoints, 0
277 br i1 %cmp15, label %for.body.preheader, label %for.end
280 %wide.trip.count = zext i32 %numPoints to i64
284 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
285 %arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv
286 %0 = load i16, i16* %arrayidx, align 2
287 %1 = shl nsw i64 %indvars.iv, 2
288 %arrayidx2 = getelementptr inbounds i16, i16* %points, i64 %1
289 store i16 %0, i16* %arrayidx2, align 2
290 %arrayidx4 = getelementptr inbounds i16, i16* %y, i64 %indvars.iv
291 %2 = load i16, i16* %arrayidx4, align 2
293 %arrayidx7 = getelementptr inbounds i16, i16* %points, i64 %3
294 store i16 %2, i16* %arrayidx7, align 2
295 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
296 %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
297 br i1 %exitcond.not, label %for.end.loopexit, label %for.body
306 ; (3) Testing a scenario of a conditional store. The gaps mask of the store is also
307 ; And-ed with the condition mask (x[i] > 0).
308 ; If using masked memops to vectorize interleaved-group with gaps is
309 ; not allowed, the store is scalarized and predicated.
310 ; Here the Interleave-group is with factor 3, storing only 1 member out of the 3.
311 ; The input IR was generated from this source:
312 ; for(i=0;i<1024;i++){
314 ; points[i*3] = x[i];
316 ; Function Attrs: nofree norecurse nosync nounwind uwtable
317 define dso_local void @test(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readnone %y) local_unnamed_addr {
318 ; DISABLED_MASKED_STRIDED-LABEL: @test(
319 ; DISABLED_MASKED_STRIDED-NEXT: entry:
320 ; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
321 ; DISABLED_MASKED_STRIDED: vector.body:
322 ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
323 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
324 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i64 [[INDEX]]
325 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <4 x i16>*
326 ; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 2
327 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i16> [[WIDE_LOAD]], zeroinitializer
328 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = mul nuw nsw <4 x i64> [[VEC_IND]], <i64 3, i64 3, i64 3, i64 3>
329 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
330 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
331 ; DISABLED_MASKED_STRIDED: pred.store.if:
332 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
333 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[POINTS:%.*]], i64 [[TMP5]]
334 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 0
335 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP7]], i16* [[TMP6]], align 2
336 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]]
337 ; DISABLED_MASKED_STRIDED: pred.store.continue:
338 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
339 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
340 ; DISABLED_MASKED_STRIDED: pred.store.if1:
341 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
342 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP9]]
343 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 1
344 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP11]], i16* [[TMP10]], align 2
345 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE2]]
346 ; DISABLED_MASKED_STRIDED: pred.store.continue2:
347 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
348 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
349 ; DISABLED_MASKED_STRIDED: pred.store.if3:
350 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
351 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP13]]
352 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
353 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP15]], i16* [[TMP14]], align 2
354 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE4]]
355 ; DISABLED_MASKED_STRIDED: pred.store.continue4:
356 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
357 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
358 ; DISABLED_MASKED_STRIDED: pred.store.if5:
359 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
360 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, i16* [[POINTS]], i64 [[TMP17]]
361 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
362 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP19]], i16* [[TMP18]], align 2
363 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE6]]
364 ; DISABLED_MASKED_STRIDED: pred.store.continue6:
365 ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
366 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
367 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
368 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP20]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
369 ; DISABLED_MASKED_STRIDED: for.end:
370 ; DISABLED_MASKED_STRIDED-NEXT: ret void
372 ; ENABLED_MASKED_STRIDED-LABEL: @test(
373 ; ENABLED_MASKED_STRIDED-NEXT: entry:
374 ; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
375 ; ENABLED_MASKED_STRIDED: vector.body:
376 ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
377 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i64 [[INDEX]]
378 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <4 x i16>*
379 ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 2
380 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i16> [[WIDE_LOAD]], zeroinitializer
381 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = mul nuw nsw i64 [[INDEX]], 3
382 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[POINTS:%.*]], i64 [[TMP3]]
383 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <12 x i16>*
384 ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> poison, <12 x i32> <i32 0, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 3, i32 undef, i32 undef>
385 ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
386 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = and <12 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false>
387 ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v12i16.p0v12i16(<12 x i16> [[INTERLEAVED_VEC]], <12 x i16>* [[TMP5]], i32 2, <12 x i1> [[TMP6]])
388 ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
389 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
390 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
391 ; ENABLED_MASKED_STRIDED: for.end:
392 ; ENABLED_MASKED_STRIDED-NEXT: ret void
398 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
399 %arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv
400 %0 = load i16, i16* %arrayidx, align 2
401 %cmp1 = icmp sgt i16 %0, 0
402 br i1 %cmp1, label %if.then, label %for.inc
405 %1 = mul nuw nsw i64 %indvars.iv, 3
406 %arrayidx6 = getelementptr inbounds i16, i16* %points, i64 %1
407 store i16 %0, i16* %arrayidx6, align 2
411 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
412 %exitcond.not = icmp eq i64 %indvars.iv.next, 1024
413 br i1 %exitcond.not, label %for.end, label %for.body