; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-vectorize -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX -check-prefix=AVX1
; RUN: opt < %s -loop-vectorize -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX -check-prefix=AVX2
; RUN: opt < %s -loop-vectorize -mcpu=knl -S | FileCheck %s -check-prefix=AVX512

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc_linux"
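
; A note on what the assertions below check (a summary of the autogenerated
; check lines, not an authoritative statement of the pass's contract): the
; conditional load of B[i] and store to A[i] are expected to become
; @llvm.masked.load.* / @llvm.masked.store.* calls whose mask is the result of
; the icmp on trigger[i]. The AVX1 run checks a vectorization factor of 8;
; AVX2 checks VF 8 interleaved by 4 (the index steps by 32); AVX512 checks
; VF 16 interleaved by 4 (the index steps by 64).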

;void foo1(int *A, int *B, int *trigger) {
;
;  for (int i=0; i<10000; i++) {
;    if (trigger[i] < 100) {
;      A[i] = B[i] + trigger[i];
;    }
;  }
;}

define void @foo1(i32* nocapture %A, i32* nocapture readonly %B, i32* nocapture readonly %trigger) local_unnamed_addr #0 {
; AVX1-LABEL: @foo1(
; AVX1-NEXT: entry:
; AVX1-NEXT: [[A1:%.*]] = bitcast i32* [[A:%.*]] to i8*
; AVX1-NEXT: [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8*
; AVX1-NEXT: [[B6:%.*]] = bitcast i32* [[B:%.*]] to i8*
; AVX1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; AVX1: vector.memcheck:
; AVX1-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A]], i64 10000
; AVX1-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; AVX1-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 10000
; AVX1-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
; AVX1-NEXT: [[SCEVGEP7:%.*]] = getelementptr i32, i32* [[B]], i64 10000
; AVX1-NEXT: [[SCEVGEP78:%.*]] = bitcast i32* [[SCEVGEP7]] to i8*
; AVX1-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP45]]
; AVX1-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]]
; AVX1-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; AVX1-NEXT: [[BOUND09:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP78]]
; AVX1-NEXT: [[BOUND110:%.*]] = icmp ult i8* [[B6]], [[SCEVGEP2]]
; AVX1-NEXT: [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
; AVX1-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
; AVX1-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
; AVX1-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; AVX1: vector.ph:
; AVX1-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX1: vector.body:
; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0
; AVX1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
; AVX1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; AVX1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]]
; AVX1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
; AVX1-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <8 x i32>*
; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP3]], align 4, !alias.scope !0
; AVX1-NEXT: [[TMP4:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]]
; AVX1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
; AVX1-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <8 x i32>*
; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP7]], i32 4, <8 x i1> [[TMP4]], <8 x i32> undef), !alias.scope !3
; AVX1-NEXT: [[TMP8:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]]
; AVX1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]]
; AVX1-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0
; AVX1-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <8 x i32>*
; AVX1-NEXT: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> [[TMP8]], <8 x i32>* [[TMP11]], i32 4, <8 x i1> [[TMP4]]), !alias.scope !5, !noalias !7
; AVX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; AVX1-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
; AVX1-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !8
; AVX1: middle.block:
; AVX1-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, 10000
; AVX1-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; AVX1: scalar.ph:
; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; AVX1-NEXT: br label [[FOR_BODY:%.*]]
; AVX1: for.body:
; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX1-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; AVX1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP13]], 100
; AVX1-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX1: if.then:
; AVX1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; AVX1-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
; AVX1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
; AVX1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; AVX1-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX7]], align 4
; AVX1-NEXT: br label [[FOR_INC]]
; AVX1: for.inc:
; AVX1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
; AVX1-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !10
; AVX1: for.end:
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @foo1(
; AVX2-NEXT: entry:
; AVX2-NEXT: [[A1:%.*]] = bitcast i32* [[A:%.*]] to i8*
; AVX2-NEXT: [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8*
; AVX2-NEXT: [[B6:%.*]] = bitcast i32* [[B:%.*]] to i8*
; AVX2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; AVX2: vector.memcheck:
; AVX2-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A]], i64 10000
; AVX2-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; AVX2-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 10000
; AVX2-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
; AVX2-NEXT: [[SCEVGEP7:%.*]] = getelementptr i32, i32* [[B]], i64 10000
; AVX2-NEXT: [[SCEVGEP78:%.*]] = bitcast i32* [[SCEVGEP7]] to i8*
; AVX2-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP45]]
; AVX2-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]]
; AVX2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; AVX2-NEXT: [[BOUND09:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP78]]
; AVX2-NEXT: [[BOUND110:%.*]] = icmp ult i8* [[B6]], [[SCEVGEP2]]
; AVX2-NEXT: [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
; AVX2-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
; AVX2-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; AVX2: vector.ph:
; AVX2-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX2: vector.body:
; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0
; AVX2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
; AVX2-NEXT: [[INDUCTION12:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
; AVX2-NEXT: [[INDUCTION13:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23>
; AVX2-NEXT: [[INDUCTION14:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>
; AVX2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; AVX2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8
; AVX2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 16
; AVX2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 24
; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]]
; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]]
; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]]
; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]]
; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
; AVX2-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <8 x i32>*
; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP9]], align 4, !alias.scope !0
; AVX2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 8
; AVX2-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <8 x i32>*
; AVX2-NEXT: [[WIDE_LOAD15:%.*]] = load <8 x i32>, <8 x i32>* [[TMP11]], align 4, !alias.scope !0
; AVX2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 16
; AVX2-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <8 x i32>*
; AVX2-NEXT: [[WIDE_LOAD16:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4, !alias.scope !0
; AVX2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 24
; AVX2-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <8 x i32>*
; AVX2-NEXT: [[WIDE_LOAD17:%.*]] = load <8 x i32>, <8 x i32>* [[TMP15]], align 4, !alias.scope !0
; AVX2-NEXT: [[TMP16:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT: [[TMP17:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD15]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT: [[TMP18:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD16]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT: [[TMP19:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD17]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]]
; AVX2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
; AVX2-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]]
; AVX2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP3]]
; AVX2-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 0
; AVX2-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <8 x i32>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP25]], i32 4, <8 x i1> [[TMP16]], <8 x i32> undef), !alias.scope !3
; AVX2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 8
; AVX2-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <8 x i32>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP27]], i32 4, <8 x i1> [[TMP17]], <8 x i32> undef), !alias.scope !3
; AVX2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 16
; AVX2-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <8 x i32>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD19:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP29]], i32 4, <8 x i1> [[TMP18]], <8 x i32> undef), !alias.scope !3
; AVX2-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 24
; AVX2-NEXT: [[TMP31:%.*]] = bitcast i32* [[TMP30]] to <8 x i32>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD20:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP31]], i32 4, <8 x i1> [[TMP19]], <8 x i32> undef), !alias.scope !3
; AVX2-NEXT: [[TMP32:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]]
; AVX2-NEXT: [[TMP33:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD18]], [[WIDE_LOAD15]]
; AVX2-NEXT: [[TMP34:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD19]], [[WIDE_LOAD16]]
; AVX2-NEXT: [[TMP35:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD20]], [[WIDE_LOAD17]]
; AVX2-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]]
; AVX2-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]]
; AVX2-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]]
; AVX2-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]]
; AVX2-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[TMP36]], i32 0
; AVX2-NEXT: [[TMP41:%.*]] = bitcast i32* [[TMP40]] to <8 x i32>*
; AVX2-NEXT: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> [[TMP32]], <8 x i32>* [[TMP41]], i32 4, <8 x i1> [[TMP16]]), !alias.scope !5, !noalias !7
; AVX2-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, i32* [[TMP36]], i32 8
; AVX2-NEXT: [[TMP43:%.*]] = bitcast i32* [[TMP42]] to <8 x i32>*
; AVX2-NEXT: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> [[TMP33]], <8 x i32>* [[TMP43]], i32 4, <8 x i1> [[TMP17]]), !alias.scope !5, !noalias !7
; AVX2-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, i32* [[TMP36]], i32 16
; AVX2-NEXT: [[TMP45:%.*]] = bitcast i32* [[TMP44]] to <8 x i32>*
; AVX2-NEXT: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> [[TMP34]], <8 x i32>* [[TMP45]], i32 4, <8 x i1> [[TMP18]]), !alias.scope !5, !noalias !7
; AVX2-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[TMP36]], i32 24
; AVX2-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <8 x i32>*
; AVX2-NEXT: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> [[TMP35]], <8 x i32>* [[TMP47]], i32 4, <8 x i1> [[TMP19]]), !alias.scope !5, !noalias !7
; AVX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32
; AVX2-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984
; AVX2-NEXT: br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !8
; AVX2: middle.block:
; AVX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, 9984
; AVX2-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; AVX2: scalar.ph:
; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; AVX2-NEXT: br label [[FOR_BODY:%.*]]
; AVX2: for.body:
; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX2-NEXT: [[TMP49:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; AVX2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP49]], 100
; AVX2-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX2: if.then:
; AVX2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; AVX2-NEXT: [[TMP50:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
; AVX2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP50]], [[TMP49]]
; AVX2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; AVX2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX7]], align 4
; AVX2-NEXT: br label [[FOR_INC]]
; AVX2: for.inc:
; AVX2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
; AVX2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !10
; AVX2: for.end:
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @foo1(
; AVX512-NEXT: entry:
; AVX512-NEXT: [[A1:%.*]] = bitcast i32* [[A:%.*]] to i8*
; AVX512-NEXT: [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8*
; AVX512-NEXT: [[B6:%.*]] = bitcast i32* [[B:%.*]] to i8*
; AVX512-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; AVX512: vector.memcheck:
; AVX512-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A]], i64 10000
; AVX512-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; AVX512-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 10000
; AVX512-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
; AVX512-NEXT: [[SCEVGEP7:%.*]] = getelementptr i32, i32* [[B]], i64 10000
; AVX512-NEXT: [[SCEVGEP78:%.*]] = bitcast i32* [[SCEVGEP7]] to i8*
; AVX512-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP45]]
; AVX512-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]]
; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; AVX512-NEXT: [[BOUND09:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP78]]
; AVX512-NEXT: [[BOUND110:%.*]] = icmp ult i8* [[B6]], [[SCEVGEP2]]
; AVX512-NEXT: [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
; AVX512-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
; AVX512-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; AVX512: vector.ph:
; AVX512-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX512: vector.body:
; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> undef, i64 [[INDEX]], i32 0
; AVX512-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: [[INDUCTION:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
; AVX512-NEXT: [[INDUCTION12:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], <i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>
; AVX512-NEXT: [[INDUCTION13:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], <i64 32, i64 33, i64 34, i64 35, i64 36, i64 37, i64 38, i64 39, i64 40, i64 41, i64 42, i64 43, i64 44, i64 45, i64 46, i64 47>
; AVX512-NEXT: [[INDUCTION14:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], <i64 48, i64 49, i64 50, i64 51, i64 52, i64 53, i64 54, i64 55, i64 56, i64 57, i64 58, i64 59, i64 60, i64 61, i64 62, i64 63>
; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 16
; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 32
; AVX512-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 48
; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]]
; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]]
; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]]
; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]]
; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
; AVX512-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <16 x i32>*
; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP9]], align 4, !alias.scope !0
; AVX512-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 16
; AVX512-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <16 x i32>*
; AVX512-NEXT: [[WIDE_LOAD15:%.*]] = load <16 x i32>, <16 x i32>* [[TMP11]], align 4, !alias.scope !0
; AVX512-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 32
; AVX512-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <16 x i32>*
; AVX512-NEXT: [[WIDE_LOAD16:%.*]] = load <16 x i32>, <16 x i32>* [[TMP13]], align 4, !alias.scope !0
; AVX512-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 48
; AVX512-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <16 x i32>*
; AVX512-NEXT: [[WIDE_LOAD17:%.*]] = load <16 x i32>, <16 x i32>* [[TMP15]], align 4, !alias.scope !0
; AVX512-NEXT: [[TMP16:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT: [[TMP17:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD15]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT: [[TMP18:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD16]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT: [[TMP19:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD17]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]]
; AVX512-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
; AVX512-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]]
; AVX512-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP3]]
; AVX512-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 0
; AVX512-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <16 x i32>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP25]], i32 4, <16 x i1> [[TMP16]], <16 x i32> undef), !alias.scope !3
; AVX512-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 16
; AVX512-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <16 x i32>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP27]], i32 4, <16 x i1> [[TMP17]], <16 x i32> undef), !alias.scope !3
; AVX512-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 32
; AVX512-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <16 x i32>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD19:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP29]], i32 4, <16 x i1> [[TMP18]], <16 x i32> undef), !alias.scope !3
; AVX512-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 48
; AVX512-NEXT: [[TMP31:%.*]] = bitcast i32* [[TMP30]] to <16 x i32>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD20:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP31]], i32 4, <16 x i1> [[TMP19]], <16 x i32> undef), !alias.scope !3
; AVX512-NEXT: [[TMP32:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]]
; AVX512-NEXT: [[TMP33:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD18]], [[WIDE_LOAD15]]
; AVX512-NEXT: [[TMP34:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD19]], [[WIDE_LOAD16]]
; AVX512-NEXT: [[TMP35:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD20]], [[WIDE_LOAD17]]
; AVX512-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]]
; AVX512-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]]
; AVX512-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]]
; AVX512-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]]
; AVX512-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[TMP36]], i32 0
; AVX512-NEXT: [[TMP41:%.*]] = bitcast i32* [[TMP40]] to <16 x i32>*
; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> [[TMP32]], <16 x i32>* [[TMP41]], i32 4, <16 x i1> [[TMP16]]), !alias.scope !5, !noalias !7
; AVX512-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, i32* [[TMP36]], i32 16
; AVX512-NEXT: [[TMP43:%.*]] = bitcast i32* [[TMP42]] to <16 x i32>*
; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> [[TMP33]], <16 x i32>* [[TMP43]], i32 4, <16 x i1> [[TMP17]]), !alias.scope !5, !noalias !7
; AVX512-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, i32* [[TMP36]], i32 32
; AVX512-NEXT: [[TMP45:%.*]] = bitcast i32* [[TMP44]] to <16 x i32>*
; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> [[TMP34]], <16 x i32>* [[TMP45]], i32 4, <16 x i1> [[TMP18]]), !alias.scope !5, !noalias !7
; AVX512-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[TMP36]], i32 48
; AVX512-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <16 x i32>*
; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> [[TMP35]], <16 x i32>* [[TMP47]], i32 4, <16 x i1> [[TMP19]]), !alias.scope !5, !noalias !7
; AVX512-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 64
; AVX512-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984
; AVX512-NEXT: br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !8
; AVX512: middle.block:
; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, 9984
; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; AVX512: scalar.ph:
; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; AVX512-NEXT: br label [[FOR_BODY:%.*]]
; AVX512: for.body:
; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX512-NEXT: [[TMP49:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; AVX512-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP49]], 100
; AVX512-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX512: if.then:
; AVX512-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; AVX512-NEXT: [[TMP50:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
; AVX512-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP50]], [[TMP49]]
; AVX512-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; AVX512-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX7]], align 4
; AVX512-NEXT: br label [[FOR_INC]]
; AVX512: for.inc:
; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !10
; AVX512: for.end:
; AVX512-NEXT: ret void
;
entry:
  br label %for.body

for.body: ; preds = %for.inc, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
  %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %cmp1 = icmp slt i32 %0, 100
  br i1 %cmp1, label %if.then, label %for.inc

if.then: ; preds = %for.body
  %arrayidx3 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  %1 = load i32, i32* %arrayidx3, align 4
  %add = add nsw i32 %1, %0
  %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  store i32 %add, i32* %arrayidx7, align 4
  br label %for.inc
for.inc: ; preds = %for.body, %if.then
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.inc
  ret void
}

; The same as @foo1 but all the pointers are address space 1 pointers.
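; (The only difference expected in the checks below, relative to @foo1, is
; the pointer mangling of the masked intrinsics, .p1v8i32/.p1v16i32 instead
; of the .p0 variants, and "addrspace(1)" on every pointer type, since the
; arguments live in address space 1.)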
define void @foo1_addrspace1(i32 addrspace(1)* nocapture %A, i32 addrspace(1)* nocapture readonly %B, i32 addrspace(1)* nocapture readonly %trigger) local_unnamed_addr #0 {
; AVX1-LABEL: @foo1_addrspace1(
; AVX1-NEXT: entry:
; AVX1-NEXT: [[A1:%.*]] = bitcast i32 addrspace(1)* [[A:%.*]] to i8 addrspace(1)*
; AVX1-NEXT: [[TRIGGER3:%.*]] = bitcast i32 addrspace(1)* [[TRIGGER:%.*]] to i8 addrspace(1)*
; AVX1-NEXT: [[B6:%.*]] = bitcast i32 addrspace(1)* [[B:%.*]] to i8 addrspace(1)*
; AVX1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; AVX1: vector.memcheck:
; AVX1-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32 addrspace(1)* [[A]], i64 10000
; AVX1-NEXT: [[SCEVGEP2:%.*]] = bitcast i32 addrspace(1)* [[SCEVGEP]] to i8 addrspace(1)*
; AVX1-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32 addrspace(1)* [[TRIGGER]], i64 10000
; AVX1-NEXT: [[SCEVGEP45:%.*]] = bitcast i32 addrspace(1)* [[SCEVGEP4]] to i8 addrspace(1)*
; AVX1-NEXT: [[SCEVGEP7:%.*]] = getelementptr i32, i32 addrspace(1)* [[B]], i64 10000
; AVX1-NEXT: [[SCEVGEP78:%.*]] = bitcast i32 addrspace(1)* [[SCEVGEP7]] to i8 addrspace(1)*
; AVX1-NEXT: [[BOUND0:%.*]] = icmp ult i8 addrspace(1)* [[A1]], [[SCEVGEP45]]
; AVX1-NEXT: [[BOUND1:%.*]] = icmp ult i8 addrspace(1)* [[TRIGGER3]], [[SCEVGEP2]]
; AVX1-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; AVX1-NEXT: [[BOUND09:%.*]] = icmp ult i8 addrspace(1)* [[A1]], [[SCEVGEP78]]
; AVX1-NEXT: [[BOUND110:%.*]] = icmp ult i8 addrspace(1)* [[B6]], [[SCEVGEP2]]
; AVX1-NEXT: [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
; AVX1-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
; AVX1-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
; AVX1-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; AVX1: vector.ph:
; AVX1-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX1: vector.body:
; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0
; AVX1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
; AVX1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; AVX1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[TMP0]]
; AVX1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP1]], i32 0
; AVX1-NEXT: [[TMP3:%.*]] = bitcast i32 addrspace(1)* [[TMP2]] to <8 x i32> addrspace(1)*
; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(1)* [[TMP3]], align 4, !alias.scope !11
; AVX1-NEXT: [[TMP4:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[B]], i64 [[TMP0]]
; AVX1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP5]], i32 0
; AVX1-NEXT: [[TMP7:%.*]] = bitcast i32 addrspace(1)* [[TMP6]] to <8 x i32> addrspace(1)*
; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP7]], i32 4, <8 x i1> [[TMP4]], <8 x i32> undef), !alias.scope !14
; AVX1-NEXT: [[TMP8:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]]
; AVX1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[A]], i64 [[TMP0]]
; AVX1-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP9]], i32 0
; AVX1-NEXT: [[TMP11:%.*]] = bitcast i32 addrspace(1)* [[TMP10]] to <8 x i32> addrspace(1)*
; AVX1-NEXT: call void @llvm.masked.store.v8i32.p1v8i32(<8 x i32> [[TMP8]], <8 x i32> addrspace(1)* [[TMP11]], i32 4, <8 x i1> [[TMP4]]), !alias.scope !16, !noalias !18
; AVX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; AVX1-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
; AVX1-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !19
; AVX1: middle.block:
; AVX1-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, 10000
; AVX1-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; AVX1: scalar.ph:
; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; AVX1-NEXT: br label [[FOR_BODY:%.*]]
; AVX1: for.body:
; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX1-NEXT: [[TMP13:%.*]] = load i32, i32 addrspace(1)* [[ARRAYIDX]], align 4
; AVX1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP13]], 100
; AVX1-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX1: if.then:
; AVX1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[B]], i64 [[INDVARS_IV]]
; AVX1-NEXT: [[TMP14:%.*]] = load i32, i32 addrspace(1)* [[ARRAYIDX3]], align 4
; AVX1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
; AVX1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[A]], i64 [[INDVARS_IV]]
; AVX1-NEXT: store i32 [[ADD]], i32 addrspace(1)* [[ARRAYIDX7]], align 4
; AVX1-NEXT: br label [[FOR_INC]]
; AVX1: for.inc:
; AVX1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
; AVX1-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !20
; AVX1: for.end:
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @foo1_addrspace1(
; AVX2-NEXT: entry:
; AVX2-NEXT: [[A1:%.*]] = bitcast i32 addrspace(1)* [[A:%.*]] to i8 addrspace(1)*
; AVX2-NEXT: [[TRIGGER3:%.*]] = bitcast i32 addrspace(1)* [[TRIGGER:%.*]] to i8 addrspace(1)*
; AVX2-NEXT: [[B6:%.*]] = bitcast i32 addrspace(1)* [[B:%.*]] to i8 addrspace(1)*
; AVX2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; AVX2: vector.memcheck:
; AVX2-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32 addrspace(1)* [[A]], i64 10000
; AVX2-NEXT: [[SCEVGEP2:%.*]] = bitcast i32 addrspace(1)* [[SCEVGEP]] to i8 addrspace(1)*
; AVX2-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32 addrspace(1)* [[TRIGGER]], i64 10000
; AVX2-NEXT: [[SCEVGEP45:%.*]] = bitcast i32 addrspace(1)* [[SCEVGEP4]] to i8 addrspace(1)*
; AVX2-NEXT: [[SCEVGEP7:%.*]] = getelementptr i32, i32 addrspace(1)* [[B]], i64 10000
; AVX2-NEXT: [[SCEVGEP78:%.*]] = bitcast i32 addrspace(1)* [[SCEVGEP7]] to i8 addrspace(1)*
; AVX2-NEXT: [[BOUND0:%.*]] = icmp ult i8 addrspace(1)* [[A1]], [[SCEVGEP45]]
; AVX2-NEXT: [[BOUND1:%.*]] = icmp ult i8 addrspace(1)* [[TRIGGER3]], [[SCEVGEP2]]
; AVX2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; AVX2-NEXT: [[BOUND09:%.*]] = icmp ult i8 addrspace(1)* [[A1]], [[SCEVGEP78]]
; AVX2-NEXT: [[BOUND110:%.*]] = icmp ult i8 addrspace(1)* [[B6]], [[SCEVGEP2]]
; AVX2-NEXT: [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
; AVX2-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
; AVX2-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; AVX2: vector.ph:
; AVX2-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX2: vector.body:
; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0
; AVX2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
; AVX2-NEXT: [[INDUCTION12:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
; AVX2-NEXT: [[INDUCTION13:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23>
; AVX2-NEXT: [[INDUCTION14:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>
; AVX2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; AVX2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8
; AVX2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 16
; AVX2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 24
; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[TMP0]]
; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[TMP1]]
; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[TMP2]]
; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[TMP3]]
; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP4]], i32 0
; AVX2-NEXT: [[TMP9:%.*]] = bitcast i32 addrspace(1)* [[TMP8]] to <8 x i32> addrspace(1)*
; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(1)* [[TMP9]], align 4, !alias.scope !11
; AVX2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP4]], i32 8
; AVX2-NEXT: [[TMP11:%.*]] = bitcast i32 addrspace(1)* [[TMP10]] to <8 x i32> addrspace(1)*
; AVX2-NEXT: [[WIDE_LOAD15:%.*]] = load <8 x i32>, <8 x i32> addrspace(1)* [[TMP11]], align 4, !alias.scope !11
; AVX2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP4]], i32 16
; AVX2-NEXT: [[TMP13:%.*]] = bitcast i32 addrspace(1)* [[TMP12]] to <8 x i32> addrspace(1)*
; AVX2-NEXT: [[WIDE_LOAD16:%.*]] = load <8 x i32>, <8 x i32> addrspace(1)* [[TMP13]], align 4, !alias.scope !11
; AVX2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP4]], i32 24
; AVX2-NEXT: [[TMP15:%.*]] = bitcast i32 addrspace(1)* [[TMP14]] to <8 x i32> addrspace(1)*
; AVX2-NEXT: [[WIDE_LOAD17:%.*]] = load <8 x i32>, <8 x i32> addrspace(1)* [[TMP15]], align 4, !alias.scope !11
; AVX2-NEXT: [[TMP16:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT: [[TMP17:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD15]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT: [[TMP18:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD16]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT: [[TMP19:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD17]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[B]], i64 [[TMP0]]
; AVX2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[B]], i64 [[TMP1]]
; AVX2-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[B]], i64 [[TMP2]]
; AVX2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[B]], i64 [[TMP3]]
; AVX2-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP20]], i32 0
; AVX2-NEXT: [[TMP25:%.*]] = bitcast i32 addrspace(1)* [[TMP24]] to <8 x i32> addrspace(1)*
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP25]], i32 4, <8 x i1> [[TMP16]], <8 x i32> undef), !alias.scope !14
; AVX2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP20]], i32 8
; AVX2-NEXT: [[TMP27:%.*]] = bitcast i32 addrspace(1)* [[TMP26]] to <8 x i32> addrspace(1)*
; AVX2-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP27]], i32 4, <8 x i1> [[TMP17]], <8 x i32> undef), !alias.scope !14
; AVX2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP20]], i32 16
; AVX2-NEXT: [[TMP29:%.*]] = bitcast i32 addrspace(1)* [[TMP28]] to <8 x i32> addrspace(1)*
; AVX2-NEXT: [[WIDE_MASKED_LOAD19:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP29]], i32 4, <8 x i1> [[TMP18]], <8 x i32> undef), !alias.scope !14
; AVX2-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP20]], i32 24
; AVX2-NEXT: [[TMP31:%.*]] = bitcast i32 addrspace(1)* [[TMP30]] to <8 x i32> addrspace(1)*
; AVX2-NEXT: [[WIDE_MASKED_LOAD20:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP31]], i32 4, <8 x i1> [[TMP19]], <8 x i32> undef), !alias.scope !14
; AVX2-NEXT: [[TMP32:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]]
; AVX2-NEXT: [[TMP33:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD18]], [[WIDE_LOAD15]]
; AVX2-NEXT: [[TMP34:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD19]], [[WIDE_LOAD16]]
; AVX2-NEXT: [[TMP35:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD20]], [[WIDE_LOAD17]]
; AVX2-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[A]], i64 [[TMP0]]
; AVX2-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[A]], i64 [[TMP1]]
; AVX2-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[A]], i64 [[TMP2]]
; AVX2-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[A]], i64 [[TMP3]]
; AVX2-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP36]], i32 0
; AVX2-NEXT: [[TMP41:%.*]] = bitcast i32 addrspace(1)* [[TMP40]] to <8 x i32> addrspace(1)*
; AVX2-NEXT: call void @llvm.masked.store.v8i32.p1v8i32(<8 x i32> [[TMP32]], <8 x i32> addrspace(1)* [[TMP41]], i32 4, <8 x i1> [[TMP16]]), !alias.scope !16, !noalias !18
; AVX2-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP36]], i32 8
; AVX2-NEXT: [[TMP43:%.*]] = bitcast i32 addrspace(1)* [[TMP42]] to <8 x i32> addrspace(1)*
; AVX2-NEXT: call void @llvm.masked.store.v8i32.p1v8i32(<8 x i32> [[TMP33]], <8 x i32> addrspace(1)* [[TMP43]], i32 4, <8 x i1> [[TMP17]]), !alias.scope !16, !noalias !18
; AVX2-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP36]], i32 16
; AVX2-NEXT: [[TMP45:%.*]] = bitcast i32 addrspace(1)* [[TMP44]] to <8 x i32> addrspace(1)*
; AVX2-NEXT: call void @llvm.masked.store.v8i32.p1v8i32(<8 x i32> [[TMP34]], <8 x i32> addrspace(1)* [[TMP45]], i32 4, <8 x i1> [[TMP18]]), !alias.scope !16, !noalias !18
; AVX2-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP36]], i32 24
; AVX2-NEXT: [[TMP47:%.*]] = bitcast i32 addrspace(1)* [[TMP46]] to <8 x i32> addrspace(1)*
; AVX2-NEXT: call void @llvm.masked.store.v8i32.p1v8i32(<8 x i32> [[TMP35]], <8 x i32> addrspace(1)* [[TMP47]], i32 4, <8 x i1> [[TMP19]]), !alias.scope !16, !noalias !18
; AVX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32
; AVX2-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984
; AVX2-NEXT: br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !19
; AVX2: middle.block:
; AVX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, 9984
; AVX2-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; AVX2: scalar.ph:
; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; AVX2-NEXT: br label [[FOR_BODY:%.*]]
; AVX2: for.body:
; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX2-NEXT: [[TMP49:%.*]] = load i32, i32 addrspace(1)* [[ARRAYIDX]], align 4
; AVX2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP49]], 100
; AVX2-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX2: if.then:
; AVX2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[B]], i64 [[INDVARS_IV]]
; AVX2-NEXT: [[TMP50:%.*]] = load i32, i32 addrspace(1)* [[ARRAYIDX3]], align 4
; AVX2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP50]], [[TMP49]]
; AVX2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[A]], i64 [[INDVARS_IV]]
; AVX2-NEXT: store i32 [[ADD]], i32 addrspace(1)* [[ARRAYIDX7]], align 4
; AVX2-NEXT: br label [[FOR_INC]]
; AVX2: for.inc:
; AVX2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
; AVX2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !20
; AVX2: for.end:
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @foo1_addrspace1(
; AVX512-NEXT: entry:
; AVX512-NEXT: [[A1:%.*]] = bitcast i32 addrspace(1)* [[A:%.*]] to i8 addrspace(1)*
; AVX512-NEXT: [[TRIGGER3:%.*]] = bitcast i32 addrspace(1)* [[TRIGGER:%.*]] to i8 addrspace(1)*
; AVX512-NEXT: [[B6:%.*]] = bitcast i32 addrspace(1)* [[B:%.*]] to i8 addrspace(1)*
; AVX512-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; AVX512: vector.memcheck:
; AVX512-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32 addrspace(1)* [[A]], i64 10000
; AVX512-NEXT: [[SCEVGEP2:%.*]] = bitcast i32 addrspace(1)* [[SCEVGEP]] to i8 addrspace(1)*
; AVX512-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32 addrspace(1)* [[TRIGGER]], i64 10000
; AVX512-NEXT: [[SCEVGEP45:%.*]] = bitcast i32 addrspace(1)* [[SCEVGEP4]] to i8 addrspace(1)*
; AVX512-NEXT: [[SCEVGEP7:%.*]] = getelementptr i32, i32 addrspace(1)* [[B]], i64 10000
; AVX512-NEXT: [[SCEVGEP78:%.*]] = bitcast i32 addrspace(1)* [[SCEVGEP7]] to i8 addrspace(1)*
; AVX512-NEXT: [[BOUND0:%.*]] = icmp ult i8 addrspace(1)* [[A1]], [[SCEVGEP45]]
; AVX512-NEXT: [[BOUND1:%.*]] = icmp ult i8 addrspace(1)* [[TRIGGER3]], [[SCEVGEP2]]
; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; AVX512-NEXT: [[BOUND09:%.*]] = icmp ult i8 addrspace(1)* [[A1]], [[SCEVGEP78]]
; AVX512-NEXT: [[BOUND110:%.*]] = icmp ult i8 addrspace(1)* [[B6]], [[SCEVGEP2]]
; AVX512-NEXT: [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
; AVX512-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
; AVX512-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; AVX512: vector.ph:
; AVX512-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX512: vector.body:
; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> undef, i64 [[INDEX]], i32 0
; AVX512-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: [[INDUCTION:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
; AVX512-NEXT: [[INDUCTION12:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], <i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>
; AVX512-NEXT: [[INDUCTION13:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], <i64 32, i64 33, i64 34, i64 35, i64 36, i64 37, i64 38, i64 39, i64 40, i64 41, i64 42, i64 43, i64 44, i64 45, i64 46, i64 47>
; AVX512-NEXT: [[INDUCTION14:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], <i64 48, i64 49, i64 50, i64 51, i64 52, i64 53, i64 54, i64 55, i64 56, i64 57, i64 58, i64 59, i64 60, i64 61, i64 62, i64 63>
; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 16
; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 32
; AVX512-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 48
; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[TMP0]]
; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[TMP1]]
; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[TMP2]]
; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[TMP3]]
; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP4]], i32 0
; AVX512-NEXT: [[TMP9:%.*]] = bitcast i32 addrspace(1)* [[TMP8]] to <16 x i32> addrspace(1)*
; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32> addrspace(1)* [[TMP9]], align 4, !alias.scope !11
; AVX512-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP4]], i32 16
; AVX512-NEXT: [[TMP11:%.*]] = bitcast i32 addrspace(1)* [[TMP10]] to <16 x i32> addrspace(1)*
; AVX512-NEXT: [[WIDE_LOAD15:%.*]] = load <16 x i32>, <16 x i32> addrspace(1)* [[TMP11]], align 4, !alias.scope !11
; AVX512-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP4]], i32 32
; AVX512-NEXT: [[TMP13:%.*]] = bitcast i32 addrspace(1)* [[TMP12]] to <16 x i32> addrspace(1)*
; AVX512-NEXT: [[WIDE_LOAD16:%.*]] = load <16 x i32>, <16 x i32> addrspace(1)* [[TMP13]], align 4, !alias.scope !11
; AVX512-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP4]], i32 48
; AVX512-NEXT: [[TMP15:%.*]] = bitcast i32 addrspace(1)* [[TMP14]] to <16 x i32> addrspace(1)*
; AVX512-NEXT: [[WIDE_LOAD17:%.*]] = load <16 x i32>, <16 x i32> addrspace(1)* [[TMP15]], align 4, !alias.scope !11
; AVX512-NEXT: [[TMP16:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT: [[TMP17:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD15]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT: [[TMP18:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD16]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT: [[TMP19:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD17]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[B]], i64 [[TMP0]]
; AVX512-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[B]], i64 [[TMP1]]
; AVX512-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[B]], i64 [[TMP2]]
; AVX512-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[B]], i64 [[TMP3]]
; AVX512-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP20]], i32 0
; AVX512-NEXT: [[TMP25:%.*]] = bitcast i32 addrspace(1)* [[TMP24]] to <16 x i32> addrspace(1)*
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1v16i32(<16 x i32> addrspace(1)* [[TMP25]], i32 4, <16 x i1> [[TMP16]], <16 x i32> undef), !alias.scope !14
; AVX512-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP20]], i32 16
; AVX512-NEXT: [[TMP27:%.*]] = bitcast i32 addrspace(1)* [[TMP26]] to <16 x i32> addrspace(1)*
; AVX512-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1v16i32(<16 x i32> addrspace(1)* [[TMP27]], i32 4, <16 x i1> [[TMP17]], <16 x i32> undef), !alias.scope !14
; AVX512-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP20]], i32 32
; AVX512-NEXT: [[TMP29:%.*]] = bitcast i32 addrspace(1)* [[TMP28]] to <16 x i32> addrspace(1)*
; AVX512-NEXT: [[WIDE_MASKED_LOAD19:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1v16i32(<16 x i32> addrspace(1)* [[TMP29]], i32 4, <16 x i1> [[TMP18]], <16 x i32> undef), !alias.scope !14
; AVX512-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP20]], i32 48
; AVX512-NEXT: [[TMP31:%.*]] = bitcast i32 addrspace(1)* [[TMP30]] to <16 x i32> addrspace(1)*
; AVX512-NEXT: [[WIDE_MASKED_LOAD20:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1v16i32(<16 x i32> addrspace(1)* [[TMP31]], i32 4, <16 x i1> [[TMP19]], <16 x i32> undef), !alias.scope !14
; AVX512-NEXT: [[TMP32:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]]
; AVX512-NEXT: [[TMP33:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD18]], [[WIDE_LOAD15]]
; AVX512-NEXT: [[TMP34:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD19]], [[WIDE_LOAD16]]
; AVX512-NEXT: [[TMP35:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD20]], [[WIDE_LOAD17]]
; AVX512-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[A]], i64 [[TMP0]]
; AVX512-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[A]], i64 [[TMP1]]
; AVX512-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[A]], i64 [[TMP2]]
; AVX512-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[A]], i64 [[TMP3]]
; AVX512-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP36]], i32 0
; AVX512-NEXT: [[TMP41:%.*]] = bitcast i32 addrspace(1)* [[TMP40]] to <16 x i32> addrspace(1)*
; AVX512-NEXT: call void @llvm.masked.store.v16i32.p1v16i32(<16 x i32> [[TMP32]], <16 x i32> addrspace(1)* [[TMP41]], i32 4, <16 x i1> [[TMP16]]), !alias.scope !16, !noalias !18
; AVX512-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP36]], i32 16
; AVX512-NEXT: [[TMP43:%.*]] = bitcast i32 addrspace(1)* [[TMP42]] to <16 x i32> addrspace(1)*
; AVX512-NEXT: call void @llvm.masked.store.v16i32.p1v16i32(<16 x i32> [[TMP33]], <16 x i32> addrspace(1)* [[TMP43]], i32 4, <16 x i1> [[TMP17]]), !alias.scope !16, !noalias !18
; AVX512-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP36]], i32 32
; AVX512-NEXT: [[TMP45:%.*]] = bitcast i32 addrspace(1)* [[TMP44]] to <16 x i32> addrspace(1)*
; AVX512-NEXT: call void @llvm.masked.store.v16i32.p1v16i32(<16 x i32> [[TMP34]], <16 x i32> addrspace(1)* [[TMP45]], i32 4, <16 x i1> [[TMP18]]), !alias.scope !16, !noalias !18
; AVX512-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP36]], i32 48
; AVX512-NEXT: [[TMP47:%.*]] = bitcast i32 addrspace(1)* [[TMP46]] to <16 x i32> addrspace(1)*
; AVX512-NEXT: call void @llvm.masked.store.v16i32.p1v16i32(<16 x i32> [[TMP35]], <16 x i32> addrspace(1)* [[TMP47]], i32 4, <16 x i1> [[TMP19]]), !alias.scope !16, !noalias !18
; AVX512-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 64
; AVX512-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984
; AVX512-NEXT: br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !19
; AVX512: middle.block:
; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, 9984
; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; AVX512: scalar.ph:
; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; AVX512-NEXT: br label [[FOR_BODY:%.*]]
; AVX512: for.body:
; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX512-NEXT: [[TMP49:%.*]] = load i32, i32 addrspace(1)* [[ARRAYIDX]], align 4
; AVX512-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP49]], 100
; AVX512-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX512: if.then:
; AVX512-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[B]], i64 [[INDVARS_IV]]
; AVX512-NEXT: [[TMP50:%.*]] = load i32, i32 addrspace(1)* [[ARRAYIDX3]], align 4
; AVX512-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP50]], [[TMP49]]
; AVX512-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[A]], i64 [[INDVARS_IV]]
; AVX512-NEXT: store i32 [[ADD]], i32 addrspace(1)* [[ARRAYIDX7]], align 4
; AVX512-NEXT: br label [[FOR_INC]]
; AVX512: for.inc:
; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !20
; AVX512: for.end:
; AVX512-NEXT: ret void
;
entry:
  br label %for.body

for.body: ; preds = %for.inc, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %trigger, i64 %indvars.iv
  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
  %cmp1 = icmp slt i32 %0, 100
  br i1 %cmp1, label %if.then, label %for.inc

if.then: ; preds = %for.body
  %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %B, i64 %indvars.iv
  %1 = load i32, i32 addrspace(1)* %arrayidx3, align 4
  %add = add nsw i32 %1, %0
  %arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %A, i64 %indvars.iv
  store i32 %add, i32 addrspace(1)* %arrayidx7, align 4
  br label %for.inc
for.inc: ; preds = %for.body, %if.then
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.inc
  ret void
}

;void foo2(float *A, float *B, int *trigger) {
;
;  for (int i=0; i<10000; i++) {
;    if (trigger[i] < 100) {
;      A[i] = B[i] + trigger[i];
;    }
;  }
;}
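;
; Since A and B are float here while trigger is i32, the checks below expect
; the trigger values to be converted with sitofp and combined with fadd, and
; the masked intrinsics to switch to their float forms
; (@llvm.masked.load.v8f32.p0v8f32 / @llvm.masked.store.v8f32.p0v8f32 for the
; 8-wide AVX runs).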
719 define void @foo2(float* nocapture %A, float* nocapture readonly %B, i32* nocapture readonly %trigger) local_unnamed_addr #0 {
; AVX1-NEXT:    [[A1:%.*]] = bitcast float* [[A:%.*]] to i8*
; AVX1-NEXT:    [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8*
; AVX1-NEXT:    [[B6:%.*]] = bitcast float* [[B:%.*]] to i8*
; AVX1-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; AVX1:       vector.memcheck:
; AVX1-NEXT:    [[SCEVGEP:%.*]] = getelementptr float, float* [[A]], i64 10000
; AVX1-NEXT:    [[SCEVGEP2:%.*]] = bitcast float* [[SCEVGEP]] to i8*
; AVX1-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 10000
; AVX1-NEXT:    [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
; AVX1-NEXT:    [[SCEVGEP7:%.*]] = getelementptr float, float* [[B]], i64 10000
; AVX1-NEXT:    [[SCEVGEP78:%.*]] = bitcast float* [[SCEVGEP7]] to i8*
; AVX1-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP45]]
; AVX1-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]]
; AVX1-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; AVX1-NEXT:    [[BOUND09:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP78]]
; AVX1-NEXT:    [[BOUND110:%.*]] = icmp ult i8* [[B6]], [[SCEVGEP2]]
; AVX1-NEXT:    [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
; AVX1-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
; AVX1-NEXT:    [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
; AVX1-NEXT:    br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; AVX1:       vector.ph:
; AVX1-NEXT:    br label [[VECTOR_BODY:%.*]]
; AVX1:       vector.body:
; AVX1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX1-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0
; AVX1-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
; AVX1-NEXT:    [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
; AVX1-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; AVX1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]]
; AVX1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
; AVX1-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <8 x i32>*
; AVX1-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP3]], align 4, !alias.scope !21
; AVX1-NEXT:    [[TMP4:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP0]]
; AVX1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP5]], i32 0
; AVX1-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <8 x float>*
; AVX1-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP7]], i32 4, <8 x i1> [[TMP4]], <8 x float> undef), !alias.scope !24
; AVX1-NEXT:    [[TMP8:%.*]] = sitofp <8 x i32> [[WIDE_LOAD]] to <8 x float>
; AVX1-NEXT:    [[TMP9:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD]], [[TMP8]]
; AVX1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP0]]
; AVX1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, float* [[TMP10]], i32 0
; AVX1-NEXT:    [[TMP12:%.*]] = bitcast float* [[TMP11]] to <8 x float>*
; AVX1-NEXT:    call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP9]], <8 x float>* [[TMP12]], i32 4, <8 x i1> [[TMP4]]), !alias.scope !26, !noalias !28
; AVX1-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; AVX1-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
; AVX1-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !29
; AVX1:       middle.block:
; AVX1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10000, 10000
; AVX1-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; AVX1:       scalar.ph:
; AVX1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; AVX1-NEXT:    br label [[FOR_BODY:%.*]]
; AVX1:       for.body:
; AVX1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX1-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; AVX1-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP14]], 100
; AVX1-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX1:       if.then:
; AVX1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]]
; AVX1-NEXT:    [[TMP15:%.*]] = load float, float* [[ARRAYIDX3]], align 4
; AVX1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP14]] to float
; AVX1-NEXT:    [[ADD:%.*]] = fadd float [[TMP15]], [[CONV]]
; AVX1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
; AVX1-NEXT:    store float [[ADD]], float* [[ARRAYIDX7]], align 4
; AVX1-NEXT:    br label [[FOR_INC]]
; AVX1:       for.inc:
; AVX1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX1-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
; AVX1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !30
; AVX1:       for.end:
; AVX1-NEXT:    ret void
;
; AVX2-LABEL: @foo2(
; AVX2-NEXT:  entry:
; AVX2-NEXT:    [[A1:%.*]] = bitcast float* [[A:%.*]] to i8*
; AVX2-NEXT:    [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8*
; AVX2-NEXT:    [[B6:%.*]] = bitcast float* [[B:%.*]] to i8*
; AVX2-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; AVX2:       vector.memcheck:
; AVX2-NEXT:    [[SCEVGEP:%.*]] = getelementptr float, float* [[A]], i64 10000
; AVX2-NEXT:    [[SCEVGEP2:%.*]] = bitcast float* [[SCEVGEP]] to i8*
; AVX2-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 10000
; AVX2-NEXT:    [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
; AVX2-NEXT:    [[SCEVGEP7:%.*]] = getelementptr float, float* [[B]], i64 10000
; AVX2-NEXT:    [[SCEVGEP78:%.*]] = bitcast float* [[SCEVGEP7]] to i8*
; AVX2-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP45]]
; AVX2-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]]
; AVX2-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; AVX2-NEXT:    [[BOUND09:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP78]]
; AVX2-NEXT:    [[BOUND110:%.*]] = icmp ult i8* [[B6]], [[SCEVGEP2]]
; AVX2-NEXT:    [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
; AVX2-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
; AVX2-NEXT:    [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
; AVX2-NEXT:    br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; AVX2:       vector.ph:
; AVX2-NEXT:    br label [[VECTOR_BODY:%.*]]
; AVX2:       vector.body:
; AVX2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0
; AVX2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
; AVX2-NEXT:    [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
; AVX2-NEXT:    [[INDUCTION12:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
; AVX2-NEXT:    [[INDUCTION13:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23>
; AVX2-NEXT:    [[INDUCTION14:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>
; AVX2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; AVX2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 8
; AVX2-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 16
; AVX2-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 24
; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]]
; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]]
; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]]
; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]]
; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
; AVX2-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <8 x i32>*
; AVX2-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP9]], align 4, !alias.scope !21
; AVX2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 8
; AVX2-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <8 x i32>*
; AVX2-NEXT:    [[WIDE_LOAD15:%.*]] = load <8 x i32>, <8 x i32>* [[TMP11]], align 4, !alias.scope !21
; AVX2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 16
; AVX2-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <8 x i32>*
; AVX2-NEXT:    [[WIDE_LOAD16:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4, !alias.scope !21
; AVX2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 24
; AVX2-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <8 x i32>*
; AVX2-NEXT:    [[WIDE_LOAD17:%.*]] = load <8 x i32>, <8 x i32>* [[TMP15]], align 4, !alias.scope !21
; AVX2-NEXT:    [[TMP16:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT:    [[TMP17:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD15]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT:    [[TMP18:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD16]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT:    [[TMP19:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD17]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP0]]
; AVX2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP1]]
; AVX2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP2]]
; AVX2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP3]]
; AVX2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 0
; AVX2-NEXT:    [[TMP25:%.*]] = bitcast float* [[TMP24]] to <8 x float>*
; AVX2-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP25]], i32 4, <8 x i1> [[TMP16]], <8 x float> undef), !alias.scope !24
; AVX2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 8
; AVX2-NEXT:    [[TMP27:%.*]] = bitcast float* [[TMP26]] to <8 x float>*
; AVX2-NEXT:    [[WIDE_MASKED_LOAD18:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP27]], i32 4, <8 x i1> [[TMP17]], <8 x float> undef), !alias.scope !24
; AVX2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 16
; AVX2-NEXT:    [[TMP29:%.*]] = bitcast float* [[TMP28]] to <8 x float>*
; AVX2-NEXT:    [[WIDE_MASKED_LOAD19:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP29]], i32 4, <8 x i1> [[TMP18]], <8 x float> undef), !alias.scope !24
; AVX2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 24
; AVX2-NEXT:    [[TMP31:%.*]] = bitcast float* [[TMP30]] to <8 x float>*
; AVX2-NEXT:    [[WIDE_MASKED_LOAD20:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP31]], i32 4, <8 x i1> [[TMP19]], <8 x float> undef), !alias.scope !24
; AVX2-NEXT:    [[TMP32:%.*]] = sitofp <8 x i32> [[WIDE_LOAD]] to <8 x float>
; AVX2-NEXT:    [[TMP33:%.*]] = sitofp <8 x i32> [[WIDE_LOAD15]] to <8 x float>
; AVX2-NEXT:    [[TMP34:%.*]] = sitofp <8 x i32> [[WIDE_LOAD16]] to <8 x float>
; AVX2-NEXT:    [[TMP35:%.*]] = sitofp <8 x i32> [[WIDE_LOAD17]] to <8 x float>
; AVX2-NEXT:    [[TMP36:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD]], [[TMP32]]
; AVX2-NEXT:    [[TMP37:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD18]], [[TMP33]]
; AVX2-NEXT:    [[TMP38:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD19]], [[TMP34]]
; AVX2-NEXT:    [[TMP39:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD20]], [[TMP35]]
; AVX2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP0]]
; AVX2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP1]]
; AVX2-NEXT:    [[TMP42:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP2]]
; AVX2-NEXT:    [[TMP43:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP3]]
; AVX2-NEXT:    [[TMP44:%.*]] = getelementptr inbounds float, float* [[TMP40]], i32 0
; AVX2-NEXT:    [[TMP45:%.*]] = bitcast float* [[TMP44]] to <8 x float>*
; AVX2-NEXT:    call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP36]], <8 x float>* [[TMP45]], i32 4, <8 x i1> [[TMP16]]), !alias.scope !26, !noalias !28
; AVX2-NEXT:    [[TMP46:%.*]] = getelementptr inbounds float, float* [[TMP40]], i32 8
; AVX2-NEXT:    [[TMP47:%.*]] = bitcast float* [[TMP46]] to <8 x float>*
; AVX2-NEXT:    call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP37]], <8 x float>* [[TMP47]], i32 4, <8 x i1> [[TMP17]]), !alias.scope !26, !noalias !28
; AVX2-NEXT:    [[TMP48:%.*]] = getelementptr inbounds float, float* [[TMP40]], i32 16
; AVX2-NEXT:    [[TMP49:%.*]] = bitcast float* [[TMP48]] to <8 x float>*
; AVX2-NEXT:    call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP38]], <8 x float>* [[TMP49]], i32 4, <8 x i1> [[TMP18]]), !alias.scope !26, !noalias !28
; AVX2-NEXT:    [[TMP50:%.*]] = getelementptr inbounds float, float* [[TMP40]], i32 24
; AVX2-NEXT:    [[TMP51:%.*]] = bitcast float* [[TMP50]] to <8 x float>*
; AVX2-NEXT:    call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP39]], <8 x float>* [[TMP51]], i32 4, <8 x i1> [[TMP19]]), !alias.scope !26, !noalias !28
; AVX2-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 32
; AVX2-NEXT:    [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984
; AVX2-NEXT:    br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !29
; AVX2:       middle.block:
; AVX2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10000, 9984
; AVX2-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; AVX2:       scalar.ph:
; AVX2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; AVX2-NEXT:    br label [[FOR_BODY:%.*]]
; AVX2:       for.body:
; AVX2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX2-NEXT:    [[TMP53:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; AVX2-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP53]], 100
; AVX2-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX2:       if.then:
; AVX2-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]]
; AVX2-NEXT:    [[TMP54:%.*]] = load float, float* [[ARRAYIDX3]], align 4
; AVX2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP53]] to float
; AVX2-NEXT:    [[ADD:%.*]] = fadd float [[TMP54]], [[CONV]]
; AVX2-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
; AVX2-NEXT:    store float [[ADD]], float* [[ARRAYIDX7]], align 4
; AVX2-NEXT:    br label [[FOR_INC]]
; AVX2:       for.inc:
; AVX2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX2-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
; AVX2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !30
; AVX2:       for.end:
; AVX2-NEXT:    ret void
;
; AVX512-LABEL: @foo2(
; AVX512-NEXT:  entry:
; AVX512-NEXT:    [[A1:%.*]] = bitcast float* [[A:%.*]] to i8*
; AVX512-NEXT:    [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8*
; AVX512-NEXT:    [[B6:%.*]] = bitcast float* [[B:%.*]] to i8*
; AVX512-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; AVX512:       vector.memcheck:
; AVX512-NEXT:    [[SCEVGEP:%.*]] = getelementptr float, float* [[A]], i64 10000
; AVX512-NEXT:    [[SCEVGEP2:%.*]] = bitcast float* [[SCEVGEP]] to i8*
; AVX512-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 10000
; AVX512-NEXT:    [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
; AVX512-NEXT:    [[SCEVGEP7:%.*]] = getelementptr float, float* [[B]], i64 10000
; AVX512-NEXT:    [[SCEVGEP78:%.*]] = bitcast float* [[SCEVGEP7]] to i8*
; AVX512-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP45]]
; AVX512-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]]
; AVX512-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; AVX512-NEXT:    [[BOUND09:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP78]]
; AVX512-NEXT:    [[BOUND110:%.*]] = icmp ult i8* [[B6]], [[SCEVGEP2]]
; AVX512-NEXT:    [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
; AVX512-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
; AVX512-NEXT:    [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
; AVX512-NEXT:    br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; AVX512:       vector.ph:
; AVX512-NEXT:    br label [[VECTOR_BODY:%.*]]
; AVX512:       vector.body:
; AVX512-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> undef, i64 [[INDEX]], i32 0
; AVX512-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> undef, <16 x i32> zeroinitializer
; AVX512-NEXT:    [[INDUCTION:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
; AVX512-NEXT:    [[INDUCTION12:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], <i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>
; AVX512-NEXT:    [[INDUCTION13:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], <i64 32, i64 33, i64 34, i64 35, i64 36, i64 37, i64 38, i64 39, i64 40, i64 41, i64 42, i64 43, i64 44, i64 45, i64 46, i64 47>
; AVX512-NEXT:    [[INDUCTION14:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], <i64 48, i64 49, i64 50, i64 51, i64 52, i64 53, i64 54, i64 55, i64 56, i64 57, i64 58, i64 59, i64 60, i64 61, i64 62, i64 63>
; AVX512-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; AVX512-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 16
; AVX512-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 32
; AVX512-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 48
; AVX512-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]]
; AVX512-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]]
; AVX512-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]]
; AVX512-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]]
; AVX512-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
; AVX512-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <16 x i32>*
; AVX512-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP9]], align 4, !alias.scope !21
; AVX512-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 16
; AVX512-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <16 x i32>*
; AVX512-NEXT:    [[WIDE_LOAD15:%.*]] = load <16 x i32>, <16 x i32>* [[TMP11]], align 4, !alias.scope !21
; AVX512-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 32
; AVX512-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <16 x i32>*
; AVX512-NEXT:    [[WIDE_LOAD16:%.*]] = load <16 x i32>, <16 x i32>* [[TMP13]], align 4, !alias.scope !21
; AVX512-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 48
; AVX512-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <16 x i32>*
; AVX512-NEXT:    [[WIDE_LOAD17:%.*]] = load <16 x i32>, <16 x i32>* [[TMP15]], align 4, !alias.scope !21
; AVX512-NEXT:    [[TMP16:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT:    [[TMP17:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD15]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT:    [[TMP18:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD16]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT:    [[TMP19:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD17]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT:    [[TMP20:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP0]]
; AVX512-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP1]]
; AVX512-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP2]]
; AVX512-NEXT:    [[TMP23:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP3]]
; AVX512-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 0
; AVX512-NEXT:    [[TMP25:%.*]] = bitcast float* [[TMP24]] to <16 x float>*
; AVX512-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* [[TMP25]], i32 4, <16 x i1> [[TMP16]], <16 x float> undef), !alias.scope !24
; AVX512-NEXT:    [[TMP26:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 16
; AVX512-NEXT:    [[TMP27:%.*]] = bitcast float* [[TMP26]] to <16 x float>*
; AVX512-NEXT:    [[WIDE_MASKED_LOAD18:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* [[TMP27]], i32 4, <16 x i1> [[TMP17]], <16 x float> undef), !alias.scope !24
; AVX512-NEXT:    [[TMP28:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 32
; AVX512-NEXT:    [[TMP29:%.*]] = bitcast float* [[TMP28]] to <16 x float>*
; AVX512-NEXT:    [[WIDE_MASKED_LOAD19:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* [[TMP29]], i32 4, <16 x i1> [[TMP18]], <16 x float> undef), !alias.scope !24
; AVX512-NEXT:    [[TMP30:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 48
; AVX512-NEXT:    [[TMP31:%.*]] = bitcast float* [[TMP30]] to <16 x float>*
; AVX512-NEXT:    [[WIDE_MASKED_LOAD20:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* [[TMP31]], i32 4, <16 x i1> [[TMP19]], <16 x float> undef), !alias.scope !24
; AVX512-NEXT:    [[TMP32:%.*]] = sitofp <16 x i32> [[WIDE_LOAD]] to <16 x float>
; AVX512-NEXT:    [[TMP33:%.*]] = sitofp <16 x i32> [[WIDE_LOAD15]] to <16 x float>
; AVX512-NEXT:    [[TMP34:%.*]] = sitofp <16 x i32> [[WIDE_LOAD16]] to <16 x float>
; AVX512-NEXT:    [[TMP35:%.*]] = sitofp <16 x i32> [[WIDE_LOAD17]] to <16 x float>
; AVX512-NEXT:    [[TMP36:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD]], [[TMP32]]
; AVX512-NEXT:    [[TMP37:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD18]], [[TMP33]]
; AVX512-NEXT:    [[TMP38:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD19]], [[TMP34]]
; AVX512-NEXT:    [[TMP39:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD20]], [[TMP35]]
; AVX512-NEXT:    [[TMP40:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP0]]
; AVX512-NEXT:    [[TMP41:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP1]]
; AVX512-NEXT:    [[TMP42:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP2]]
; AVX512-NEXT:    [[TMP43:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP3]]
; AVX512-NEXT:    [[TMP44:%.*]] = getelementptr inbounds float, float* [[TMP40]], i32 0
; AVX512-NEXT:    [[TMP45:%.*]] = bitcast float* [[TMP44]] to <16 x float>*
; AVX512-NEXT:    call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> [[TMP36]], <16 x float>* [[TMP45]], i32 4, <16 x i1> [[TMP16]]), !alias.scope !26, !noalias !28
; AVX512-NEXT:    [[TMP46:%.*]] = getelementptr inbounds float, float* [[TMP40]], i32 16
; AVX512-NEXT:    [[TMP47:%.*]] = bitcast float* [[TMP46]] to <16 x float>*
; AVX512-NEXT:    call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> [[TMP37]], <16 x float>* [[TMP47]], i32 4, <16 x i1> [[TMP17]]), !alias.scope !26, !noalias !28
; AVX512-NEXT:    [[TMP48:%.*]] = getelementptr inbounds float, float* [[TMP40]], i32 32
; AVX512-NEXT:    [[TMP49:%.*]] = bitcast float* [[TMP48]] to <16 x float>*
; AVX512-NEXT:    call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> [[TMP38]], <16 x float>* [[TMP49]], i32 4, <16 x i1> [[TMP18]]), !alias.scope !26, !noalias !28
; AVX512-NEXT:    [[TMP50:%.*]] = getelementptr inbounds float, float* [[TMP40]], i32 48
; AVX512-NEXT:    [[TMP51:%.*]] = bitcast float* [[TMP50]] to <16 x float>*
; AVX512-NEXT:    call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> [[TMP39]], <16 x float>* [[TMP51]], i32 4, <16 x i1> [[TMP19]]), !alias.scope !26, !noalias !28
; AVX512-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 64
; AVX512-NEXT:    [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984
; AVX512-NEXT:    br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !29
; AVX512:       middle.block:
; AVX512-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10000, 9984
; AVX512-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; AVX512:       scalar.ph:
; AVX512-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; AVX512-NEXT:    br label [[FOR_BODY:%.*]]
; AVX512:       for.body:
; AVX512-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX512-NEXT:    [[TMP53:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; AVX512-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP53]], 100
; AVX512-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX512:       if.then:
; AVX512-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]]
; AVX512-NEXT:    [[TMP54:%.*]] = load float, float* [[ARRAYIDX3]], align 4
; AVX512-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP53]] to float
; AVX512-NEXT:    [[ADD:%.*]] = fadd float [[TMP54]], [[CONV]]
; AVX512-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
; AVX512-NEXT:    store float [[ADD]], float* [[ARRAYIDX7]], align 4
; AVX512-NEXT:    br label [[FOR_INC]]
; AVX512:       for.inc:
; AVX512-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX512-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
; AVX512-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !30
; AVX512:       for.end:
; AVX512-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %for.inc, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
  %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %cmp1 = icmp slt i32 %0, 100
  br i1 %cmp1, label %if.then, label %for.inc

if.then:                                          ; preds = %for.body
  %arrayidx3 = getelementptr inbounds float, float* %B, i64 %indvars.iv
  %1 = load float, float* %arrayidx3, align 4
  %conv = sitofp i32 %0 to float
  %add = fadd float %1, %conv
  %arrayidx7 = getelementptr inbounds float, float* %A, i64 %indvars.iv
  store float %add, float* %arrayidx7, align 4
  br label %for.inc

for.inc:                                          ; preds = %for.body, %if.then
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.inc
  ret void
}
; The source code:
;
;void foo3(double *A, double *B, int *trigger) {
;
;  for (int i=0; i<10000; i++) {
;    if (trigger[i] < 100) {
;      A[i] = B[i] + trigger[i];
;    }
;  }
;}
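;
; Same pattern as foo2, but on doubles: the checks below expect AVX1 and AVX2
; (covered by the shared AVX prefix) to use four interleaved <4 x double>
; parts and AVX512 to use four interleaved <8 x double> parts, again via
; @llvm.masked.load/@llvm.masked.store.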
define void @foo3(double* nocapture %A, double* nocapture readonly %B, i32* nocapture readonly %trigger) local_unnamed_addr #0 {
; AVX-LABEL: @foo3(
; AVX-NEXT:  entry:
; AVX-NEXT:    [[A1:%.*]] = bitcast double* [[A:%.*]] to i8*
; AVX-NEXT:    [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8*
; AVX-NEXT:    [[B6:%.*]] = bitcast double* [[B:%.*]] to i8*
; AVX-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; AVX:       vector.memcheck:
; AVX-NEXT:    [[SCEVGEP:%.*]] = getelementptr double, double* [[A]], i64 10000
; AVX-NEXT:    [[SCEVGEP2:%.*]] = bitcast double* [[SCEVGEP]] to i8*
; AVX-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 10000
; AVX-NEXT:    [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
; AVX-NEXT:    [[SCEVGEP7:%.*]] = getelementptr double, double* [[B]], i64 10000
; AVX-NEXT:    [[SCEVGEP78:%.*]] = bitcast double* [[SCEVGEP7]] to i8*
; AVX-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP45]]
; AVX-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]]
; AVX-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; AVX-NEXT:    [[BOUND09:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP78]]
; AVX-NEXT:    [[BOUND110:%.*]] = icmp ult i8* [[B6]], [[SCEVGEP2]]
; AVX-NEXT:    [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
; AVX-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
; AVX-NEXT:    [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
; AVX-NEXT:    br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; AVX:       vector.ph:
; AVX-NEXT:    br label [[VECTOR_BODY:%.*]]
; AVX:       vector.body:
; AVX-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0
; AVX-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
; AVX-NEXT:    [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
; AVX-NEXT:    [[INDUCTION12:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 4, i64 5, i64 6, i64 7>
; AVX-NEXT:    [[INDUCTION13:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 8, i64 9, i64 10, i64 11>
; AVX-NEXT:    [[INDUCTION14:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 12, i64 13, i64 14, i64 15>
; AVX-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; AVX-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
; AVX-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 8
; AVX-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 12
; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]]
; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]]
; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]]
; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]]
; AVX-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
; AVX-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
; AVX-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4, !alias.scope !31
; AVX-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 4
; AVX-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
; AVX-NEXT:    [[WIDE_LOAD15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP11]], align 4, !alias.scope !31
; AVX-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 8
; AVX-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>*
; AVX-NEXT:    [[WIDE_LOAD16:%.*]] = load <4 x i32>, <4 x i32>* [[TMP13]], align 4, !alias.scope !31
; AVX-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 12
; AVX-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <4 x i32>*
; AVX-NEXT:    [[WIDE_LOAD17:%.*]] = load <4 x i32>, <4 x i32>* [[TMP15]], align 4, !alias.scope !31
; AVX-NEXT:    [[TMP16:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100>
; AVX-NEXT:    [[TMP17:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD15]], <i32 100, i32 100, i32 100, i32 100>
; AVX-NEXT:    [[TMP18:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD16]], <i32 100, i32 100, i32 100, i32 100>
; AVX-NEXT:    [[TMP19:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD17]], <i32 100, i32 100, i32 100, i32 100>
; AVX-NEXT:    [[TMP20:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP0]]
; AVX-NEXT:    [[TMP21:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP1]]
; AVX-NEXT:    [[TMP22:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP2]]
; AVX-NEXT:    [[TMP23:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP3]]
; AVX-NEXT:    [[TMP24:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 0
; AVX-NEXT:    [[TMP25:%.*]] = bitcast double* [[TMP24]] to <4 x double>*
; AVX-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP25]], i32 8, <4 x i1> [[TMP16]], <4 x double> undef), !alias.scope !34
; AVX-NEXT:    [[TMP26:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 4
; AVX-NEXT:    [[TMP27:%.*]] = bitcast double* [[TMP26]] to <4 x double>*
; AVX-NEXT:    [[WIDE_MASKED_LOAD18:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP27]], i32 8, <4 x i1> [[TMP17]], <4 x double> undef), !alias.scope !34
; AVX-NEXT:    [[TMP28:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 8
; AVX-NEXT:    [[TMP29:%.*]] = bitcast double* [[TMP28]] to <4 x double>*
; AVX-NEXT:    [[WIDE_MASKED_LOAD19:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP29]], i32 8, <4 x i1> [[TMP18]], <4 x double> undef), !alias.scope !34
; AVX-NEXT:    [[TMP30:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 12
; AVX-NEXT:    [[TMP31:%.*]] = bitcast double* [[TMP30]] to <4 x double>*
; AVX-NEXT:    [[WIDE_MASKED_LOAD20:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP31]], i32 8, <4 x i1> [[TMP19]], <4 x double> undef), !alias.scope !34
; AVX-NEXT:    [[TMP32:%.*]] = sitofp <4 x i32> [[WIDE_LOAD]] to <4 x double>
; AVX-NEXT:    [[TMP33:%.*]] = sitofp <4 x i32> [[WIDE_LOAD15]] to <4 x double>
; AVX-NEXT:    [[TMP34:%.*]] = sitofp <4 x i32> [[WIDE_LOAD16]] to <4 x double>
; AVX-NEXT:    [[TMP35:%.*]] = sitofp <4 x i32> [[WIDE_LOAD17]] to <4 x double>
; AVX-NEXT:    [[TMP36:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], [[TMP32]]
; AVX-NEXT:    [[TMP37:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD18]], [[TMP33]]
; AVX-NEXT:    [[TMP38:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD19]], [[TMP34]]
; AVX-NEXT:    [[TMP39:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD20]], [[TMP35]]
; AVX-NEXT:    [[TMP40:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[TMP0]]
; AVX-NEXT:    [[TMP41:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[TMP1]]
; AVX-NEXT:    [[TMP42:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[TMP2]]
; AVX-NEXT:    [[TMP43:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[TMP3]]
; AVX-NEXT:    [[TMP44:%.*]] = getelementptr inbounds double, double* [[TMP40]], i32 0
; AVX-NEXT:    [[TMP45:%.*]] = bitcast double* [[TMP44]] to <4 x double>*
; AVX-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[TMP36]], <4 x double>* [[TMP45]], i32 8, <4 x i1> [[TMP16]]), !alias.scope !36, !noalias !38
; AVX-NEXT:    [[TMP46:%.*]] = getelementptr inbounds double, double* [[TMP40]], i32 4
; AVX-NEXT:    [[TMP47:%.*]] = bitcast double* [[TMP46]] to <4 x double>*
; AVX-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[TMP37]], <4 x double>* [[TMP47]], i32 8, <4 x i1> [[TMP17]]), !alias.scope !36, !noalias !38
; AVX-NEXT:    [[TMP48:%.*]] = getelementptr inbounds double, double* [[TMP40]], i32 8
; AVX-NEXT:    [[TMP49:%.*]] = bitcast double* [[TMP48]] to <4 x double>*
; AVX-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[TMP38]], <4 x double>* [[TMP49]], i32 8, <4 x i1> [[TMP18]]), !alias.scope !36, !noalias !38
; AVX-NEXT:    [[TMP50:%.*]] = getelementptr inbounds double, double* [[TMP40]], i32 12
; AVX-NEXT:    [[TMP51:%.*]] = bitcast double* [[TMP50]] to <4 x double>*
; AVX-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[TMP39]], <4 x double>* [[TMP51]], i32 8, <4 x i1> [[TMP19]]), !alias.scope !36, !noalias !38
; AVX-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; AVX-NEXT:    [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
; AVX-NEXT:    br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !39
; AVX:       middle.block:
; AVX-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10000, 10000
; AVX-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; AVX:       scalar.ph:
; AVX-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; AVX-NEXT:    br label [[FOR_BODY:%.*]]
; AVX:       for.body:
; AVX-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX-NEXT:    [[TMP53:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; AVX-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP53]], 100
; AVX-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX:       if.then:
; AVX-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDVARS_IV]]
; AVX-NEXT:    [[TMP54:%.*]] = load double, double* [[ARRAYIDX3]], align 8
; AVX-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP53]] to double
; AVX-NEXT:    [[ADD:%.*]] = fadd double [[TMP54]], [[CONV]]
; AVX-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV]]
; AVX-NEXT:    store double [[ADD]], double* [[ARRAYIDX7]], align 8
; AVX-NEXT:    br label [[FOR_INC]]
; AVX:       for.inc:
; AVX-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
; AVX-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !40
; AVX:       for.end:
; AVX-NEXT:    ret void
;
; AVX512-LABEL: @foo3(
; AVX512-NEXT:  entry:
; AVX512-NEXT:    [[A1:%.*]] = bitcast double* [[A:%.*]] to i8*
; AVX512-NEXT:    [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8*
; AVX512-NEXT:    [[B6:%.*]] = bitcast double* [[B:%.*]] to i8*
; AVX512-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; AVX512:       vector.memcheck:
; AVX512-NEXT:    [[SCEVGEP:%.*]] = getelementptr double, double* [[A]], i64 10000
; AVX512-NEXT:    [[SCEVGEP2:%.*]] = bitcast double* [[SCEVGEP]] to i8*
; AVX512-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 10000
; AVX512-NEXT:    [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
; AVX512-NEXT:    [[SCEVGEP7:%.*]] = getelementptr double, double* [[B]], i64 10000
; AVX512-NEXT:    [[SCEVGEP78:%.*]] = bitcast double* [[SCEVGEP7]] to i8*
; AVX512-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP45]]
; AVX512-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]]
; AVX512-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; AVX512-NEXT:    [[BOUND09:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP78]]
; AVX512-NEXT:    [[BOUND110:%.*]] = icmp ult i8* [[B6]], [[SCEVGEP2]]
; AVX512-NEXT:    [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
; AVX512-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
; AVX512-NEXT:    [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
; AVX512-NEXT:    br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; AVX512:       vector.ph:
; AVX512-NEXT:    br label [[VECTOR_BODY:%.*]]
; AVX512:       vector.body:
; AVX512-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0
; AVX512-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512-NEXT:    [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
; AVX512-NEXT:    [[INDUCTION12:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
; AVX512-NEXT:    [[INDUCTION13:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23>
; AVX512-NEXT:    [[INDUCTION14:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>
; AVX512-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; AVX512-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 8
; AVX512-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 16
; AVX512-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 24
; AVX512-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]]
; AVX512-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]]
; AVX512-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]]
; AVX512-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]]
; AVX512-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
; AVX512-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <8 x i32>*
; AVX512-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP9]], align 4, !alias.scope !31
; AVX512-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 8
; AVX512-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <8 x i32>*
; AVX512-NEXT:    [[WIDE_LOAD15:%.*]] = load <8 x i32>, <8 x i32>* [[TMP11]], align 4, !alias.scope !31
; AVX512-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 16
; AVX512-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <8 x i32>*
; AVX512-NEXT:    [[WIDE_LOAD16:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4, !alias.scope !31
; AVX512-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 24
; AVX512-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <8 x i32>*
; AVX512-NEXT:    [[WIDE_LOAD17:%.*]] = load <8 x i32>, <8 x i32>* [[TMP15]], align 4, !alias.scope !31
; AVX512-NEXT:    [[TMP16:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT:    [[TMP17:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD15]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT:    [[TMP18:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD16]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT:    [[TMP19:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD17]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT:    [[TMP20:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP0]]
; AVX512-NEXT:    [[TMP21:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP1]]
; AVX512-NEXT:    [[TMP22:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP2]]
; AVX512-NEXT:    [[TMP23:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP3]]
; AVX512-NEXT:    [[TMP24:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 0
; AVX512-NEXT:    [[TMP25:%.*]] = bitcast double* [[TMP24]] to <8 x double>*
; AVX512-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP25]], i32 8, <8 x i1> [[TMP16]], <8 x double> undef), !alias.scope !34
; AVX512-NEXT:    [[TMP26:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 8
; AVX512-NEXT:    [[TMP27:%.*]] = bitcast double* [[TMP26]] to <8 x double>*
; AVX512-NEXT:    [[WIDE_MASKED_LOAD18:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP27]], i32 8, <8 x i1> [[TMP17]], <8 x double> undef), !alias.scope !34
; AVX512-NEXT:    [[TMP28:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 16
; AVX512-NEXT:    [[TMP29:%.*]] = bitcast double* [[TMP28]] to <8 x double>*
; AVX512-NEXT:    [[WIDE_MASKED_LOAD19:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP29]], i32 8, <8 x i1> [[TMP18]], <8 x double> undef), !alias.scope !34
; AVX512-NEXT:    [[TMP30:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 24
; AVX512-NEXT:    [[TMP31:%.*]] = bitcast double* [[TMP30]] to <8 x double>*
; AVX512-NEXT:    [[WIDE_MASKED_LOAD20:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP31]], i32 8, <8 x i1> [[TMP19]], <8 x double> undef), !alias.scope !34
; AVX512-NEXT:    [[TMP32:%.*]] = sitofp <8 x i32> [[WIDE_LOAD]] to <8 x double>
; AVX512-NEXT:    [[TMP33:%.*]] = sitofp <8 x i32> [[WIDE_LOAD15]] to <8 x double>
; AVX512-NEXT:    [[TMP34:%.*]] = sitofp <8 x i32> [[WIDE_LOAD16]] to <8 x double>
; AVX512-NEXT:    [[TMP35:%.*]] = sitofp <8 x i32> [[WIDE_LOAD17]] to <8 x double>
; AVX512-NEXT:    [[TMP36:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD]], [[TMP32]]
; AVX512-NEXT:    [[TMP37:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD18]], [[TMP33]]
; AVX512-NEXT:    [[TMP38:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD19]], [[TMP34]]
; AVX512-NEXT:    [[TMP39:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD20]], [[TMP35]]
; AVX512-NEXT:    [[TMP40:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[TMP0]]
; AVX512-NEXT:    [[TMP41:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[TMP1]]
; AVX512-NEXT:    [[TMP42:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[TMP2]]
; AVX512-NEXT:    [[TMP43:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[TMP3]]
; AVX512-NEXT:    [[TMP44:%.*]] = getelementptr inbounds double, double* [[TMP40]], i32 0
; AVX512-NEXT:    [[TMP45:%.*]] = bitcast double* [[TMP44]] to <8 x double>*
; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[TMP36]], <8 x double>* [[TMP45]], i32 8, <8 x i1> [[TMP16]]), !alias.scope !36, !noalias !38
; AVX512-NEXT:    [[TMP46:%.*]] = getelementptr inbounds double, double* [[TMP40]], i32 8
; AVX512-NEXT:    [[TMP47:%.*]] = bitcast double* [[TMP46]] to <8 x double>*
; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[TMP37]], <8 x double>* [[TMP47]], i32 8, <8 x i1> [[TMP17]]), !alias.scope !36, !noalias !38
; AVX512-NEXT:    [[TMP48:%.*]] = getelementptr inbounds double, double* [[TMP40]], i32 16
; AVX512-NEXT:    [[TMP49:%.*]] = bitcast double* [[TMP48]] to <8 x double>*
; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[TMP38]], <8 x double>* [[TMP49]], i32 8, <8 x i1> [[TMP18]]), !alias.scope !36, !noalias !38
; AVX512-NEXT:    [[TMP50:%.*]] = getelementptr inbounds double, double* [[TMP40]], i32 24
; AVX512-NEXT:    [[TMP51:%.*]] = bitcast double* [[TMP50]] to <8 x double>*
; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[TMP39]], <8 x double>* [[TMP51]], i32 8, <8 x i1> [[TMP19]]), !alias.scope !36, !noalias !38
; AVX512-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 32
; AVX512-NEXT:    [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984
; AVX512-NEXT:    br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !39
; AVX512:       middle.block:
; AVX512-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10000, 9984
; AVX512-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; AVX512:       scalar.ph:
; AVX512-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; AVX512-NEXT:    br label [[FOR_BODY:%.*]]
; AVX512:       for.body:
; AVX512-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX512-NEXT:    [[TMP53:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; AVX512-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP53]], 100
; AVX512-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX512:       if.then:
; AVX512-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDVARS_IV]]
; AVX512-NEXT:    [[TMP54:%.*]] = load double, double* [[ARRAYIDX3]], align 8
; AVX512-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP53]] to double
; AVX512-NEXT:    [[ADD:%.*]] = fadd double [[TMP54]], [[CONV]]
; AVX512-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV]]
; AVX512-NEXT:    store double [[ADD]], double* [[ARRAYIDX7]], align 8
; AVX512-NEXT:    br label [[FOR_INC]]
; AVX512:       for.inc:
; AVX512-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX512-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
; AVX512-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !40
; AVX512:       for.end:
; AVX512-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %for.inc, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
  %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %cmp1 = icmp slt i32 %0, 100
  br i1 %cmp1, label %if.then, label %for.inc

if.then:                                          ; preds = %for.body
  %arrayidx3 = getelementptr inbounds double, double* %B, i64 %indvars.iv
  %1 = load double, double* %arrayidx3, align 8
  %conv = sitofp i32 %0 to double
  %add = fadd double %1, %conv
  %arrayidx7 = getelementptr inbounds double, double* %A, i64 %indvars.iv
  store double %add, double* %arrayidx7, align 8
  br label %for.inc

for.inc:                                          ; preds = %for.body, %if.then
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.inc
  ret void
}
; The source code:
;
;void foo4(double *A, double *B, int *trigger) {
;
;  for (int i=0; i<10000; i += 16) {
;    if (trigger[i] < 100) {
;      A[i] = B[i*2] + trigger[i];   << non-consecutive access
;    }
;  }
;}
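;
; With an induction step of 16 and B indexed by 2*i, neither access is
; consecutive, so plain masked loads/stores cannot be used. The checks below
; expect the AVX targets to leave the loop scalar, while AVX512 vectorizes it
; with @llvm.masked.gather/@llvm.masked.scatter over <8 x i64> index vectors.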
define void @foo4(double* nocapture %A, double* nocapture readonly %B, i32* nocapture readonly %trigger) local_unnamed_addr #0 {
; AVX-LABEL: @foo4(
; AVX-NEXT:  entry:
; AVX-NEXT:    br label [[FOR_BODY:%.*]]
; AVX:       for.body:
; AVX-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], i64 [[INDVARS_IV]]
; AVX-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; AVX-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP0]], 100
; AVX-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX:       if.then:
; AVX-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1
; AVX-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 [[TMP1]]
; AVX-NEXT:    [[TMP2:%.*]] = load double, double* [[ARRAYIDX3]], align 8
; AVX-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
; AVX-NEXT:    [[ADD:%.*]] = fadd double [[TMP2]], [[CONV]]
; AVX-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[INDVARS_IV]]
; AVX-NEXT:    store double [[ADD]], double* [[ARRAYIDX7]], align 8
; AVX-NEXT:    br label [[FOR_INC]]
; AVX:       for.inc:
; AVX-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16
; AVX-NEXT:    [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], 10000
; AVX-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; AVX:       for.end:
; AVX-NEXT:    ret void
;
; AVX512-LABEL: @foo4(
; AVX512-NEXT:  entry:
; AVX512-NEXT:    [[A1:%.*]] = bitcast double* [[A:%.*]] to i8*
; AVX512-NEXT:    [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8*
; AVX512-NEXT:    [[B6:%.*]] = bitcast double* [[B:%.*]] to i8*
; AVX512-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; AVX512:       vector.memcheck:
; AVX512-NEXT:    [[SCEVGEP:%.*]] = getelementptr double, double* [[A]], i64 9985
; AVX512-NEXT:    [[SCEVGEP2:%.*]] = bitcast double* [[SCEVGEP]] to i8*
; AVX512-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 9985
; AVX512-NEXT:    [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
; AVX512-NEXT:    [[SCEVGEP7:%.*]] = getelementptr double, double* [[B]], i64 19969
; AVX512-NEXT:    [[SCEVGEP78:%.*]] = bitcast double* [[SCEVGEP7]] to i8*
; AVX512-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP45]]
; AVX512-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]]
; AVX512-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; AVX512-NEXT:    [[BOUND09:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP78]]
; AVX512-NEXT:    [[BOUND110:%.*]] = icmp ult i8* [[B6]], [[SCEVGEP2]]
; AVX512-NEXT:    [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
; AVX512-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
; AVX512-NEXT:    [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
; AVX512-NEXT:    br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; AVX512:       vector.ph:
; AVX512-NEXT:    br label [[VECTOR_BODY:%.*]]
; AVX512:       vector.body:
; AVX512-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], <8 x i64> [[VEC_IND]]
; AVX512-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP0]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef), !alias.scope !41
; AVX512-NEXT:    [[TMP1:%.*]] = icmp slt <8 x i32> [[WIDE_MASKED_GATHER]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT:    [[TMP2:%.*]] = shl nuw nsw <8 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
; AVX512-NEXT:    [[TMP3:%.*]] = getelementptr inbounds double, double* [[B]], <8 x i64> [[TMP2]]
; AVX512-NEXT:    [[WIDE_MASKED_GATHER12:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> [[TMP3]], i32 8, <8 x i1> [[TMP1]], <8 x double> undef), !alias.scope !44
; AVX512-NEXT:    [[TMP4:%.*]] = sitofp <8 x i32> [[WIDE_MASKED_GATHER]] to <8 x double>
; AVX512-NEXT:    [[TMP5:%.*]] = fadd <8 x double> [[WIDE_MASKED_GATHER12]], [[TMP4]]
; AVX512-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, double* [[A]], <8 x i64> [[VEC_IND]]
; AVX512-NEXT:    call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> [[TMP5]], <8 x double*> [[TMP6]], i32 8, <8 x i1> [[TMP1]]), !alias.scope !46, !noalias !48
; AVX512-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; AVX512-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], <i64 128, i64 128, i64 128, i64 128, i64 128, i64 128, i64 128, i64 128>
; AVX512-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 624
; AVX512-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !49
; AVX512:       middle.block:
; AVX512-NEXT:    [[CMP_N:%.*]] = icmp eq i64 625, 624
; AVX512-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; AVX512:       scalar.ph:
; AVX512-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; AVX512-NEXT:    br label [[FOR_BODY:%.*]]
1452 ; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
1453 ; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
1454 ; AVX512-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1455 ; AVX512-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 100
1456 ; AVX512-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
1458 ; AVX512-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1
1459 ; AVX512-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP9]]
1460 ; AVX512-NEXT: [[TMP10:%.*]] = load double, double* [[ARRAYIDX3]], align 8
1461 ; AVX512-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP8]] to double
1462 ; AVX512-NEXT: [[ADD:%.*]] = fadd double [[TMP10]], [[CONV]]
1463 ; AVX512-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV]]
1464 ; AVX512-NEXT: store double [[ADD]], double* [[ARRAYIDX7]], align 8
1465 ; AVX512-NEXT: br label [[FOR_INC]]
; AVX512: for.inc:
1467 ; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16
1468 ; AVX512-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], 10000
1469 ; AVX512-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop !50
; AVX512: for.end:
1471 ; AVX512-NEXT: ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.inc
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
  %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %cmp1 = icmp slt i32 %0, 100
  br i1 %cmp1, label %if.then, label %for.inc

if.then:                                          ; preds = %for.body
  %1 = shl nuw nsw i64 %indvars.iv, 1
  %arrayidx3 = getelementptr inbounds double, double* %B, i64 %1
  %2 = load double, double* %arrayidx3, align 8
  %conv = sitofp i32 %0 to double
  %add = fadd double %2, %conv
  %arrayidx7 = getelementptr inbounds double, double* %A, i64 %indvars.iv
  store double %add, double* %arrayidx7, align 8
  br label %for.inc

for.inc:                                          ; preds = %for.body, %if.then
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 16
  %cmp = icmp ult i64 %indvars.iv.next, 10000
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.inc
  ret void
}
1502 @a = common global [1 x i32*] zeroinitializer, align 8
1503 @c = common global i32* null, align 8
; The loop here should not be vectorized due to a trapping
; constant expression.
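; No C source comment accompanies @foo5 in this file. A plausible
; equivalent, shown for illustration only (reconstructed from the IR
; below, using the globals @a and @c declared above), would be:
;
;int *a[1];
;int *c;
;void foo5(int *A, int *B, int *trigger) {
;  for (int i=0; i<10000; i++) {
;    if (trigger[i] < 100) {
;      A[i] = 1 / (&a[1] == &c); /* divisor may be 0: trapping constexpr */
;    }
;  }
;}
;
; Because the divisor folds to a constant expression that may evaluate to
; zero, speculating the sdiv across all lanes could introduce a trap on
; iterations where the guard is false, so the loop must stay scalar.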
1508 define void @foo5(i32* nocapture %A, i32* nocapture readnone %B, i32* nocapture readonly %trigger) local_unnamed_addr #0 {
; AVX-LABEL: @foo5(
; AVX-NEXT: entry:
1511 ; AVX-NEXT: br label [[FOR_BODY:%.*]]
; AVX: for.body:
1513 ; AVX-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
1514 ; AVX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], i64 [[INDVARS_IV]]
1515 ; AVX-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1516 ; AVX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP0]], 100
1517 ; AVX-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX: if.then:
1519 ; AVX-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
1520 ; AVX-NEXT: store i32 sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*], [1 x i32*]* @a, i64 1, i64 0), i32** @c) to i32)), i32* [[ARRAYIDX7]], align 4
1521 ; AVX-NEXT: br label [[FOR_INC]]
; AVX: for.inc:
1523 ; AVX-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1524 ; AVX-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
1525 ; AVX-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; AVX: for.end:
1527 ; AVX-NEXT: ret void
;
1529 ; AVX512-LABEL: @foo5(
1530 ; AVX512-NEXT: entry:
1531 ; AVX512-NEXT: br label [[FOR_BODY:%.*]]
; AVX512: for.body:
1533 ; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
1534 ; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], i64 [[INDVARS_IV]]
1535 ; AVX512-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1536 ; AVX512-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP0]], 100
1537 ; AVX512-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX512: if.then:
1539 ; AVX512-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
1540 ; AVX512-NEXT: store i32 sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*], [1 x i32*]* @a, i64 1, i64 0), i32** @c) to i32)), i32* [[ARRAYIDX7]], align 4
1541 ; AVX512-NEXT: br label [[FOR_INC]]
; AVX512: for.inc:
1543 ; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1544 ; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
1545 ; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; AVX512: for.end:
1547 ; AVX512-NEXT: ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %for.inc, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
  %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %cmp1 = icmp slt i32 %0, 100
  br i1 %cmp1, label %if.then, label %for.inc

if.then:                                          ; preds = %for.body
  %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  store i32 sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*], [1 x i32*]* @a, i64 1, i64 0), i32** @c) to i32)), i32* %arrayidx7, align 4
  br label %for.inc

for.inc:                                          ; preds = %for.body, %if.then
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.inc
  ret void
}
;void foo6(double *in, double *out, unsigned size, int *trigger) {
;
;  for (int i=SIZE-1; i>=0; i--) {
;    if (trigger[i] > 0) {
;      out[i] = in[i] + (double) 0.5;
;    }
;  }
;}
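; For this reverse loop, the AVX2 and AVX512 runs below are expected to use
; masked loads and stores of the contiguous block that ends at the current
; index, reversing both the data lanes and the mask; the AVX1 run stays
; scalar. A minimal sketch of the per-block pattern (VF=4; value names are
; illustrative, not check variables):
;
;   %last = getelementptr inbounds double, double* %in, i64 %i     ; lane for i
;   %lo   = getelementptr inbounds double, double* %last, i32 -3   ; lane for i-3
;   %vp   = bitcast double* %lo to <4 x double>*
;   %rm   = shufflevector <4 x i1> %mask, <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
;   %v    = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %vp, i32 8, <4 x i1> %rm, <4 x double> undef)
;   %fwd  = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>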
1583 define void @foo6(double* nocapture readonly %in, double* nocapture %out, i32 %size, i32* nocapture readonly %trigger) local_unnamed_addr #0 {
1584 ; AVX1-LABEL: @foo6(
; AVX1-NEXT: entry:
1586 ; AVX1-NEXT: br label [[FOR_BODY:%.*]]
; AVX1: for.body:
1588 ; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 4095, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
1589 ; AVX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], i64 [[INDVARS_IV]]
1590 ; AVX1-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1591 ; AVX1-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
1592 ; AVX1-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX1: if.then:
1594 ; AVX1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[IN:%.*]], i64 [[INDVARS_IV]]
1595 ; AVX1-NEXT: [[TMP1:%.*]] = load double, double* [[ARRAYIDX3]], align 8
1596 ; AVX1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e-01
1597 ; AVX1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], i64 [[INDVARS_IV]]
1598 ; AVX1-NEXT: store double [[ADD]], double* [[ARRAYIDX5]], align 8
1599 ; AVX1-NEXT: br label [[FOR_INC]]
; AVX1: for.inc:
1601 ; AVX1-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
1602 ; AVX1-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
1603 ; AVX1-NEXT: br i1 [[CMP]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; AVX1: for.end:
1605 ; AVX1-NEXT: ret void
;
1607 ; AVX2-LABEL: @foo6(
; AVX2-NEXT: entry:
1609 ; AVX2-NEXT: [[OUT1:%.*]] = bitcast double* [[OUT:%.*]] to i8*
1610 ; AVX2-NEXT: [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8*
1611 ; AVX2-NEXT: [[IN6:%.*]] = bitcast double* [[IN:%.*]] to i8*
1612 ; AVX2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
1613 ; AVX2: vector.memcheck:
1614 ; AVX2-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[OUT]], i64 4096
1615 ; AVX2-NEXT: [[SCEVGEP2:%.*]] = bitcast double* [[SCEVGEP]] to i8*
1616 ; AVX2-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 4096
1617 ; AVX2-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
1618 ; AVX2-NEXT: [[SCEVGEP7:%.*]] = getelementptr double, double* [[IN]], i64 4096
1619 ; AVX2-NEXT: [[SCEVGEP78:%.*]] = bitcast double* [[SCEVGEP7]] to i8*
1620 ; AVX2-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[OUT1]], [[SCEVGEP45]]
1621 ; AVX2-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]]
1622 ; AVX2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1623 ; AVX2-NEXT: [[BOUND09:%.*]] = icmp ult i8* [[OUT1]], [[SCEVGEP78]]
1624 ; AVX2-NEXT: [[BOUND110:%.*]] = icmp ult i8* [[IN6]], [[SCEVGEP2]]
1625 ; AVX2-NEXT: [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
1626 ; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
1627 ; AVX2-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
1628 ; AVX2-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; AVX2: vector.ph:
1630 ; AVX2-NEXT: br label [[VECTOR_BODY:%.*]]
1631 ; AVX2: vector.body:
1632 ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1633 ; AVX2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 4095, [[INDEX]]
1634 ; AVX2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[OFFSET_IDX]], i32 0
1635 ; AVX2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
1636 ; AVX2-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 -1, i64 -2, i64 -3>
1637 ; AVX2-NEXT: [[INDUCTION12:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 -4, i64 -5, i64 -6, i64 -7>
1638 ; AVX2-NEXT: [[INDUCTION13:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 -8, i64 -9, i64 -10, i64 -11>
1639 ; AVX2-NEXT: [[INDUCTION14:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 -12, i64 -13, i64 -14, i64 -15>
1640 ; AVX2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
1641 ; AVX2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -4
1642 ; AVX2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -8
1643 ; AVX2-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -12
1644 ; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]]
1645 ; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]]
1646 ; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]]
1647 ; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]]
1648 ; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
1649 ; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 -3
1650 ; AVX2-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
1651 ; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4, !alias.scope !41
1652 ; AVX2-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1653 ; AVX2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -4
1654 ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 -3
1655 ; AVX2-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>*
1656 ; AVX2-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP13]], align 4, !alias.scope !41
1657 ; AVX2-NEXT: [[REVERSE16:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD15]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1658 ; AVX2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -8
1659 ; AVX2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 -3
1660 ; AVX2-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>*
1661 ; AVX2-NEXT: [[WIDE_LOAD17:%.*]] = load <4 x i32>, <4 x i32>* [[TMP16]], align 4, !alias.scope !41
1662 ; AVX2-NEXT: [[REVERSE18:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD17]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1663 ; AVX2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -12
1664 ; AVX2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 -3
1665 ; AVX2-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
1666 ; AVX2-NEXT: [[WIDE_LOAD19:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4, !alias.scope !41
1667 ; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD19]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1668 ; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt <4 x i32> [[REVERSE]], zeroinitializer
1669 ; AVX2-NEXT: [[TMP21:%.*]] = icmp sgt <4 x i32> [[REVERSE16]], zeroinitializer
1670 ; AVX2-NEXT: [[TMP22:%.*]] = icmp sgt <4 x i32> [[REVERSE18]], zeroinitializer
1671 ; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt <4 x i32> [[REVERSE20]], zeroinitializer
1672 ; AVX2-NEXT: [[TMP24:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[TMP0]]
1673 ; AVX2-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[TMP1]]
1674 ; AVX2-NEXT: [[TMP26:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[TMP2]]
1675 ; AVX2-NEXT: [[TMP27:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[TMP3]]
1676 ; AVX2-NEXT: [[TMP28:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 0
1677 ; AVX2-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 -3
1678 ; AVX2-NEXT: [[REVERSE21:%.*]] = shufflevector <4 x i1> [[TMP20]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1679 ; AVX2-NEXT: [[TMP30:%.*]] = bitcast double* [[TMP29]] to <4 x double>*
1680 ; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP30]], i32 8, <4 x i1> [[REVERSE21]], <4 x double> undef), !alias.scope !44
1681 ; AVX2-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1682 ; AVX2-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -4
1683 ; AVX2-NEXT: [[TMP32:%.*]] = getelementptr inbounds double, double* [[TMP31]], i32 -3
1684 ; AVX2-NEXT: [[REVERSE23:%.*]] = shufflevector <4 x i1> [[TMP21]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1685 ; AVX2-NEXT: [[TMP33:%.*]] = bitcast double* [[TMP32]] to <4 x double>*
1686 ; AVX2-NEXT: [[WIDE_MASKED_LOAD24:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP33]], i32 8, <4 x i1> [[REVERSE23]], <4 x double> undef), !alias.scope !44
1687 ; AVX2-NEXT: [[REVERSE25:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD24]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1688 ; AVX2-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -8
1689 ; AVX2-NEXT: [[TMP35:%.*]] = getelementptr inbounds double, double* [[TMP34]], i32 -3
1690 ; AVX2-NEXT: [[REVERSE26:%.*]] = shufflevector <4 x i1> [[TMP22]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1691 ; AVX2-NEXT: [[TMP36:%.*]] = bitcast double* [[TMP35]] to <4 x double>*
1692 ; AVX2-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP36]], i32 8, <4 x i1> [[REVERSE26]], <4 x double> undef), !alias.scope !44
1693 ; AVX2-NEXT: [[REVERSE28:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD27]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1694 ; AVX2-NEXT: [[TMP37:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -12
1695 ; AVX2-NEXT: [[TMP38:%.*]] = getelementptr inbounds double, double* [[TMP37]], i32 -3
1696 ; AVX2-NEXT: [[REVERSE29:%.*]] = shufflevector <4 x i1> [[TMP23]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1697 ; AVX2-NEXT: [[TMP39:%.*]] = bitcast double* [[TMP38]] to <4 x double>*
1698 ; AVX2-NEXT: [[WIDE_MASKED_LOAD30:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP39]], i32 8, <4 x i1> [[REVERSE29]], <4 x double> undef), !alias.scope !44
1699 ; AVX2-NEXT: [[REVERSE31:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD30]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1700 ; AVX2-NEXT: [[TMP40:%.*]] = fadd <4 x double> [[REVERSE22]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
1701 ; AVX2-NEXT: [[TMP41:%.*]] = fadd <4 x double> [[REVERSE25]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
1702 ; AVX2-NEXT: [[TMP42:%.*]] = fadd <4 x double> [[REVERSE28]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
1703 ; AVX2-NEXT: [[TMP43:%.*]] = fadd <4 x double> [[REVERSE31]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
1704 ; AVX2-NEXT: [[TMP44:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP0]]
1705 ; AVX2-NEXT: [[TMP45:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP1]]
1706 ; AVX2-NEXT: [[TMP46:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP2]]
1707 ; AVX2-NEXT: [[TMP47:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP3]]
1708 ; AVX2-NEXT: [[REVERSE32:%.*]] = shufflevector <4 x double> [[TMP40]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1709 ; AVX2-NEXT: [[TMP48:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
1710 ; AVX2-NEXT: [[TMP49:%.*]] = getelementptr inbounds double, double* [[TMP48]], i32 -3
1711 ; AVX2-NEXT: [[TMP50:%.*]] = bitcast double* [[TMP49]] to <4 x double>*
1712 ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE32]], <4 x double>* [[TMP50]], i32 8, <4 x i1> [[REVERSE21]]), !alias.scope !46, !noalias !48
1713 ; AVX2-NEXT: [[REVERSE34:%.*]] = shufflevector <4 x double> [[TMP41]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1714 ; AVX2-NEXT: [[TMP51:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -4
1715 ; AVX2-NEXT: [[TMP52:%.*]] = getelementptr inbounds double, double* [[TMP51]], i32 -3
1716 ; AVX2-NEXT: [[TMP53:%.*]] = bitcast double* [[TMP52]] to <4 x double>*
1717 ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE34]], <4 x double>* [[TMP53]], i32 8, <4 x i1> [[REVERSE23]]), !alias.scope !46, !noalias !48
1718 ; AVX2-NEXT: [[REVERSE36:%.*]] = shufflevector <4 x double> [[TMP42]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1719 ; AVX2-NEXT: [[TMP54:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -8
1720 ; AVX2-NEXT: [[TMP55:%.*]] = getelementptr inbounds double, double* [[TMP54]], i32 -3
1721 ; AVX2-NEXT: [[TMP56:%.*]] = bitcast double* [[TMP55]] to <4 x double>*
1722 ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE36]], <4 x double>* [[TMP56]], i32 8, <4 x i1> [[REVERSE26]]), !alias.scope !46, !noalias !48
1723 ; AVX2-NEXT: [[REVERSE38:%.*]] = shufflevector <4 x double> [[TMP43]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1724 ; AVX2-NEXT: [[TMP57:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -12
1725 ; AVX2-NEXT: [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP57]], i32 -3
1726 ; AVX2-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>*
1727 ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE38]], <4 x double>* [[TMP59]], i32 8, <4 x i1> [[REVERSE29]]), !alias.scope !46, !noalias !48
1728 ; AVX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
1729 ; AVX2-NEXT: [[TMP60:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
1730 ; AVX2-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !49
1731 ; AVX2: middle.block:
1732 ; AVX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096
1733 ; AVX2-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; AVX2: scalar.ph:
1735 ; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, [[MIDDLE_BLOCK]] ], [ 4095, [[ENTRY:%.*]] ], [ 4095, [[VECTOR_MEMCHECK]] ]
1736 ; AVX2-NEXT: br label [[FOR_BODY:%.*]]
; AVX2: for.body:
1738 ; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
1739 ; AVX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
1740 ; AVX2-NEXT: [[TMP61:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1741 ; AVX2-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP61]], 0
1742 ; AVX2-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX2: if.then:
1744 ; AVX2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[INDVARS_IV]]
1745 ; AVX2-NEXT: [[TMP62:%.*]] = load double, double* [[ARRAYIDX3]], align 8
1746 ; AVX2-NEXT: [[ADD:%.*]] = fadd double [[TMP62]], 5.000000e-01
1747 ; AVX2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]]
1748 ; AVX2-NEXT: store double [[ADD]], double* [[ARRAYIDX5]], align 8
1749 ; AVX2-NEXT: br label [[FOR_INC]]
; AVX2: for.inc:
1751 ; AVX2-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
1752 ; AVX2-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
1753 ; AVX2-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !50
; AVX2: for.end:
1755 ; AVX2-NEXT: ret void
;
1757 ; AVX512-LABEL: @foo6(
1758 ; AVX512-NEXT: entry:
1759 ; AVX512-NEXT: [[OUT1:%.*]] = bitcast double* [[OUT:%.*]] to i8*
1760 ; AVX512-NEXT: [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8*
1761 ; AVX512-NEXT: [[IN6:%.*]] = bitcast double* [[IN:%.*]] to i8*
1762 ; AVX512-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
1763 ; AVX512: vector.memcheck:
1764 ; AVX512-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[OUT]], i64 4096
1765 ; AVX512-NEXT: [[SCEVGEP2:%.*]] = bitcast double* [[SCEVGEP]] to i8*
1766 ; AVX512-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 4096
1767 ; AVX512-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
1768 ; AVX512-NEXT: [[SCEVGEP7:%.*]] = getelementptr double, double* [[IN]], i64 4096
1769 ; AVX512-NEXT: [[SCEVGEP78:%.*]] = bitcast double* [[SCEVGEP7]] to i8*
1770 ; AVX512-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[OUT1]], [[SCEVGEP45]]
1771 ; AVX512-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]]
1772 ; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1773 ; AVX512-NEXT: [[BOUND09:%.*]] = icmp ult i8* [[OUT1]], [[SCEVGEP78]]
1774 ; AVX512-NEXT: [[BOUND110:%.*]] = icmp ult i8* [[IN6]], [[SCEVGEP2]]
1775 ; AVX512-NEXT: [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
1776 ; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
1777 ; AVX512-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
1778 ; AVX512-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1779 ; AVX512: vector.ph:
1780 ; AVX512-NEXT: br label [[VECTOR_BODY:%.*]]
1781 ; AVX512: vector.body:
1782 ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1783 ; AVX512-NEXT: [[OFFSET_IDX:%.*]] = sub i64 4095, [[INDEX]]
1784 ; AVX512-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[OFFSET_IDX]], i32 0
1785 ; AVX512-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
1786 ; AVX512-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 -1, i64 -2, i64 -3, i64 -4, i64 -5, i64 -6, i64 -7>
1787 ; AVX512-NEXT: [[INDUCTION12:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 -8, i64 -9, i64 -10, i64 -11, i64 -12, i64 -13, i64 -14, i64 -15>
1788 ; AVX512-NEXT: [[INDUCTION13:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 -16, i64 -17, i64 -18, i64 -19, i64 -20, i64 -21, i64 -22, i64 -23>
1789 ; AVX512-NEXT: [[INDUCTION14:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 -24, i64 -25, i64 -26, i64 -27, i64 -28, i64 -29, i64 -30, i64 -31>
1790 ; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
1791 ; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -8
1792 ; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -16
1793 ; AVX512-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -24
1794 ; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]]
1795 ; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]]
1796 ; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]]
1797 ; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]]
1798 ; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
1799 ; AVX512-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 -7
1800 ; AVX512-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <8 x i32>*
1801 ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP10]], align 4, !alias.scope !51
1802 ; AVX512-NEXT: [[REVERSE:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1803 ; AVX512-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -8
1804 ; AVX512-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 -7
1805 ; AVX512-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <8 x i32>*
1806 ; AVX512-NEXT: [[WIDE_LOAD15:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4, !alias.scope !51
1807 ; AVX512-NEXT: [[REVERSE16:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD15]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1808 ; AVX512-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -16
1809 ; AVX512-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 -7
1810 ; AVX512-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <8 x i32>*
1811 ; AVX512-NEXT: [[WIDE_LOAD17:%.*]] = load <8 x i32>, <8 x i32>* [[TMP16]], align 4, !alias.scope !51
1812 ; AVX512-NEXT: [[REVERSE18:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD17]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1813 ; AVX512-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -24
1814 ; AVX512-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 -7
1815 ; AVX512-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <8 x i32>*
1816 ; AVX512-NEXT: [[WIDE_LOAD19:%.*]] = load <8 x i32>, <8 x i32>* [[TMP19]], align 4, !alias.scope !51
1817 ; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD19]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1818 ; AVX512-NEXT: [[TMP20:%.*]] = icmp sgt <8 x i32> [[REVERSE]], zeroinitializer
1819 ; AVX512-NEXT: [[TMP21:%.*]] = icmp sgt <8 x i32> [[REVERSE16]], zeroinitializer
1820 ; AVX512-NEXT: [[TMP22:%.*]] = icmp sgt <8 x i32> [[REVERSE18]], zeroinitializer
1821 ; AVX512-NEXT: [[TMP23:%.*]] = icmp sgt <8 x i32> [[REVERSE20]], zeroinitializer
1822 ; AVX512-NEXT: [[TMP24:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[TMP0]]
1823 ; AVX512-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[TMP1]]
1824 ; AVX512-NEXT: [[TMP26:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[TMP2]]
1825 ; AVX512-NEXT: [[TMP27:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[TMP3]]
1826 ; AVX512-NEXT: [[TMP28:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 0
1827 ; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 -7
1828 ; AVX512-NEXT: [[REVERSE21:%.*]] = shufflevector <8 x i1> [[TMP20]], <8 x i1> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1829 ; AVX512-NEXT: [[TMP30:%.*]] = bitcast double* [[TMP29]] to <8 x double>*
1830 ; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP30]], i32 8, <8 x i1> [[REVERSE21]], <8 x double> undef), !alias.scope !54
1831 ; AVX512-NEXT: [[REVERSE22:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1832 ; AVX512-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -8
1833 ; AVX512-NEXT: [[TMP32:%.*]] = getelementptr inbounds double, double* [[TMP31]], i32 -7
1834 ; AVX512-NEXT: [[REVERSE23:%.*]] = shufflevector <8 x i1> [[TMP21]], <8 x i1> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1835 ; AVX512-NEXT: [[TMP33:%.*]] = bitcast double* [[TMP32]] to <8 x double>*
1836 ; AVX512-NEXT: [[WIDE_MASKED_LOAD24:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP33]], i32 8, <8 x i1> [[REVERSE23]], <8 x double> undef), !alias.scope !54
1837 ; AVX512-NEXT: [[REVERSE25:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD24]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1838 ; AVX512-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -16
1839 ; AVX512-NEXT: [[TMP35:%.*]] = getelementptr inbounds double, double* [[TMP34]], i32 -7
1840 ; AVX512-NEXT: [[REVERSE26:%.*]] = shufflevector <8 x i1> [[TMP22]], <8 x i1> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1841 ; AVX512-NEXT: [[TMP36:%.*]] = bitcast double* [[TMP35]] to <8 x double>*
1842 ; AVX512-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP36]], i32 8, <8 x i1> [[REVERSE26]], <8 x double> undef), !alias.scope !54
1843 ; AVX512-NEXT: [[REVERSE28:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD27]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1844 ; AVX512-NEXT: [[TMP37:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -24
1845 ; AVX512-NEXT: [[TMP38:%.*]] = getelementptr inbounds double, double* [[TMP37]], i32 -7
1846 ; AVX512-NEXT: [[REVERSE29:%.*]] = shufflevector <8 x i1> [[TMP23]], <8 x i1> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1847 ; AVX512-NEXT: [[TMP39:%.*]] = bitcast double* [[TMP38]] to <8 x double>*
1848 ; AVX512-NEXT: [[WIDE_MASKED_LOAD30:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP39]], i32 8, <8 x i1> [[REVERSE29]], <8 x double> undef), !alias.scope !54
1849 ; AVX512-NEXT: [[REVERSE31:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD30]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1850 ; AVX512-NEXT: [[TMP40:%.*]] = fadd <8 x double> [[REVERSE22]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
1851 ; AVX512-NEXT: [[TMP41:%.*]] = fadd <8 x double> [[REVERSE25]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
1852 ; AVX512-NEXT: [[TMP42:%.*]] = fadd <8 x double> [[REVERSE28]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
1853 ; AVX512-NEXT: [[TMP43:%.*]] = fadd <8 x double> [[REVERSE31]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
1854 ; AVX512-NEXT: [[TMP44:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP0]]
1855 ; AVX512-NEXT: [[TMP45:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP1]]
1856 ; AVX512-NEXT: [[TMP46:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP2]]
1857 ; AVX512-NEXT: [[TMP47:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP3]]
1858 ; AVX512-NEXT: [[REVERSE32:%.*]] = shufflevector <8 x double> [[TMP40]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1859 ; AVX512-NEXT: [[TMP48:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
1860 ; AVX512-NEXT: [[TMP49:%.*]] = getelementptr inbounds double, double* [[TMP48]], i32 -7
1861 ; AVX512-NEXT: [[TMP50:%.*]] = bitcast double* [[TMP49]] to <8 x double>*
1862 ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE32]], <8 x double>* [[TMP50]], i32 8, <8 x i1> [[REVERSE21]]), !alias.scope !56, !noalias !58
1863 ; AVX512-NEXT: [[REVERSE34:%.*]] = shufflevector <8 x double> [[TMP41]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1864 ; AVX512-NEXT: [[TMP51:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -8
1865 ; AVX512-NEXT: [[TMP52:%.*]] = getelementptr inbounds double, double* [[TMP51]], i32 -7
1866 ; AVX512-NEXT: [[TMP53:%.*]] = bitcast double* [[TMP52]] to <8 x double>*
1867 ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE34]], <8 x double>* [[TMP53]], i32 8, <8 x i1> [[REVERSE23]]), !alias.scope !56, !noalias !58
1868 ; AVX512-NEXT: [[REVERSE36:%.*]] = shufflevector <8 x double> [[TMP42]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1869 ; AVX512-NEXT: [[TMP54:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -16
1870 ; AVX512-NEXT: [[TMP55:%.*]] = getelementptr inbounds double, double* [[TMP54]], i32 -7
1871 ; AVX512-NEXT: [[TMP56:%.*]] = bitcast double* [[TMP55]] to <8 x double>*
1872 ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE36]], <8 x double>* [[TMP56]], i32 8, <8 x i1> [[REVERSE26]]), !alias.scope !56, !noalias !58
1873 ; AVX512-NEXT: [[REVERSE38:%.*]] = shufflevector <8 x double> [[TMP43]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1874 ; AVX512-NEXT: [[TMP57:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -24
1875 ; AVX512-NEXT: [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP57]], i32 -7
1876 ; AVX512-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <8 x double>*
1877 ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE38]], <8 x double>* [[TMP59]], i32 8, <8 x i1> [[REVERSE29]]), !alias.scope !56, !noalias !58
1878 ; AVX512-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32
1879 ; AVX512-NEXT: [[TMP60:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
1880 ; AVX512-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !59
1881 ; AVX512: middle.block:
1882 ; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096
1883 ; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1884 ; AVX512: scalar.ph:
1885 ; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, [[MIDDLE_BLOCK]] ], [ 4095, [[ENTRY:%.*]] ], [ 4095, [[VECTOR_MEMCHECK]] ]
1886 ; AVX512-NEXT: br label [[FOR_BODY:%.*]]
; AVX512: for.body:
1888 ; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
1889 ; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
1890 ; AVX512-NEXT: [[TMP61:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1891 ; AVX512-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP61]], 0
1892 ; AVX512-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX512: if.then:
1894 ; AVX512-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[INDVARS_IV]]
1895 ; AVX512-NEXT: [[TMP62:%.*]] = load double, double* [[ARRAYIDX3]], align 8
1896 ; AVX512-NEXT: [[ADD:%.*]] = fadd double [[TMP62]], 5.000000e-01
1897 ; AVX512-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]]
1898 ; AVX512-NEXT: store double [[ADD]], double* [[ARRAYIDX5]], align 8
1899 ; AVX512-NEXT: br label [[FOR_INC]]
; AVX512: for.inc:
1901 ; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
1902 ; AVX512-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
1903 ; AVX512-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !60
; AVX512: for.end:
1905 ; AVX512-NEXT: ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %for.inc, %entry
  %indvars.iv = phi i64 [ 4095, %entry ], [ %indvars.iv.next, %for.inc ]
  %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %cmp1 = icmp sgt i32 %0, 0
  br i1 %cmp1, label %if.then, label %for.inc

if.then:                                          ; preds = %for.body
  %arrayidx3 = getelementptr inbounds double, double* %in, i64 %indvars.iv
  %1 = load double, double* %arrayidx3, align 8
  %add = fadd double %1, 5.000000e-01
  %arrayidx5 = getelementptr inbounds double, double* %out, i64 %indvars.iv
  store double %add, double* %arrayidx5, align 8
  br label %for.inc

for.inc:                                          ; preds = %for.body, %if.then
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp = icmp eq i64 %indvars.iv, 0
  br i1 %cmp, label %for.end, label %for.body

for.end:                                          ; preds = %for.inc
  ret void
}
; void foo7 (double * __restrict__ out, double ** __restrict__ in,
;            bool * __restrict__ trigger, unsigned size) {
;
;   for (unsigned i=0; i<size; i++)
;     if (trigger[i] && (in[i] != 0))
;       out[i] = (double) 0.5;
; }
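; Since the i1 'trigger' values are stored as i8, the vectorized condition
; below is rebuilt by masking off the low bit, the pointer load from 'in'
; is itself masked so it only executes where the first condition holds,
; and the store mask is the AND of both conditions. A minimal sketch
; (VF=4; value names are illustrative, not check variables):
;
;   %bits = and <4 x i8> %t, <i8 1, i8 1, i8 1, i8 1>
;   %zero = icmp eq <4 x i8> %bits, zeroinitializer
;   %m0   = xor <4 x i1> %zero, <i1 true, i1 true, i1 true, i1 true>  ; trigger[i]
;   %ptrs = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* %pp, i32 8, <4 x i1> %m0, <4 x double*> undef)
;   %null = icmp eq <4 x double*> %ptrs, zeroinitializer
;   %m1   = xor <4 x i1> %null, <i1 true, i1 true, i1 true, i1 true>  ; in[i] != 0
;   %m    = and <4 x i1> %m1, %m0                                     ; store mask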
1942 define void @foo7(double* noalias nocapture %out, double** noalias nocapture readonly %in, i8* noalias nocapture readonly %trigger, i32 %size) local_unnamed_addr #0 {
1943 ; AVX1-LABEL: @foo7(
; AVX1-NEXT: entry:
1945 ; AVX1-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0
1946 ; AVX1-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
1947 ; AVX1: for.body.preheader:
1948 ; AVX1-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64
1949 ; AVX1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16
1950 ; AVX1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; AVX1: vector.ph:
1952 ; AVX1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16
1953 ; AVX1-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
1954 ; AVX1-NEXT: br label [[VECTOR_BODY:%.*]]
1955 ; AVX1: vector.body:
1956 ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1957 ; AVX1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0
1958 ; AVX1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
1959 ; AVX1-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
1960 ; AVX1-NEXT: [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 4, i64 5, i64 6, i64 7>
1961 ; AVX1-NEXT: [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 8, i64 9, i64 10, i64 11>
1962 ; AVX1-NEXT: [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 12, i64 13, i64 14, i64 15>
1963 ; AVX1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1964 ; AVX1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
1965 ; AVX1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8
1966 ; AVX1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12
1967 ; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER:%.*]], i64 [[TMP0]]
1968 ; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP1]]
1969 ; AVX1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP2]]
1970 ; AVX1-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP3]]
1971 ; AVX1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0
1972 ; AVX1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <4 x i8>*
1973 ; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP9]], align 1
1974 ; AVX1-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 4
1975 ; AVX1-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <4 x i8>*
1976 ; AVX1-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, <4 x i8>* [[TMP11]], align 1
1977 ; AVX1-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 8
1978 ; AVX1-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <4 x i8>*
1979 ; AVX1-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP13]], align 1
1980 ; AVX1-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 12
1981 ; AVX1-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <4 x i8>*
1982 ; AVX1-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, <4 x i8>* [[TMP15]], align 1
1983 ; AVX1-NEXT: [[TMP16:%.*]] = and <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1>
1984 ; AVX1-NEXT: [[TMP17:%.*]] = and <4 x i8> [[WIDE_LOAD4]], <i8 1, i8 1, i8 1, i8 1>
1985 ; AVX1-NEXT: [[TMP18:%.*]] = and <4 x i8> [[WIDE_LOAD5]], <i8 1, i8 1, i8 1, i8 1>
1986 ; AVX1-NEXT: [[TMP19:%.*]] = and <4 x i8> [[WIDE_LOAD6]], <i8 1, i8 1, i8 1, i8 1>
1987 ; AVX1-NEXT: [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP16]], zeroinitializer
1988 ; AVX1-NEXT: [[TMP21:%.*]] = icmp eq <4 x i8> [[TMP17]], zeroinitializer
1989 ; AVX1-NEXT: [[TMP22:%.*]] = icmp eq <4 x i8> [[TMP18]], zeroinitializer
1990 ; AVX1-NEXT: [[TMP23:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer
1991 ; AVX1-NEXT: [[TMP24:%.*]] = getelementptr inbounds double*, double** [[IN:%.*]], i64 [[TMP0]]
1992 ; AVX1-NEXT: [[TMP25:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[TMP1]]
1993 ; AVX1-NEXT: [[TMP26:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[TMP2]]
1994 ; AVX1-NEXT: [[TMP27:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[TMP3]]
1995 ; AVX1-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP20]], <i1 true, i1 true, i1 true, i1 true>
1996 ; AVX1-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP21]], <i1 true, i1 true, i1 true, i1 true>
1997 ; AVX1-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP22]], <i1 true, i1 true, i1 true, i1 true>
1998 ; AVX1-NEXT: [[TMP31:%.*]] = xor <4 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true>
1999 ; AVX1-NEXT: [[TMP32:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 0
2000 ; AVX1-NEXT: [[TMP33:%.*]] = bitcast double** [[TMP32]] to <4 x double*>*
2001 ; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x double*> undef)
2002 ; AVX1-NEXT: [[TMP34:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 4
2003 ; AVX1-NEXT: [[TMP35:%.*]] = bitcast double** [[TMP34]] to <4 x double*>*
2004 ; AVX1-NEXT: [[WIDE_MASKED_LOAD7:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP35]], i32 8, <4 x i1> [[TMP29]], <4 x double*> undef)
2005 ; AVX1-NEXT: [[TMP36:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 8
2006 ; AVX1-NEXT: [[TMP37:%.*]] = bitcast double** [[TMP36]] to <4 x double*>*
2007 ; AVX1-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x double*> undef)
2008 ; AVX1-NEXT: [[TMP38:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 12
2009 ; AVX1-NEXT: [[TMP39:%.*]] = bitcast double** [[TMP38]] to <4 x double*>*
2010 ; AVX1-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x double*> undef)
2011 ; AVX1-NEXT: [[TMP40:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD]], zeroinitializer
2012 ; AVX1-NEXT: [[TMP41:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD7]], zeroinitializer
2013 ; AVX1-NEXT: [[TMP42:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD8]], zeroinitializer
2014 ; AVX1-NEXT: [[TMP43:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD9]], zeroinitializer
2015 ; AVX1-NEXT: [[TMP44:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], i64 [[TMP0]]
2016 ; AVX1-NEXT: [[TMP45:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP1]]
2017 ; AVX1-NEXT: [[TMP46:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP2]]
2018 ; AVX1-NEXT: [[TMP47:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP3]]
2019 ; AVX1-NEXT: [[TMP48:%.*]] = xor <4 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true>
2020 ; AVX1-NEXT: [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true>
2021 ; AVX1-NEXT: [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true>
2022 ; AVX1-NEXT: [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true>
2023 ; AVX1-NEXT: [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]]
2024 ; AVX1-NEXT: [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]]
2025 ; AVX1-NEXT: [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]]
2026 ; AVX1-NEXT: [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]]
2027 ; AVX1-NEXT: [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
2028 ; AVX1-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>*
2029 ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]])
2030 ; AVX1-NEXT: [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 4
2031 ; AVX1-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>*
2032 ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP59]], i32 8, <4 x i1> [[TMP53]])
2033 ; AVX1-NEXT: [[TMP60:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 8
2034 ; AVX1-NEXT: [[TMP61:%.*]] = bitcast double* [[TMP60]] to <4 x double>*
2035 ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP61]], i32 8, <4 x i1> [[TMP54]])
2036 ; AVX1-NEXT: [[TMP62:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 12
2037 ; AVX1-NEXT: [[TMP63:%.*]] = bitcast double* [[TMP62]] to <4 x double>*
2038 ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]])
2039 ; AVX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
2040 ; AVX1-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2041 ; AVX1-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !41
2042 ; AVX1: middle.block:
2043 ; AVX1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
2044 ; AVX1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; AVX1: scalar.ph:
2046 ; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
2047 ; AVX1-NEXT: br label [[FOR_BODY:%.*]]
; AVX1: for.body:
2049 ; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
2050 ; AVX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[INDVARS_IV]]
2051 ; AVX1-NEXT: [[TMP65:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
2052 ; AVX1-NEXT: [[TMP66:%.*]] = and i8 [[TMP65]], 1
2053 ; AVX1-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP66]], 0
2054 ; AVX1-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]]
2055 ; AVX1: land.lhs.true:
2056 ; AVX1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[INDVARS_IV]]
2057 ; AVX1-NEXT: [[TMP67:%.*]] = load double*, double** [[ARRAYIDX2]], align 8
2058 ; AVX1-NEXT: [[CMP3:%.*]] = icmp eq double* [[TMP67]], null
2059 ; AVX1-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]]
; AVX1: if.then:
2061 ; AVX1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]]
2062 ; AVX1-NEXT: store double 5.000000e-01, double* [[ARRAYIDX5]], align 8
2063 ; AVX1-NEXT: br label [[FOR_INC]]
; AVX1: for.inc:
2065 ; AVX1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
2066 ; AVX1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
2067 ; AVX1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !42
2068 ; AVX1: for.end.loopexit:
2069 ; AVX1-NEXT: br label [[FOR_END]]
; AVX1: for.end:
2071 ; AVX1-NEXT: ret void
;
2073 ; AVX2-LABEL: @foo7(
; AVX2-NEXT: entry:
2075 ; AVX2-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0
2076 ; AVX2-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
2077 ; AVX2: for.body.preheader:
2078 ; AVX2-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64
2079 ; AVX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16
2080 ; AVX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; AVX2: vector.ph:
2082 ; AVX2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16
2083 ; AVX2-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
2084 ; AVX2-NEXT: br label [[VECTOR_BODY:%.*]]
2085 ; AVX2: vector.body:
2086 ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2087 ; AVX2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0
2088 ; AVX2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
2089 ; AVX2-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
2090 ; AVX2-NEXT: [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 4, i64 5, i64 6, i64 7>
2091 ; AVX2-NEXT: [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 8, i64 9, i64 10, i64 11>
2092 ; AVX2-NEXT: [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 12, i64 13, i64 14, i64 15>
2093 ; AVX2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
2094 ; AVX2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
2095 ; AVX2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8
2096 ; AVX2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12
2097 ; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER:%.*]], i64 [[TMP0]]
2098 ; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP1]]
2099 ; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP2]]
2100 ; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP3]]
2101 ; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0
2102 ; AVX2-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <4 x i8>*
2103 ; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP9]], align 1
2104 ; AVX2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 4
2105 ; AVX2-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <4 x i8>*
2106 ; AVX2-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, <4 x i8>* [[TMP11]], align 1
2107 ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 8
2108 ; AVX2-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <4 x i8>*
2109 ; AVX2-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP13]], align 1
2110 ; AVX2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 12
2111 ; AVX2-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <4 x i8>*
2112 ; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, <4 x i8>* [[TMP15]], align 1
2113 ; AVX2-NEXT: [[TMP16:%.*]] = and <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1>
2114 ; AVX2-NEXT: [[TMP17:%.*]] = and <4 x i8> [[WIDE_LOAD4]], <i8 1, i8 1, i8 1, i8 1>
2115 ; AVX2-NEXT: [[TMP18:%.*]] = and <4 x i8> [[WIDE_LOAD5]], <i8 1, i8 1, i8 1, i8 1>
2116 ; AVX2-NEXT: [[TMP19:%.*]] = and <4 x i8> [[WIDE_LOAD6]], <i8 1, i8 1, i8 1, i8 1>
2117 ; AVX2-NEXT: [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP16]], zeroinitializer
2118 ; AVX2-NEXT: [[TMP21:%.*]] = icmp eq <4 x i8> [[TMP17]], zeroinitializer
2119 ; AVX2-NEXT: [[TMP22:%.*]] = icmp eq <4 x i8> [[TMP18]], zeroinitializer
2120 ; AVX2-NEXT: [[TMP23:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer
2121 ; AVX2-NEXT: [[TMP24:%.*]] = getelementptr inbounds double*, double** [[IN:%.*]], i64 [[TMP0]]
2122 ; AVX2-NEXT: [[TMP25:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[TMP1]]
2123 ; AVX2-NEXT: [[TMP26:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[TMP2]]
2124 ; AVX2-NEXT: [[TMP27:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[TMP3]]
2125 ; AVX2-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP20]], <i1 true, i1 true, i1 true, i1 true>
2126 ; AVX2-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP21]], <i1 true, i1 true, i1 true, i1 true>
2127 ; AVX2-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP22]], <i1 true, i1 true, i1 true, i1 true>
2128 ; AVX2-NEXT: [[TMP31:%.*]] = xor <4 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true>
2129 ; AVX2-NEXT: [[TMP32:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 0
2130 ; AVX2-NEXT: [[TMP33:%.*]] = bitcast double** [[TMP32]] to <4 x double*>*
2131 ; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x double*> undef)
2132 ; AVX2-NEXT: [[TMP34:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 4
2133 ; AVX2-NEXT: [[TMP35:%.*]] = bitcast double** [[TMP34]] to <4 x double*>*
2134 ; AVX2-NEXT: [[WIDE_MASKED_LOAD7:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP35]], i32 8, <4 x i1> [[TMP29]], <4 x double*> undef)
2135 ; AVX2-NEXT: [[TMP36:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 8
2136 ; AVX2-NEXT: [[TMP37:%.*]] = bitcast double** [[TMP36]] to <4 x double*>*
2137 ; AVX2-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x double*> undef)
2138 ; AVX2-NEXT: [[TMP38:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 12
2139 ; AVX2-NEXT: [[TMP39:%.*]] = bitcast double** [[TMP38]] to <4 x double*>*
2140 ; AVX2-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x double*> undef)
2141 ; AVX2-NEXT: [[TMP40:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD]], zeroinitializer
2142 ; AVX2-NEXT: [[TMP41:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD7]], zeroinitializer
2143 ; AVX2-NEXT: [[TMP42:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD8]], zeroinitializer
2144 ; AVX2-NEXT: [[TMP43:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD9]], zeroinitializer
2145 ; AVX2-NEXT: [[TMP44:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], i64 [[TMP0]]
2146 ; AVX2-NEXT: [[TMP45:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP1]]
2147 ; AVX2-NEXT: [[TMP46:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP2]]
2148 ; AVX2-NEXT: [[TMP47:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP3]]
2149 ; AVX2-NEXT: [[TMP48:%.*]] = xor <4 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true>
2150 ; AVX2-NEXT: [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true>
2151 ; AVX2-NEXT: [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true>
2152 ; AVX2-NEXT: [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true>
2153 ; AVX2-NEXT: [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]]
2154 ; AVX2-NEXT: [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]]
2155 ; AVX2-NEXT: [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]]
2156 ; AVX2-NEXT: [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]]
2157 ; AVX2-NEXT: [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
2158 ; AVX2-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>*
2159 ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]])
2160 ; AVX2-NEXT: [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 4
2161 ; AVX2-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>*
2162 ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP59]], i32 8, <4 x i1> [[TMP53]])
2163 ; AVX2-NEXT: [[TMP60:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 8
2164 ; AVX2-NEXT: [[TMP61:%.*]] = bitcast double* [[TMP60]] to <4 x double>*
2165 ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP61]], i32 8, <4 x i1> [[TMP54]])
2166 ; AVX2-NEXT: [[TMP62:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 12
2167 ; AVX2-NEXT: [[TMP63:%.*]] = bitcast double* [[TMP62]] to <4 x double>*
2168 ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]])
2169 ; AVX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
2170 ; AVX2-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2171 ; AVX2-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !51
2172 ; AVX2: middle.block:
2173 ; AVX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
2174 ; AVX2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; AVX2: scalar.ph:
2176 ; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
2177 ; AVX2-NEXT: br label [[FOR_BODY:%.*]]
; AVX2: for.body:
2179 ; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
2180 ; AVX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[INDVARS_IV]]
2181 ; AVX2-NEXT: [[TMP65:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
2182 ; AVX2-NEXT: [[TMP66:%.*]] = and i8 [[TMP65]], 1
2183 ; AVX2-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP66]], 0
2184 ; AVX2-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]]
2185 ; AVX2: land.lhs.true:
2186 ; AVX2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[INDVARS_IV]]
2187 ; AVX2-NEXT: [[TMP67:%.*]] = load double*, double** [[ARRAYIDX2]], align 8
2188 ; AVX2-NEXT: [[CMP3:%.*]] = icmp eq double* [[TMP67]], null
2189 ; AVX2-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]]
; AVX2: if.then:
2191 ; AVX2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]]
2192 ; AVX2-NEXT: store double 5.000000e-01, double* [[ARRAYIDX5]], align 8
2193 ; AVX2-NEXT: br label [[FOR_INC]]
; AVX2: for.inc:
2195 ; AVX2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
2196 ; AVX2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
2197 ; AVX2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !52
2198 ; AVX2: for.end.loopexit:
2199 ; AVX2-NEXT: br label [[FOR_END]]
2201 ; AVX2-NEXT: ret void
; AVX512-LABEL: @foo7(
; AVX512-NEXT: entry:
; AVX512-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0
; AVX512-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; AVX512: for.body.preheader:
; AVX512-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64
; AVX512-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32
; AVX512-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; AVX512: vector.ph:
; AVX512-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 32
; AVX512-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; AVX512-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX512: vector.body:
; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0
; AVX512-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
; AVX512-NEXT: [[INDUCTION1:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
; AVX512-NEXT: [[INDUCTION2:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23>
; AVX512-NEXT: [[INDUCTION3:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>
; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8
; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 16
; AVX512-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 24
; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER:%.*]], i64 [[TMP0]]
; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP1]]
; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP2]]
; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP3]]
; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0
; AVX512-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <8 x i8>*
; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[TMP9]], align 1
; AVX512-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 8
; AVX512-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <8 x i8>*
; AVX512-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, <8 x i8>* [[TMP11]], align 1
; AVX512-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 16
; AVX512-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <8 x i8>*
; AVX512-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i8>, <8 x i8>* [[TMP13]], align 1
; AVX512-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 24
; AVX512-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <8 x i8>*
; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i8>, <8 x i8>* [[TMP15]], align 1
; AVX512-NEXT: [[TMP16:%.*]] = and <8 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; AVX512-NEXT: [[TMP17:%.*]] = and <8 x i8> [[WIDE_LOAD4]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; AVX512-NEXT: [[TMP18:%.*]] = and <8 x i8> [[WIDE_LOAD5]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; AVX512-NEXT: [[TMP19:%.*]] = and <8 x i8> [[WIDE_LOAD6]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; AVX512-NEXT: [[TMP20:%.*]] = icmp eq <8 x i8> [[TMP16]], zeroinitializer
; AVX512-NEXT: [[TMP21:%.*]] = icmp eq <8 x i8> [[TMP17]], zeroinitializer
; AVX512-NEXT: [[TMP22:%.*]] = icmp eq <8 x i8> [[TMP18]], zeroinitializer
; AVX512-NEXT: [[TMP23:%.*]] = icmp eq <8 x i8> [[TMP19]], zeroinitializer
; AVX512-NEXT: [[TMP24:%.*]] = getelementptr inbounds double*, double** [[IN:%.*]], i64 [[TMP0]]
; AVX512-NEXT: [[TMP25:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[TMP1]]
; AVX512-NEXT: [[TMP26:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[TMP2]]
; AVX512-NEXT: [[TMP27:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[TMP3]]
; AVX512-NEXT: [[TMP28:%.*]] = xor <8 x i1> [[TMP20]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP29:%.*]] = xor <8 x i1> [[TMP21]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP30:%.*]] = xor <8 x i1> [[TMP22]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP31:%.*]] = xor <8 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP32:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 0
; AVX512-NEXT: [[TMP33:%.*]] = bitcast double** [[TMP32]] to <8 x double*>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* [[TMP33]], i32 8, <8 x i1> [[TMP28]], <8 x double*> undef)
; AVX512-NEXT: [[TMP34:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 8
; AVX512-NEXT: [[TMP35:%.*]] = bitcast double** [[TMP34]] to <8 x double*>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD7:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* [[TMP35]], i32 8, <8 x i1> [[TMP29]], <8 x double*> undef)
; AVX512-NEXT: [[TMP36:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 16
; AVX512-NEXT: [[TMP37:%.*]] = bitcast double** [[TMP36]] to <8 x double*>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* [[TMP37]], i32 8, <8 x i1> [[TMP30]], <8 x double*> undef)
; AVX512-NEXT: [[TMP38:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 24
; AVX512-NEXT: [[TMP39:%.*]] = bitcast double** [[TMP38]] to <8 x double*>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* [[TMP39]], i32 8, <8 x i1> [[TMP31]], <8 x double*> undef)
; AVX512-NEXT: [[TMP40:%.*]] = icmp eq <8 x double*> [[WIDE_MASKED_LOAD]], zeroinitializer
; AVX512-NEXT: [[TMP41:%.*]] = icmp eq <8 x double*> [[WIDE_MASKED_LOAD7]], zeroinitializer
; AVX512-NEXT: [[TMP42:%.*]] = icmp eq <8 x double*> [[WIDE_MASKED_LOAD8]], zeroinitializer
; AVX512-NEXT: [[TMP43:%.*]] = icmp eq <8 x double*> [[WIDE_MASKED_LOAD9]], zeroinitializer
; AVX512-NEXT: [[TMP44:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], i64 [[TMP0]]
; AVX512-NEXT: [[TMP45:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP1]]
; AVX512-NEXT: [[TMP46:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP2]]
; AVX512-NEXT: [[TMP47:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP3]]
; AVX512-NEXT: [[TMP48:%.*]] = xor <8 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP49:%.*]] = xor <8 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP50:%.*]] = xor <8 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP51:%.*]] = xor <8 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP52:%.*]] = and <8 x i1> [[TMP48]], [[TMP28]]
; AVX512-NEXT: [[TMP53:%.*]] = and <8 x i1> [[TMP49]], [[TMP29]]
; AVX512-NEXT: [[TMP54:%.*]] = and <8 x i1> [[TMP50]], [[TMP30]]
; AVX512-NEXT: [[TMP55:%.*]] = and <8 x i1> [[TMP51]], [[TMP31]]
; AVX512-NEXT: [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
; AVX512-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <8 x double>*
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <8 x double>* [[TMP57]], i32 8, <8 x i1> [[TMP52]])
; AVX512-NEXT: [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 8
; AVX512-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <8 x double>*
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <8 x double>* [[TMP59]], i32 8, <8 x i1> [[TMP53]])
; AVX512-NEXT: [[TMP60:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 16
; AVX512-NEXT: [[TMP61:%.*]] = bitcast double* [[TMP60]] to <8 x double>*
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <8 x double>* [[TMP61]], i32 8, <8 x i1> [[TMP54]])
; AVX512-NEXT: [[TMP62:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 24
; AVX512-NEXT: [[TMP63:%.*]] = bitcast double* [[TMP62]] to <8 x double>*
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <8 x double>* [[TMP63]], i32 8, <8 x i1> [[TMP55]])
; AVX512-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32
; AVX512-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AVX512-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !61
; AVX512: middle.block:
; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; AVX512: scalar.ph:
; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; AVX512-NEXT: br label [[FOR_BODY:%.*]]
; AVX512: for.body:
; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX512-NEXT: [[TMP65:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
; AVX512-NEXT: [[TMP66:%.*]] = and i8 [[TMP65]], 1
; AVX512-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP66]], 0
; AVX512-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]]
; AVX512: land.lhs.true:
; AVX512-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[INDVARS_IV]]
; AVX512-NEXT: [[TMP67:%.*]] = load double*, double** [[ARRAYIDX2]], align 8
; AVX512-NEXT: [[CMP3:%.*]] = icmp eq double* [[TMP67]], null
; AVX512-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]]
; AVX512: if.then:
; AVX512-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]]
; AVX512-NEXT: store double 5.000000e-01, double* [[ARRAYIDX5]], align 8
; AVX512-NEXT: br label [[FOR_INC]]
; AVX512: for.inc:
; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !62
; AVX512: for.end.loopexit:
; AVX512-NEXT: br label [[FOR_END]]
; AVX512: for.end:
; AVX512-NEXT: ret void
;
entry:
  %cmp5 = icmp eq i32 %size, 0
  br i1 %cmp5, label %for.end, label %for.body.preheader

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %size to i64
  br label %for.body

for.body:                                         ; preds = %for.inc, %for.body.preheader
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
  %arrayidx = getelementptr inbounds i8, i8* %trigger, i64 %indvars.iv
  %0 = load i8, i8* %arrayidx, align 1
  %1 = and i8 %0, 1
  %tobool = icmp eq i8 %1, 0
  br i1 %tobool, label %for.inc, label %land.lhs.true

land.lhs.true:                                    ; preds = %for.body
  %arrayidx2 = getelementptr inbounds double*, double** %in, i64 %indvars.iv
  %2 = load double*, double** %arrayidx2, align 8
  %cmp3 = icmp eq double* %2, null
  br i1 %cmp3, label %for.inc, label %if.then

if.then:                                          ; preds = %land.lhs.true
  %arrayidx5 = getelementptr inbounds double, double* %out, i64 %indvars.iv
  store double 5.000000e-01, double* %arrayidx5, align 8
  br label %for.inc

for.inc:                                          ; preds = %land.lhs.true, %for.body, %if.then
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.inc, %entry
  ret void
}

;typedef int (*fp)();
;void foo8 (double* __restrict__ out, fp* __restrict__ in, bool * __restrict__ trigger, unsigned size) {
;
;  for (unsigned i=0; i<size; i++)
;    if (trigger[i] && (in[i] != 0))
;      out[i] = (double) 0.5;
;}
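;
; A rough C sketch of the per-lane predicate the vectorizer is expected to
; materialize for @foo8 (illustrative only, not part of the checked output;
; it mirrors the @foo7 pattern with a function-pointer array in place of a
; double* array):
;
;   /* lane i is active iff the low bit of trigger[i] is set and
;      in[i] is a non-null function pointer */
;   bool active = (trigger[i] & 1) && (in[i] != 0);
;   if (active)
;     out[i] = 0.5;
;
; The checks below therefore expect plain wide loads of the i8 trigger
; bytes, masked loads of the i32 ()* elements under the trigger mask, and
; masked stores of 0.5 under the combined mask.
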
define void @foo8(double* noalias nocapture %out, i32 ()** noalias nocapture readonly %in, i8* noalias nocapture readonly %trigger, i32 %size) local_unnamed_addr #0 {
; AVX1-LABEL: @foo8(
; AVX1-NEXT: entry:
; AVX1-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0
; AVX1-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; AVX1: for.body.preheader:
; AVX1-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64
; AVX1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16
; AVX1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; AVX1: vector.ph:
; AVX1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16
; AVX1-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; AVX1-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX1: vector.body:
; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0
; AVX1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
; AVX1-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
; AVX1-NEXT: [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 4, i64 5, i64 6, i64 7>
; AVX1-NEXT: [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 8, i64 9, i64 10, i64 11>
; AVX1-NEXT: [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 12, i64 13, i64 14, i64 15>
; AVX1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; AVX1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
; AVX1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8
; AVX1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12
; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER:%.*]], i64 [[TMP0]]
; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP1]]
; AVX1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP2]]
; AVX1-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP3]]
; AVX1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0
; AVX1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <4 x i8>*
; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP9]], align 1
; AVX1-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 4
; AVX1-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <4 x i8>*
; AVX1-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, <4 x i8>* [[TMP11]], align 1
; AVX1-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 8
; AVX1-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <4 x i8>*
; AVX1-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP13]], align 1
; AVX1-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 12
; AVX1-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <4 x i8>*
; AVX1-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, <4 x i8>* [[TMP15]], align 1
; AVX1-NEXT: [[TMP16:%.*]] = and <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1>
; AVX1-NEXT: [[TMP17:%.*]] = and <4 x i8> [[WIDE_LOAD4]], <i8 1, i8 1, i8 1, i8 1>
; AVX1-NEXT: [[TMP18:%.*]] = and <4 x i8> [[WIDE_LOAD5]], <i8 1, i8 1, i8 1, i8 1>
; AVX1-NEXT: [[TMP19:%.*]] = and <4 x i8> [[WIDE_LOAD6]], <i8 1, i8 1, i8 1, i8 1>
; AVX1-NEXT: [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP16]], zeroinitializer
; AVX1-NEXT: [[TMP21:%.*]] = icmp eq <4 x i8> [[TMP17]], zeroinitializer
; AVX1-NEXT: [[TMP22:%.*]] = icmp eq <4 x i8> [[TMP18]], zeroinitializer
; AVX1-NEXT: [[TMP23:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer
; AVX1-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN:%.*]], i64 [[TMP0]]
; AVX1-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[TMP1]]
; AVX1-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[TMP2]]
; AVX1-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[TMP3]]
; AVX1-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP20]], <i1 true, i1 true, i1 true, i1 true>
; AVX1-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP21]], <i1 true, i1 true, i1 true, i1 true>
; AVX1-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP22]], <i1 true, i1 true, i1 true, i1 true>
; AVX1-NEXT: [[TMP31:%.*]] = xor <4 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true>
; AVX1-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 0
; AVX1-NEXT: [[TMP33:%.*]] = bitcast i32 ()** [[TMP32]] to <4 x i32 ()*>*
; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x i32 ()*> undef)
; AVX1-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 4
; AVX1-NEXT: [[TMP35:%.*]] = bitcast i32 ()** [[TMP34]] to <4 x i32 ()*>*
; AVX1-NEXT: [[WIDE_MASKED_LOAD7:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP35]], i32 8, <4 x i1> [[TMP29]], <4 x i32 ()*> undef)
; AVX1-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 8
; AVX1-NEXT: [[TMP37:%.*]] = bitcast i32 ()** [[TMP36]] to <4 x i32 ()*>*
; AVX1-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x i32 ()*> undef)
; AVX1-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 12
; AVX1-NEXT: [[TMP39:%.*]] = bitcast i32 ()** [[TMP38]] to <4 x i32 ()*>*
; AVX1-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x i32 ()*> undef)
; AVX1-NEXT: [[TMP40:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD]], zeroinitializer
; AVX1-NEXT: [[TMP41:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD7]], zeroinitializer
; AVX1-NEXT: [[TMP42:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD8]], zeroinitializer
; AVX1-NEXT: [[TMP43:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD9]], zeroinitializer
; AVX1-NEXT: [[TMP44:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], i64 [[TMP0]]
; AVX1-NEXT: [[TMP45:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP1]]
; AVX1-NEXT: [[TMP46:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP2]]
; AVX1-NEXT: [[TMP47:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP3]]
; AVX1-NEXT: [[TMP48:%.*]] = xor <4 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true>
; AVX1-NEXT: [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true>
; AVX1-NEXT: [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true>
; AVX1-NEXT: [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true>
; AVX1-NEXT: [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]]
; AVX1-NEXT: [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]]
; AVX1-NEXT: [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]]
; AVX1-NEXT: [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]]
; AVX1-NEXT: [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
; AVX1-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>*
; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]])
; AVX1-NEXT: [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 4
; AVX1-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>*
; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP59]], i32 8, <4 x i1> [[TMP53]])
; AVX1-NEXT: [[TMP60:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 8
; AVX1-NEXT: [[TMP61:%.*]] = bitcast double* [[TMP60]] to <4 x double>*
; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP61]], i32 8, <4 x i1> [[TMP54]])
; AVX1-NEXT: [[TMP62:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 12
; AVX1-NEXT: [[TMP63:%.*]] = bitcast double* [[TMP62]] to <4 x double>*
; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]])
; AVX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; AVX1-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AVX1-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !44
; AVX1: middle.block:
; AVX1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; AVX1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; AVX1: scalar.ph:
; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; AVX1-NEXT: br label [[FOR_BODY:%.*]]
; AVX1: for.body:
; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX1-NEXT: [[TMP65:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
; AVX1-NEXT: [[TMP66:%.*]] = and i8 [[TMP65]], 1
; AVX1-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP66]], 0
; AVX1-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]]
; AVX1: land.lhs.true:
; AVX1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[INDVARS_IV]]
; AVX1-NEXT: [[TMP67:%.*]] = load i32 ()*, i32 ()** [[ARRAYIDX2]], align 8
; AVX1-NEXT: [[CMP3:%.*]] = icmp eq i32 ()* [[TMP67]], null
; AVX1-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]]
; AVX1: if.then:
; AVX1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]]
; AVX1-NEXT: store double 5.000000e-01, double* [[ARRAYIDX5]], align 8
; AVX1-NEXT: br label [[FOR_INC]]
; AVX1: for.inc:
; AVX1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; AVX1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !45
; AVX1: for.end.loopexit:
; AVX1-NEXT: br label [[FOR_END]]
; AVX1: for.end:
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @foo8(
; AVX2-NEXT: entry:
; AVX2-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0
; AVX2-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; AVX2: for.body.preheader:
; AVX2-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64
; AVX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16
; AVX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; AVX2: vector.ph:
; AVX2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16
; AVX2-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; AVX2-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX2: vector.body:
; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0
; AVX2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
; AVX2-NEXT: [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 4, i64 5, i64 6, i64 7>
; AVX2-NEXT: [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 8, i64 9, i64 10, i64 11>
; AVX2-NEXT: [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 12, i64 13, i64 14, i64 15>
; AVX2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; AVX2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
; AVX2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8
; AVX2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12
; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER:%.*]], i64 [[TMP0]]
; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP1]]
; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP2]]
; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP3]]
; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0
; AVX2-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <4 x i8>*
; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP9]], align 1
; AVX2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 4
; AVX2-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <4 x i8>*
; AVX2-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, <4 x i8>* [[TMP11]], align 1
; AVX2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 8
; AVX2-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <4 x i8>*
; AVX2-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP13]], align 1
; AVX2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 12
; AVX2-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <4 x i8>*
; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, <4 x i8>* [[TMP15]], align 1
; AVX2-NEXT: [[TMP16:%.*]] = and <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1>
; AVX2-NEXT: [[TMP17:%.*]] = and <4 x i8> [[WIDE_LOAD4]], <i8 1, i8 1, i8 1, i8 1>
; AVX2-NEXT: [[TMP18:%.*]] = and <4 x i8> [[WIDE_LOAD5]], <i8 1, i8 1, i8 1, i8 1>
; AVX2-NEXT: [[TMP19:%.*]] = and <4 x i8> [[WIDE_LOAD6]], <i8 1, i8 1, i8 1, i8 1>
; AVX2-NEXT: [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP16]], zeroinitializer
; AVX2-NEXT: [[TMP21:%.*]] = icmp eq <4 x i8> [[TMP17]], zeroinitializer
; AVX2-NEXT: [[TMP22:%.*]] = icmp eq <4 x i8> [[TMP18]], zeroinitializer
; AVX2-NEXT: [[TMP23:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer
; AVX2-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN:%.*]], i64 [[TMP0]]
; AVX2-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[TMP1]]
; AVX2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[TMP2]]
; AVX2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[TMP3]]
; AVX2-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP20]], <i1 true, i1 true, i1 true, i1 true>
; AVX2-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP21]], <i1 true, i1 true, i1 true, i1 true>
; AVX2-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP22]], <i1 true, i1 true, i1 true, i1 true>
; AVX2-NEXT: [[TMP31:%.*]] = xor <4 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true>
; AVX2-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 0
; AVX2-NEXT: [[TMP33:%.*]] = bitcast i32 ()** [[TMP32]] to <4 x i32 ()*>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x i32 ()*> undef)
; AVX2-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 4
; AVX2-NEXT: [[TMP35:%.*]] = bitcast i32 ()** [[TMP34]] to <4 x i32 ()*>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD7:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP35]], i32 8, <4 x i1> [[TMP29]], <4 x i32 ()*> undef)
; AVX2-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 8
; AVX2-NEXT: [[TMP37:%.*]] = bitcast i32 ()** [[TMP36]] to <4 x i32 ()*>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x i32 ()*> undef)
; AVX2-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 12
; AVX2-NEXT: [[TMP39:%.*]] = bitcast i32 ()** [[TMP38]] to <4 x i32 ()*>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x i32 ()*> undef)
; AVX2-NEXT: [[TMP40:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD]], zeroinitializer
; AVX2-NEXT: [[TMP41:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD7]], zeroinitializer
; AVX2-NEXT: [[TMP42:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD8]], zeroinitializer
; AVX2-NEXT: [[TMP43:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD9]], zeroinitializer
; AVX2-NEXT: [[TMP44:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], i64 [[TMP0]]
; AVX2-NEXT: [[TMP45:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP1]]
; AVX2-NEXT: [[TMP46:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP2]]
; AVX2-NEXT: [[TMP47:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP3]]
; AVX2-NEXT: [[TMP48:%.*]] = xor <4 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true>
; AVX2-NEXT: [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true>
; AVX2-NEXT: [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true>
; AVX2-NEXT: [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true>
; AVX2-NEXT: [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]]
; AVX2-NEXT: [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]]
; AVX2-NEXT: [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]]
; AVX2-NEXT: [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]]
; AVX2-NEXT: [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
; AVX2-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>*
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]])
; AVX2-NEXT: [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 4
; AVX2-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>*
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP59]], i32 8, <4 x i1> [[TMP53]])
; AVX2-NEXT: [[TMP60:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 8
; AVX2-NEXT: [[TMP61:%.*]] = bitcast double* [[TMP60]] to <4 x double>*
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP61]], i32 8, <4 x i1> [[TMP54]])
; AVX2-NEXT: [[TMP62:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 12
; AVX2-NEXT: [[TMP63:%.*]] = bitcast double* [[TMP62]] to <4 x double>*
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]])
; AVX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; AVX2-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AVX2-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !54
; AVX2: middle.block:
; AVX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; AVX2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; AVX2: scalar.ph:
; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; AVX2-NEXT: br label [[FOR_BODY:%.*]]
; AVX2: for.body:
; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX2-NEXT: [[TMP65:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
; AVX2-NEXT: [[TMP66:%.*]] = and i8 [[TMP65]], 1
; AVX2-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP66]], 0
; AVX2-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]]
; AVX2: land.lhs.true:
; AVX2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[INDVARS_IV]]
; AVX2-NEXT: [[TMP67:%.*]] = load i32 ()*, i32 ()** [[ARRAYIDX2]], align 8
; AVX2-NEXT: [[CMP3:%.*]] = icmp eq i32 ()* [[TMP67]], null
; AVX2-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]]
; AVX2: if.then:
; AVX2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]]
; AVX2-NEXT: store double 5.000000e-01, double* [[ARRAYIDX5]], align 8
; AVX2-NEXT: br label [[FOR_INC]]
; AVX2: for.inc:
; AVX2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; AVX2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !55
; AVX2: for.end.loopexit:
; AVX2-NEXT: br label [[FOR_END]]
; AVX2: for.end:
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @foo8(
; AVX512-NEXT: entry:
; AVX512-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0
; AVX512-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; AVX512: for.body.preheader:
; AVX512-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64
; AVX512-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32
; AVX512-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; AVX512: vector.ph:
; AVX512-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 32
; AVX512-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; AVX512-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX512: vector.body:
; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0
; AVX512-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
; AVX512-NEXT: [[INDUCTION1:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
; AVX512-NEXT: [[INDUCTION2:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23>
; AVX512-NEXT: [[INDUCTION3:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>
; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8
; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 16
; AVX512-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 24
; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER:%.*]], i64 [[TMP0]]
; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP1]]
; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP2]]
; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP3]]
; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0
; AVX512-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <8 x i8>*
; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[TMP9]], align 1
; AVX512-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 8
; AVX512-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <8 x i8>*
; AVX512-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, <8 x i8>* [[TMP11]], align 1
; AVX512-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 16
; AVX512-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <8 x i8>*
; AVX512-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i8>, <8 x i8>* [[TMP13]], align 1
; AVX512-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 24
; AVX512-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <8 x i8>*
; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i8>, <8 x i8>* [[TMP15]], align 1
; AVX512-NEXT: [[TMP16:%.*]] = and <8 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; AVX512-NEXT: [[TMP17:%.*]] = and <8 x i8> [[WIDE_LOAD4]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; AVX512-NEXT: [[TMP18:%.*]] = and <8 x i8> [[WIDE_LOAD5]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; AVX512-NEXT: [[TMP19:%.*]] = and <8 x i8> [[WIDE_LOAD6]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; AVX512-NEXT: [[TMP20:%.*]] = icmp eq <8 x i8> [[TMP16]], zeroinitializer
; AVX512-NEXT: [[TMP21:%.*]] = icmp eq <8 x i8> [[TMP17]], zeroinitializer
; AVX512-NEXT: [[TMP22:%.*]] = icmp eq <8 x i8> [[TMP18]], zeroinitializer
; AVX512-NEXT: [[TMP23:%.*]] = icmp eq <8 x i8> [[TMP19]], zeroinitializer
; AVX512-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN:%.*]], i64 [[TMP0]]
; AVX512-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[TMP1]]
; AVX512-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[TMP2]]
; AVX512-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[TMP3]]
; AVX512-NEXT: [[TMP28:%.*]] = xor <8 x i1> [[TMP20]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP29:%.*]] = xor <8 x i1> [[TMP21]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP30:%.*]] = xor <8 x i1> [[TMP22]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP31:%.*]] = xor <8 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 0
; AVX512-NEXT: [[TMP33:%.*]] = bitcast i32 ()** [[TMP32]] to <8 x i32 ()*>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* [[TMP33]], i32 8, <8 x i1> [[TMP28]], <8 x i32 ()*> undef)
; AVX512-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 8
; AVX512-NEXT: [[TMP35:%.*]] = bitcast i32 ()** [[TMP34]] to <8 x i32 ()*>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD7:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* [[TMP35]], i32 8, <8 x i1> [[TMP29]], <8 x i32 ()*> undef)
; AVX512-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 16
; AVX512-NEXT: [[TMP37:%.*]] = bitcast i32 ()** [[TMP36]] to <8 x i32 ()*>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* [[TMP37]], i32 8, <8 x i1> [[TMP30]], <8 x i32 ()*> undef)
; AVX512-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 24
; AVX512-NEXT: [[TMP39:%.*]] = bitcast i32 ()** [[TMP38]] to <8 x i32 ()*>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* [[TMP39]], i32 8, <8 x i1> [[TMP31]], <8 x i32 ()*> undef)
; AVX512-NEXT: [[TMP40:%.*]] = icmp eq <8 x i32 ()*> [[WIDE_MASKED_LOAD]], zeroinitializer
; AVX512-NEXT: [[TMP41:%.*]] = icmp eq <8 x i32 ()*> [[WIDE_MASKED_LOAD7]], zeroinitializer
; AVX512-NEXT: [[TMP42:%.*]] = icmp eq <8 x i32 ()*> [[WIDE_MASKED_LOAD8]], zeroinitializer
; AVX512-NEXT: [[TMP43:%.*]] = icmp eq <8 x i32 ()*> [[WIDE_MASKED_LOAD9]], zeroinitializer
; AVX512-NEXT: [[TMP44:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], i64 [[TMP0]]
; AVX512-NEXT: [[TMP45:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP1]]
; AVX512-NEXT: [[TMP46:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP2]]
; AVX512-NEXT: [[TMP47:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP3]]
; AVX512-NEXT: [[TMP48:%.*]] = xor <8 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP49:%.*]] = xor <8 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP50:%.*]] = xor <8 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP51:%.*]] = xor <8 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP52:%.*]] = and <8 x i1> [[TMP48]], [[TMP28]]
; AVX512-NEXT: [[TMP53:%.*]] = and <8 x i1> [[TMP49]], [[TMP29]]
; AVX512-NEXT: [[TMP54:%.*]] = and <8 x i1> [[TMP50]], [[TMP30]]
; AVX512-NEXT: [[TMP55:%.*]] = and <8 x i1> [[TMP51]], [[TMP31]]
; AVX512-NEXT: [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
; AVX512-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <8 x double>*
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <8 x double>* [[TMP57]], i32 8, <8 x i1> [[TMP52]])
; AVX512-NEXT: [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 8
; AVX512-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <8 x double>*
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <8 x double>* [[TMP59]], i32 8, <8 x i1> [[TMP53]])
; AVX512-NEXT: [[TMP60:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 16
; AVX512-NEXT: [[TMP61:%.*]] = bitcast double* [[TMP60]] to <8 x double>*
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <8 x double>* [[TMP61]], i32 8, <8 x i1> [[TMP54]])
; AVX512-NEXT: [[TMP62:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 24
; AVX512-NEXT: [[TMP63:%.*]] = bitcast double* [[TMP62]] to <8 x double>*
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <8 x double>* [[TMP63]], i32 8, <8 x i1> [[TMP55]])
; AVX512-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32
; AVX512-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AVX512-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !64
; AVX512: middle.block:
; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; AVX512: scalar.ph:
; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; AVX512-NEXT: br label [[FOR_BODY:%.*]]
; AVX512: for.body:
; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX512-NEXT: [[TMP65:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
; AVX512-NEXT: [[TMP66:%.*]] = and i8 [[TMP65]], 1
; AVX512-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP66]], 0
; AVX512-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]]
; AVX512: land.lhs.true:
; AVX512-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[INDVARS_IV]]
; AVX512-NEXT: [[TMP67:%.*]] = load i32 ()*, i32 ()** [[ARRAYIDX2]], align 8
; AVX512-NEXT: [[CMP3:%.*]] = icmp eq i32 ()* [[TMP67]], null
; AVX512-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]]
; AVX512: if.then:
; AVX512-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]]
; AVX512-NEXT: store double 5.000000e-01, double* [[ARRAYIDX5]], align 8
; AVX512-NEXT: br label [[FOR_INC]]
; AVX512: for.inc:
; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !65
; AVX512: for.end.loopexit:
; AVX512-NEXT: br label [[FOR_END]]
; AVX512: for.end:
; AVX512-NEXT: ret void
;
entry:
  %cmp5 = icmp eq i32 %size, 0
  br i1 %cmp5, label %for.end, label %for.body.preheader

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %size to i64
  br label %for.body

for.body:                                         ; preds = %for.inc, %for.body.preheader
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
  %arrayidx = getelementptr inbounds i8, i8* %trigger, i64 %indvars.iv
  %0 = load i8, i8* %arrayidx, align 1
  %1 = and i8 %0, 1
  %tobool = icmp eq i8 %1, 0
  br i1 %tobool, label %for.inc, label %land.lhs.true

land.lhs.true:                                    ; preds = %for.body
  %arrayidx2 = getelementptr inbounds i32 ()*, i32 ()** %in, i64 %indvars.iv
  %2 = load i32 ()*, i32 ()** %arrayidx2, align 8
  %cmp3 = icmp eq i32 ()* %2, null
  br i1 %cmp3, label %for.inc, label %if.then

if.then:                                          ; preds = %land.lhs.true
  %arrayidx5 = getelementptr inbounds double, double* %out, i64 %indvars.iv
  store double 5.000000e-01, double* %arrayidx5, align 8
  br label %for.inc

for.inc:                                          ; preds = %land.lhs.true, %for.body, %if.then
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.inc, %entry
  ret void
}

attributes #0 = { norecurse nounwind }