1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2 ; RUN: opt -p loop-vectorize -force-vector-width=2 -S %s | FileCheck --check-prefixes=VF2 %s
3 ; RUN: opt -p loop-vectorize -force-vector-width=4 -S %s | FileCheck --check-prefixes=VF4 %s
5 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
; Input loop: each iteration loads factor[iv] once and uses it to scale two
; adjacent i64 elements of data, data[2*iv] and data[2*iv+1], storing each
; product back to the location it was loaded from.
; The assertions below (forced vector widths 2 and 4, see the RUN lines) expect
; factor to be read with a single wide load, while every access to data is
; scalarized: per-lane addresses are extracted from the widened index, loaded
; individually, packed with insertelement, multiplied, then stored per lane.
7 define void @test_2xi64(ptr noalias %data, ptr noalias %factor, i64 noundef %n) {
8 ; VF2-LABEL: define void @test_2xi64(
9 ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]], i64 noundef [[N:%.*]]) {
10 ; VF2-NEXT: [[ENTRY:.*]]:
11 ; VF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
12 ; VF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
14 ; VF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
15 ; VF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
16 ; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
17 ; VF2: [[VECTOR_BODY]]:
18 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
19 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
20 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
21 ; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]]
22 ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
23 ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8
24 ; VF2-NEXT: [[TMP3:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 1)
25 ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
26 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP4]]
27 ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
28 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]]
29 ; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP5]], align 8
30 ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
31 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
32 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
33 ; VF2-NEXT: [[TMP12:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP11]]
34 ; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
35 ; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP5]], align 8
36 ; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
37 ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP7]], align 8
38 ; VF2-NEXT: [[TMP15:%.*]] = or disjoint <2 x i64> [[TMP3]], splat (i64 1)
39 ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP15]], i32 0
40 ; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP16]]
41 ; VF2-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP15]], i32 1
42 ; VF2-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP18]]
43 ; VF2-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP17]], align 8
44 ; VF2-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP19]], align 8
45 ; VF2-NEXT: [[TMP22:%.*]] = insertelement <2 x i64> poison, i64 [[TMP20]], i32 0
46 ; VF2-NEXT: [[TMP23:%.*]] = insertelement <2 x i64> [[TMP22]], i64 [[TMP21]], i32 1
47 ; VF2-NEXT: [[TMP24:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP23]]
48 ; VF2-NEXT: [[TMP25:%.*]] = extractelement <2 x i64> [[TMP24]], i32 0
49 ; VF2-NEXT: store i64 [[TMP25]], ptr [[TMP17]], align 8
50 ; VF2-NEXT: [[TMP26:%.*]] = extractelement <2 x i64> [[TMP24]], i32 1
51 ; VF2-NEXT: store i64 [[TMP26]], ptr [[TMP19]], align 8
52 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
53 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
54 ; VF2-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
55 ; VF2-NEXT: br i1 [[TMP27]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
56 ; VF2: [[MIDDLE_BLOCK]]:
57 ; VF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
58 ; VF2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
60 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
61 ; VF2-NEXT: br label %[[LOOP:.*]]
63 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
64 ; VF2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[IV]]
65 ; VF2-NEXT: [[L_FACTOR:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
66 ; VF2-NEXT: [[TMP28:%.*]] = shl nsw i64 [[IV]], 1
67 ; VF2-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP28]]
68 ; VF2-NEXT: [[L_0:%.*]] = load i64, ptr [[DATA_0]], align 8
69 ; VF2-NEXT: [[MUL_0:%.*]] = mul i64 [[L_FACTOR]], [[L_0]]
70 ; VF2-NEXT: store i64 [[MUL_0]], ptr [[DATA_0]], align 8
71 ; VF2-NEXT: [[TMP29:%.*]] = or disjoint i64 [[TMP28]], 1
72 ; VF2-NEXT: [[DATA_1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP29]]
73 ; VF2-NEXT: [[L_1:%.*]] = load i64, ptr [[DATA_1]], align 8
74 ; VF2-NEXT: [[MUL_1:%.*]] = mul i64 [[L_FACTOR]], [[L_1]]
75 ; VF2-NEXT: store i64 [[MUL_1]], ptr [[DATA_1]], align 8
76 ; VF2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
77 ; VF2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
78 ; VF2-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
82 ; VF4-LABEL: define void @test_2xi64(
83 ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]], i64 noundef [[N:%.*]]) {
84 ; VF4-NEXT: [[ENTRY:.*]]:
85 ; VF4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
86 ; VF4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
88 ; VF4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
89 ; VF4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
90 ; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
91 ; VF4: [[VECTOR_BODY]]:
92 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
93 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
94 ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
95 ; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]]
96 ; VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
97 ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
98 ; VF4-NEXT: [[TMP3:%.*]] = shl nsw <4 x i64> [[VEC_IND]], splat (i64 1)
99 ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
100 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP4]]
101 ; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
102 ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]]
103 ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
104 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP8]]
105 ; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
106 ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP10]]
107 ; VF4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP5]], align 8
108 ; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP7]], align 8
109 ; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP9]], align 8
110 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
111 ; VF4-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i32 0
112 ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1
113 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2
114 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3
115 ; VF4-NEXT: [[TMP20:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP19]]
116 ; VF4-NEXT: [[TMP21:%.*]] = extractelement <4 x i64> [[TMP20]], i32 0
117 ; VF4-NEXT: store i64 [[TMP21]], ptr [[TMP5]], align 8
118 ; VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP20]], i32 1
119 ; VF4-NEXT: store i64 [[TMP22]], ptr [[TMP7]], align 8
120 ; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP20]], i32 2
121 ; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP9]], align 8
122 ; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP20]], i32 3
123 ; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP11]], align 8
124 ; VF4-NEXT: [[TMP25:%.*]] = or disjoint <4 x i64> [[TMP3]], splat (i64 1)
125 ; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP25]], i32 0
126 ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP26]]
127 ; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP25]], i32 1
128 ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP28]]
129 ; VF4-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP25]], i32 2
130 ; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP30]]
131 ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP25]], i32 3
132 ; VF4-NEXT: [[TMP33:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP32]]
133 ; VF4-NEXT: [[TMP34:%.*]] = load i64, ptr [[TMP27]], align 8
134 ; VF4-NEXT: [[TMP35:%.*]] = load i64, ptr [[TMP29]], align 8
135 ; VF4-NEXT: [[TMP36:%.*]] = load i64, ptr [[TMP31]], align 8
136 ; VF4-NEXT: [[TMP37:%.*]] = load i64, ptr [[TMP33]], align 8
137 ; VF4-NEXT: [[TMP38:%.*]] = insertelement <4 x i64> poison, i64 [[TMP34]], i32 0
138 ; VF4-NEXT: [[TMP39:%.*]] = insertelement <4 x i64> [[TMP38]], i64 [[TMP35]], i32 1
139 ; VF4-NEXT: [[TMP40:%.*]] = insertelement <4 x i64> [[TMP39]], i64 [[TMP36]], i32 2
140 ; VF4-NEXT: [[TMP41:%.*]] = insertelement <4 x i64> [[TMP40]], i64 [[TMP37]], i32 3
141 ; VF4-NEXT: [[TMP42:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP41]]
142 ; VF4-NEXT: [[TMP43:%.*]] = extractelement <4 x i64> [[TMP42]], i32 0
143 ; VF4-NEXT: store i64 [[TMP43]], ptr [[TMP27]], align 8
144 ; VF4-NEXT: [[TMP44:%.*]] = extractelement <4 x i64> [[TMP42]], i32 1
145 ; VF4-NEXT: store i64 [[TMP44]], ptr [[TMP29]], align 8
146 ; VF4-NEXT: [[TMP45:%.*]] = extractelement <4 x i64> [[TMP42]], i32 2
147 ; VF4-NEXT: store i64 [[TMP45]], ptr [[TMP31]], align 8
148 ; VF4-NEXT: [[TMP46:%.*]] = extractelement <4 x i64> [[TMP42]], i32 3
149 ; VF4-NEXT: store i64 [[TMP46]], ptr [[TMP33]], align 8
150 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
151 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
152 ; VF4-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
153 ; VF4-NEXT: br i1 [[TMP47]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
154 ; VF4: [[MIDDLE_BLOCK]]:
155 ; VF4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
156 ; VF4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
157 ; VF4: [[SCALAR_PH]]:
158 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
159 ; VF4-NEXT: br label %[[LOOP:.*]]
161 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
162 ; VF4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[IV]]
163 ; VF4-NEXT: [[L_FACTOR:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
164 ; VF4-NEXT: [[TMP48:%.*]] = shl nsw i64 [[IV]], 1
165 ; VF4-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP48]]
166 ; VF4-NEXT: [[L_0:%.*]] = load i64, ptr [[DATA_0]], align 8
167 ; VF4-NEXT: [[MUL_0:%.*]] = mul i64 [[L_FACTOR]], [[L_0]]
168 ; VF4-NEXT: store i64 [[MUL_0]], ptr [[DATA_0]], align 8
169 ; VF4-NEXT: [[TMP49:%.*]] = or disjoint i64 [[TMP48]], 1
170 ; VF4-NEXT: [[DATA_1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP49]]
171 ; VF4-NEXT: [[L_1:%.*]] = load i64, ptr [[DATA_1]], align 8
172 ; VF4-NEXT: [[MUL_1:%.*]] = mul i64 [[L_FACTOR]], [[L_1]]
173 ; VF4-NEXT: store i64 [[MUL_1]], ptr [[DATA_1]], align 8
174 ; VF4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
175 ; VF4-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
176 ; VF4-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; Loop body of the test input. %1 is 2*iv (index of the first element of the
; pair) and %3 is 2*iv+1 (the "or disjoint" with 1 sets the low bit that the
; shift left cleared). Each element is loaded, scaled and stored before the
; next element of the pair is touched. The loop exits once iv+1 equals %n.
184 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
185 %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv
186 %l.factor = load i64, ptr %arrayidx, align 8
187 %1 = shl nsw i64 %iv, 1
188 %data.0 = getelementptr inbounds i64, ptr %data, i64 %1
189 %l.0 = load i64, ptr %data.0, align 8
190 %mul.0 = mul i64 %l.factor, %l.0
191 store i64 %mul.0, ptr %data.0, align 8
192 %3 = or disjoint i64 %1, 1
193 %data.1 = getelementptr inbounds i64, ptr %data, i64 %3
194 %l.1 = load i64, ptr %data.1, align 8
195 %mul.1 = mul i64 %l.factor, %l.1
196 store i64 %mul.1, ptr %data.1, align 8
197 %iv.next = add nuw nsw i64 %iv, 1
198 %ec = icmp eq i64 %iv.next, %n
199 br i1 %ec, label %exit, label %loop
; Input loop: each iteration loads factor[iv] and both elements of the pair
; data[2*iv] / data[2*iv+1] before performing any store. The loaded values are
; then used crosswise: data[2*iv] receives factor[iv] * (old data[2*iv+1]) and
; data[2*iv+1] receives factor[iv] * (old data[2*iv]).
; The assertions below (forced vector widths 2 and 4, see the RUN lines) expect
; a single wide load of factor, both data vectors to be assembled from per-lane
; scalar loads first, and the two products to be written back afterwards with
; per-lane scalar stores.
205 define void @test_2xi64_interleave_loads_order_flipped(ptr noalias %data, ptr noalias %factor, i64 noundef %n) {
206 ; VF2-LABEL: define void @test_2xi64_interleave_loads_order_flipped(
207 ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]], i64 noundef [[N:%.*]]) {
208 ; VF2-NEXT: [[ENTRY:.*]]:
209 ; VF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
210 ; VF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
211 ; VF2: [[VECTOR_PH]]:
212 ; VF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
213 ; VF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
214 ; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
215 ; VF2: [[VECTOR_BODY]]:
216 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
217 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
218 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
219 ; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]]
220 ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
221 ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8
222 ; VF2-NEXT: [[TMP3:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 1)
223 ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
224 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP4]]
225 ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
226 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]]
227 ; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP5]], align 8
228 ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
229 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
230 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
231 ; VF2-NEXT: [[TMP12:%.*]] = or disjoint <2 x i64> [[TMP3]], splat (i64 1)
232 ; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
233 ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP13]]
234 ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
235 ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP15]]
236 ; VF2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP14]], align 8
237 ; VF2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 8
238 ; VF2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> poison, i64 [[TMP17]], i32 0
239 ; VF2-NEXT: [[TMP20:%.*]] = insertelement <2 x i64> [[TMP19]], i64 [[TMP18]], i32 1
240 ; VF2-NEXT: [[TMP21:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP20]]
241 ; VF2-NEXT: [[TMP22:%.*]] = extractelement <2 x i64> [[TMP21]], i32 0
242 ; VF2-NEXT: store i64 [[TMP22]], ptr [[TMP5]], align 8
243 ; VF2-NEXT: [[TMP23:%.*]] = extractelement <2 x i64> [[TMP21]], i32 1
244 ; VF2-NEXT: store i64 [[TMP23]], ptr [[TMP7]], align 8
245 ; VF2-NEXT: [[TMP24:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP11]]
246 ; VF2-NEXT: [[TMP25:%.*]] = extractelement <2 x i64> [[TMP24]], i32 0
247 ; VF2-NEXT: store i64 [[TMP25]], ptr [[TMP14]], align 8
248 ; VF2-NEXT: [[TMP26:%.*]] = extractelement <2 x i64> [[TMP24]], i32 1
249 ; VF2-NEXT: store i64 [[TMP26]], ptr [[TMP16]], align 8
250 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
251 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
252 ; VF2-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
253 ; VF2-NEXT: br i1 [[TMP27]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
254 ; VF2: [[MIDDLE_BLOCK]]:
255 ; VF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
256 ; VF2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
257 ; VF2: [[SCALAR_PH]]:
258 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
259 ; VF2-NEXT: br label %[[LOOP:.*]]
261 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
262 ; VF2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[IV]]
263 ; VF2-NEXT: [[L_FACTOR:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
264 ; VF2-NEXT: [[TMP28:%.*]] = shl nsw i64 [[IV]], 1
265 ; VF2-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP28]]
266 ; VF2-NEXT: [[L_0:%.*]] = load i64, ptr [[DATA_0]], align 8
267 ; VF2-NEXT: [[TMP29:%.*]] = or disjoint i64 [[TMP28]], 1
268 ; VF2-NEXT: [[DATA_1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP29]]
269 ; VF2-NEXT: [[L_1:%.*]] = load i64, ptr [[DATA_1]], align 8
270 ; VF2-NEXT: [[MUL_0:%.*]] = mul i64 [[L_FACTOR]], [[L_1]]
271 ; VF2-NEXT: store i64 [[MUL_0]], ptr [[DATA_0]], align 8
272 ; VF2-NEXT: [[MUL_1:%.*]] = mul i64 [[L_FACTOR]], [[L_0]]
273 ; VF2-NEXT: store i64 [[MUL_1]], ptr [[DATA_1]], align 8
274 ; VF2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
275 ; VF2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
276 ; VF2-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
280 ; VF4-LABEL: define void @test_2xi64_interleave_loads_order_flipped(
281 ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]], i64 noundef [[N:%.*]]) {
282 ; VF4-NEXT: [[ENTRY:.*]]:
283 ; VF4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
284 ; VF4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
285 ; VF4: [[VECTOR_PH]]:
286 ; VF4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
287 ; VF4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
288 ; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
289 ; VF4: [[VECTOR_BODY]]:
290 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
291 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
292 ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
293 ; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]]
294 ; VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
295 ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
296 ; VF4-NEXT: [[TMP3:%.*]] = shl nsw <4 x i64> [[VEC_IND]], splat (i64 1)
297 ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
298 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP4]]
299 ; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
300 ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]]
301 ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
302 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP8]]
303 ; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
304 ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP10]]
305 ; VF4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP5]], align 8
306 ; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP7]], align 8
307 ; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP9]], align 8
308 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
309 ; VF4-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i32 0
310 ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1
311 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2
312 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3
313 ; VF4-NEXT: [[TMP20:%.*]] = or disjoint <4 x i64> [[TMP3]], splat (i64 1)
314 ; VF4-NEXT: [[TMP21:%.*]] = extractelement <4 x i64> [[TMP20]], i32 0
315 ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP21]]
316 ; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP20]], i32 1
317 ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP23]]
318 ; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP20]], i32 2
319 ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP25]]
320 ; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP20]], i32 3
321 ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP27]]
322 ; VF4-NEXT: [[TMP29:%.*]] = load i64, ptr [[TMP22]], align 8
323 ; VF4-NEXT: [[TMP30:%.*]] = load i64, ptr [[TMP24]], align 8
324 ; VF4-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP26]], align 8
325 ; VF4-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP28]], align 8
326 ; VF4-NEXT: [[TMP33:%.*]] = insertelement <4 x i64> poison, i64 [[TMP29]], i32 0
327 ; VF4-NEXT: [[TMP34:%.*]] = insertelement <4 x i64> [[TMP33]], i64 [[TMP30]], i32 1
328 ; VF4-NEXT: [[TMP35:%.*]] = insertelement <4 x i64> [[TMP34]], i64 [[TMP31]], i32 2
329 ; VF4-NEXT: [[TMP36:%.*]] = insertelement <4 x i64> [[TMP35]], i64 [[TMP32]], i32 3
330 ; VF4-NEXT: [[TMP37:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP36]]
331 ; VF4-NEXT: [[TMP38:%.*]] = extractelement <4 x i64> [[TMP37]], i32 0
332 ; VF4-NEXT: store i64 [[TMP38]], ptr [[TMP5]], align 8
333 ; VF4-NEXT: [[TMP39:%.*]] = extractelement <4 x i64> [[TMP37]], i32 1
334 ; VF4-NEXT: store i64 [[TMP39]], ptr [[TMP7]], align 8
335 ; VF4-NEXT: [[TMP40:%.*]] = extractelement <4 x i64> [[TMP37]], i32 2
336 ; VF4-NEXT: store i64 [[TMP40]], ptr [[TMP9]], align 8
337 ; VF4-NEXT: [[TMP41:%.*]] = extractelement <4 x i64> [[TMP37]], i32 3
338 ; VF4-NEXT: store i64 [[TMP41]], ptr [[TMP11]], align 8
339 ; VF4-NEXT: [[TMP42:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP19]]
340 ; VF4-NEXT: [[TMP43:%.*]] = extractelement <4 x i64> [[TMP42]], i32 0
341 ; VF4-NEXT: store i64 [[TMP43]], ptr [[TMP22]], align 8
342 ; VF4-NEXT: [[TMP44:%.*]] = extractelement <4 x i64> [[TMP42]], i32 1
343 ; VF4-NEXT: store i64 [[TMP44]], ptr [[TMP24]], align 8
344 ; VF4-NEXT: [[TMP45:%.*]] = extractelement <4 x i64> [[TMP42]], i32 2
345 ; VF4-NEXT: store i64 [[TMP45]], ptr [[TMP26]], align 8
346 ; VF4-NEXT: [[TMP46:%.*]] = extractelement <4 x i64> [[TMP42]], i32 3
347 ; VF4-NEXT: store i64 [[TMP46]], ptr [[TMP28]], align 8
348 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
349 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
350 ; VF4-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
351 ; VF4-NEXT: br i1 [[TMP47]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
352 ; VF4: [[MIDDLE_BLOCK]]:
353 ; VF4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
354 ; VF4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
355 ; VF4: [[SCALAR_PH]]:
356 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
357 ; VF4-NEXT: br label %[[LOOP:.*]]
359 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
360 ; VF4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[IV]]
361 ; VF4-NEXT: [[L_FACTOR:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
362 ; VF4-NEXT: [[TMP48:%.*]] = shl nsw i64 [[IV]], 1
363 ; VF4-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP48]]
364 ; VF4-NEXT: [[L_0:%.*]] = load i64, ptr [[DATA_0]], align 8
365 ; VF4-NEXT: [[TMP49:%.*]] = or disjoint i64 [[TMP48]], 1
366 ; VF4-NEXT: [[DATA_1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP49]]
367 ; VF4-NEXT: [[L_1:%.*]] = load i64, ptr [[DATA_1]], align 8
368 ; VF4-NEXT: [[MUL_0:%.*]] = mul i64 [[L_FACTOR]], [[L_1]]
369 ; VF4-NEXT: store i64 [[MUL_0]], ptr [[DATA_0]], align 8
370 ; VF4-NEXT: [[MUL_1:%.*]] = mul i64 [[L_FACTOR]], [[L_0]]
371 ; VF4-NEXT: store i64 [[MUL_1]], ptr [[DATA_1]], align 8
372 ; VF4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
373 ; VF4-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
374 ; VF4-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; Loop body of the test input. %1 is 2*iv and %3 is 2*iv+1. Both elements of
; the pair are loaded up front (%l.0, %l.1); the products are then stored to
; the opposite element: %mul.0 uses %l.1 but goes to %data.0, and %mul.1 uses
; %l.0 but goes to %data.1. The loop exits once iv+1 equals %n.
382 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
383 %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv
384 %l.factor = load i64, ptr %arrayidx, align 8
385 %1 = shl nsw i64 %iv, 1
386 %data.0 = getelementptr inbounds i64, ptr %data, i64 %1
387 %l.0 = load i64, ptr %data.0, align 8
388 %3 = or disjoint i64 %1, 1
389 %data.1 = getelementptr inbounds i64, ptr %data, i64 %3
390 %l.1 = load i64, ptr %data.1, align 8
391 %mul.0 = mul i64 %l.factor, %l.1
392 store i64 %mul.0, ptr %data.0, align 8
393 %mul.1 = mul i64 %l.factor, %l.0
394 store i64 %mul.1, ptr %data.1, align 8
395 %iv.next = add nuw nsw i64 %iv, 1
396 %ec = icmp eq i64 %iv.next, %n
397 br i1 %ec, label %exit, label %loop
403 define void @test_2xi64_store_order_flipped_1(ptr noalias %data, ptr noalias %factor, i64 noundef %n) {
404 ; VF2-LABEL: define void @test_2xi64_store_order_flipped_1(
405 ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]], i64 noundef [[N:%.*]]) {
406 ; VF2-NEXT: [[ENTRY:.*]]:
407 ; VF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
408 ; VF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
409 ; VF2: [[VECTOR_PH]]:
410 ; VF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
411 ; VF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
412 ; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
413 ; VF2: [[VECTOR_BODY]]:
414 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
415 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
416 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
417 ; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]]
418 ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
419 ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8
420 ; VF2-NEXT: [[TMP3:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 1)
421 ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
422 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP4]]
423 ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
424 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]]
425 ; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP5]], align 8
426 ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
427 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
428 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
429 ; VF2-NEXT: [[TMP12:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP11]]
430 ; VF2-NEXT: [[TMP13:%.*]] = or disjoint <2 x i64> [[TMP3]], splat (i64 1)
431 ; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
432 ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP14]]
433 ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
434 ; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP16]]
435 ; VF2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP15]], align 8
436 ; VF2-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP17]], align 8
437 ; VF2-NEXT: [[TMP20:%.*]] = insertelement <2 x i64> poison, i64 [[TMP18]], i32 0
438 ; VF2-NEXT: [[TMP21:%.*]] = insertelement <2 x i64> [[TMP20]], i64 [[TMP19]], i32 1
439 ; VF2-NEXT: [[TMP22:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP21]]
440 ; VF2-NEXT: [[TMP23:%.*]] = extractelement <2 x i64> [[TMP22]], i32 0
441 ; VF2-NEXT: store i64 [[TMP23]], ptr [[TMP5]], align 8
442 ; VF2-NEXT: [[TMP24:%.*]] = extractelement <2 x i64> [[TMP22]], i32 1
443 ; VF2-NEXT: store i64 [[TMP24]], ptr [[TMP7]], align 8
444 ; VF2-NEXT: [[TMP25:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
445 ; VF2-NEXT: store i64 [[TMP25]], ptr [[TMP15]], align 8
446 ; VF2-NEXT: [[TMP26:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
447 ; VF2-NEXT: store i64 [[TMP26]], ptr [[TMP17]], align 8
448 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
449 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
450 ; VF2-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
451 ; VF2-NEXT: br i1 [[TMP27]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
452 ; VF2: [[MIDDLE_BLOCK]]:
453 ; VF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
454 ; VF2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
455 ; VF2: [[SCALAR_PH]]:
456 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
457 ; VF2-NEXT: br label %[[LOOP:.*]]
459 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
460 ; VF2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[IV]]
461 ; VF2-NEXT: [[L_FACTOR:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
462 ; VF2-NEXT: [[TMP28:%.*]] = shl nsw i64 [[IV]], 1
463 ; VF2-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP28]]
464 ; VF2-NEXT: [[L_0:%.*]] = load i64, ptr [[DATA_0]], align 8
465 ; VF2-NEXT: [[MUL_0:%.*]] = mul i64 [[L_FACTOR]], [[L_0]]
466 ; VF2-NEXT: [[TMP29:%.*]] = or disjoint i64 [[TMP28]], 1
467 ; VF2-NEXT: [[DATA_1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP29]]
468 ; VF2-NEXT: [[L_1:%.*]] = load i64, ptr [[DATA_1]], align 8
469 ; VF2-NEXT: [[MUL_1:%.*]] = mul i64 [[L_FACTOR]], [[L_1]]
470 ; VF2-NEXT: store i64 [[MUL_1]], ptr [[DATA_0]], align 8
471 ; VF2-NEXT: store i64 [[MUL_0]], ptr [[DATA_1]], align 8
472 ; VF2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
473 ; VF2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
474 ; VF2-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
478 ; VF4-LABEL: define void @test_2xi64_store_order_flipped_1(
479 ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]], i64 noundef [[N:%.*]]) {
480 ; VF4-NEXT: [[ENTRY:.*]]:
481 ; VF4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
482 ; VF4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
483 ; VF4: [[VECTOR_PH]]:
484 ; VF4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
485 ; VF4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
486 ; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
487 ; VF4: [[VECTOR_BODY]]:
488 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
489 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
490 ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
491 ; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]]
492 ; VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
493 ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
494 ; VF4-NEXT: [[TMP3:%.*]] = shl nsw <4 x i64> [[VEC_IND]], splat (i64 1)
495 ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
496 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP4]]
497 ; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
498 ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]]
499 ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
500 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP8]]
501 ; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
502 ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP10]]
503 ; VF4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP5]], align 8
504 ; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP7]], align 8
505 ; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP9]], align 8
506 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
507 ; VF4-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i32 0
508 ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1
509 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2
510 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3
511 ; VF4-NEXT: [[TMP20:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP19]]
512 ; VF4-NEXT: [[TMP21:%.*]] = or disjoint <4 x i64> [[TMP3]], splat (i64 1)
513 ; VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP21]], i32 0
514 ; VF4-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP22]]
515 ; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP21]], i32 1
516 ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP24]]
517 ; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP21]], i32 2
518 ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP26]]
519 ; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3
520 ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP28]]
521 ; VF4-NEXT: [[TMP30:%.*]] = load i64, ptr [[TMP23]], align 8
522 ; VF4-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP25]], align 8
523 ; VF4-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP27]], align 8
524 ; VF4-NEXT: [[TMP33:%.*]] = load i64, ptr [[TMP29]], align 8
525 ; VF4-NEXT: [[TMP34:%.*]] = insertelement <4 x i64> poison, i64 [[TMP30]], i32 0
526 ; VF4-NEXT: [[TMP35:%.*]] = insertelement <4 x i64> [[TMP34]], i64 [[TMP31]], i32 1
527 ; VF4-NEXT: [[TMP36:%.*]] = insertelement <4 x i64> [[TMP35]], i64 [[TMP32]], i32 2
528 ; VF4-NEXT: [[TMP37:%.*]] = insertelement <4 x i64> [[TMP36]], i64 [[TMP33]], i32 3
529 ; VF4-NEXT: [[TMP38:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP37]]
530 ; VF4-NEXT: [[TMP39:%.*]] = extractelement <4 x i64> [[TMP38]], i32 0
531 ; VF4-NEXT: store i64 [[TMP39]], ptr [[TMP5]], align 8
532 ; VF4-NEXT: [[TMP40:%.*]] = extractelement <4 x i64> [[TMP38]], i32 1
533 ; VF4-NEXT: store i64 [[TMP40]], ptr [[TMP7]], align 8
534 ; VF4-NEXT: [[TMP41:%.*]] = extractelement <4 x i64> [[TMP38]], i32 2
535 ; VF4-NEXT: store i64 [[TMP41]], ptr [[TMP9]], align 8
536 ; VF4-NEXT: [[TMP42:%.*]] = extractelement <4 x i64> [[TMP38]], i32 3
537 ; VF4-NEXT: store i64 [[TMP42]], ptr [[TMP11]], align 8
538 ; VF4-NEXT: [[TMP43:%.*]] = extractelement <4 x i64> [[TMP20]], i32 0
539 ; VF4-NEXT: store i64 [[TMP43]], ptr [[TMP23]], align 8
540 ; VF4-NEXT: [[TMP44:%.*]] = extractelement <4 x i64> [[TMP20]], i32 1
541 ; VF4-NEXT: store i64 [[TMP44]], ptr [[TMP25]], align 8
542 ; VF4-NEXT: [[TMP45:%.*]] = extractelement <4 x i64> [[TMP20]], i32 2
543 ; VF4-NEXT: store i64 [[TMP45]], ptr [[TMP27]], align 8
544 ; VF4-NEXT: [[TMP46:%.*]] = extractelement <4 x i64> [[TMP20]], i32 3
545 ; VF4-NEXT: store i64 [[TMP46]], ptr [[TMP29]], align 8
546 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
547 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
548 ; VF4-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
549 ; VF4-NEXT: br i1 [[TMP47]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
550 ; VF4: [[MIDDLE_BLOCK]]:
551 ; VF4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
552 ; VF4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
553 ; VF4: [[SCALAR_PH]]:
554 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
555 ; VF4-NEXT: br label %[[LOOP:.*]]
557 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
558 ; VF4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[IV]]
559 ; VF4-NEXT: [[L_FACTOR:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
560 ; VF4-NEXT: [[TMP48:%.*]] = shl nsw i64 [[IV]], 1
561 ; VF4-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP48]]
562 ; VF4-NEXT: [[L_0:%.*]] = load i64, ptr [[DATA_0]], align 8
563 ; VF4-NEXT: [[MUL_0:%.*]] = mul i64 [[L_FACTOR]], [[L_0]]
564 ; VF4-NEXT: [[TMP49:%.*]] = or disjoint i64 [[TMP48]], 1
565 ; VF4-NEXT: [[DATA_1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP49]]
566 ; VF4-NEXT: [[L_1:%.*]] = load i64, ptr [[DATA_1]], align 8
567 ; VF4-NEXT: [[MUL_1:%.*]] = mul i64 [[L_FACTOR]], [[L_1]]
568 ; VF4-NEXT: store i64 [[MUL_1]], ptr [[DATA_0]], align 8
569 ; VF4-NEXT: store i64 [[MUL_0]], ptr [[DATA_1]], align 8
570 ; VF4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
571 ; VF4-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
572 ; VF4-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
580 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
581 %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv
582 %l.factor = load i64, ptr %arrayidx, align 8
583 %1 = shl nsw i64 %iv, 1
584 %data.0 = getelementptr inbounds i64, ptr %data, i64 %1
585 %l.0 = load i64, ptr %data.0, align 8
586 %mul.0 = mul i64 %l.factor, %l.0
587 %3 = or disjoint i64 %1, 1
588 %data.1 = getelementptr inbounds i64, ptr %data, i64 %3
589 %l.1 = load i64, ptr %data.1, align 8
590 %mul.1 = mul i64 %l.factor, %l.1
591 store i64 %mul.1, ptr %data.0, align 8
592 store i64 %mul.0, ptr %data.1, align 8
593 %iv.next = add nuw nsw i64 %iv, 1
594 %ec = icmp eq i64 %iv.next, %n
595 br i1 %ec, label %exit, label %loop
; Test: per iteration, two adjacent i64 slots of %data (indices 2*iv and
; 2*iv+1) are loaded and each multiplied by the same value loaded from
; %factor[iv]. The products are then written back crosswise: %mul.0 (computed
; from %data.0) is stored to %data.1, and %mul.1 (computed from %data.1) is
; stored to %data.0, with the store to %data.1 issued first.
; The expected output for both VF=2 and VF=4 loads %factor with a single wide
; load but keeps every %data access as a per-lane scalar load/store.
601 define void @test_2xi64_store_order_flipped_2(ptr noalias %data, ptr noalias %factor, i64 noundef %n) {
602 ; VF2-LABEL: define void @test_2xi64_store_order_flipped_2(
603 ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]], i64 noundef [[N:%.*]]) {
604 ; VF2-NEXT: [[ENTRY:.*]]:
605 ; VF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
606 ; VF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
607 ; VF2: [[VECTOR_PH]]:
608 ; VF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
609 ; VF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
610 ; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
611 ; VF2: [[VECTOR_BODY]]:
612 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
613 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
614 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
615 ; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]]
616 ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
617 ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8
618 ; VF2-NEXT: [[TMP3:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 1)
619 ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
620 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP4]]
621 ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
622 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]]
623 ; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP5]], align 8
624 ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
625 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
626 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
627 ; VF2-NEXT: [[TMP12:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP11]]
628 ; VF2-NEXT: [[TMP13:%.*]] = or disjoint <2 x i64> [[TMP3]], splat (i64 1)
629 ; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
630 ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP14]]
631 ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
632 ; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP16]]
633 ; VF2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP15]], align 8
634 ; VF2-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP17]], align 8
635 ; VF2-NEXT: [[TMP20:%.*]] = insertelement <2 x i64> poison, i64 [[TMP18]], i32 0
636 ; VF2-NEXT: [[TMP21:%.*]] = insertelement <2 x i64> [[TMP20]], i64 [[TMP19]], i32 1
637 ; VF2-NEXT: [[TMP22:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP21]]
638 ; VF2-NEXT: [[TMP23:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
639 ; VF2-NEXT: store i64 [[TMP23]], ptr [[TMP15]], align 8
640 ; VF2-NEXT: [[TMP24:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
641 ; VF2-NEXT: store i64 [[TMP24]], ptr [[TMP17]], align 8
642 ; VF2-NEXT: [[TMP25:%.*]] = extractelement <2 x i64> [[TMP22]], i32 0
643 ; VF2-NEXT: store i64 [[TMP25]], ptr [[TMP5]], align 8
644 ; VF2-NEXT: [[TMP26:%.*]] = extractelement <2 x i64> [[TMP22]], i32 1
645 ; VF2-NEXT: store i64 [[TMP26]], ptr [[TMP7]], align 8
646 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
647 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
648 ; VF2-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
649 ; VF2-NEXT: br i1 [[TMP27]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
650 ; VF2: [[MIDDLE_BLOCK]]:
651 ; VF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
652 ; VF2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
653 ; VF2: [[SCALAR_PH]]:
654 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
655 ; VF2-NEXT: br label %[[LOOP:.*]]
657 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
658 ; VF2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[IV]]
659 ; VF2-NEXT: [[L_FACTOR:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
660 ; VF2-NEXT: [[TMP28:%.*]] = shl nsw i64 [[IV]], 1
661 ; VF2-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP28]]
662 ; VF2-NEXT: [[L_0:%.*]] = load i64, ptr [[DATA_0]], align 8
663 ; VF2-NEXT: [[MUL_0:%.*]] = mul i64 [[L_FACTOR]], [[L_0]]
664 ; VF2-NEXT: [[TMP29:%.*]] = or disjoint i64 [[TMP28]], 1
665 ; VF2-NEXT: [[DATA_1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP29]]
666 ; VF2-NEXT: [[L_1:%.*]] = load i64, ptr [[DATA_1]], align 8
667 ; VF2-NEXT: [[MUL_1:%.*]] = mul i64 [[L_FACTOR]], [[L_1]]
668 ; VF2-NEXT: store i64 [[MUL_0]], ptr [[DATA_1]], align 8
669 ; VF2-NEXT: store i64 [[MUL_1]], ptr [[DATA_0]], align 8
670 ; VF2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
671 ; VF2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
672 ; VF2-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
676 ; VF4-LABEL: define void @test_2xi64_store_order_flipped_2(
677 ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]], i64 noundef [[N:%.*]]) {
678 ; VF4-NEXT: [[ENTRY:.*]]:
679 ; VF4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
680 ; VF4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
681 ; VF4: [[VECTOR_PH]]:
682 ; VF4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
683 ; VF4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
684 ; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
685 ; VF4: [[VECTOR_BODY]]:
686 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
687 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
688 ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
689 ; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]]
690 ; VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
691 ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
692 ; VF4-NEXT: [[TMP3:%.*]] = shl nsw <4 x i64> [[VEC_IND]], splat (i64 1)
693 ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
694 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP4]]
695 ; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
696 ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]]
697 ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
698 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP8]]
699 ; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
700 ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP10]]
701 ; VF4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP5]], align 8
702 ; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP7]], align 8
703 ; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP9]], align 8
704 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
705 ; VF4-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i32 0
706 ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1
707 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2
708 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3
709 ; VF4-NEXT: [[TMP20:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP19]]
710 ; VF4-NEXT: [[TMP21:%.*]] = or disjoint <4 x i64> [[TMP3]], splat (i64 1)
711 ; VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP21]], i32 0
712 ; VF4-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP22]]
713 ; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP21]], i32 1
714 ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP24]]
715 ; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP21]], i32 2
716 ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP26]]
717 ; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3
718 ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP28]]
719 ; VF4-NEXT: [[TMP30:%.*]] = load i64, ptr [[TMP23]], align 8
720 ; VF4-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP25]], align 8
721 ; VF4-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP27]], align 8
722 ; VF4-NEXT: [[TMP33:%.*]] = load i64, ptr [[TMP29]], align 8
723 ; VF4-NEXT: [[TMP34:%.*]] = insertelement <4 x i64> poison, i64 [[TMP30]], i32 0
724 ; VF4-NEXT: [[TMP35:%.*]] = insertelement <4 x i64> [[TMP34]], i64 [[TMP31]], i32 1
725 ; VF4-NEXT: [[TMP36:%.*]] = insertelement <4 x i64> [[TMP35]], i64 [[TMP32]], i32 2
726 ; VF4-NEXT: [[TMP37:%.*]] = insertelement <4 x i64> [[TMP36]], i64 [[TMP33]], i32 3
727 ; VF4-NEXT: [[TMP38:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP37]]
728 ; VF4-NEXT: [[TMP39:%.*]] = extractelement <4 x i64> [[TMP20]], i32 0
729 ; VF4-NEXT: store i64 [[TMP39]], ptr [[TMP23]], align 8
730 ; VF4-NEXT: [[TMP40:%.*]] = extractelement <4 x i64> [[TMP20]], i32 1
731 ; VF4-NEXT: store i64 [[TMP40]], ptr [[TMP25]], align 8
732 ; VF4-NEXT: [[TMP41:%.*]] = extractelement <4 x i64> [[TMP20]], i32 2
733 ; VF4-NEXT: store i64 [[TMP41]], ptr [[TMP27]], align 8
734 ; VF4-NEXT: [[TMP42:%.*]] = extractelement <4 x i64> [[TMP20]], i32 3
735 ; VF4-NEXT: store i64 [[TMP42]], ptr [[TMP29]], align 8
736 ; VF4-NEXT: [[TMP43:%.*]] = extractelement <4 x i64> [[TMP38]], i32 0
737 ; VF4-NEXT: store i64 [[TMP43]], ptr [[TMP5]], align 8
738 ; VF4-NEXT: [[TMP44:%.*]] = extractelement <4 x i64> [[TMP38]], i32 1
739 ; VF4-NEXT: store i64 [[TMP44]], ptr [[TMP7]], align 8
740 ; VF4-NEXT: [[TMP45:%.*]] = extractelement <4 x i64> [[TMP38]], i32 2
741 ; VF4-NEXT: store i64 [[TMP45]], ptr [[TMP9]], align 8
742 ; VF4-NEXT: [[TMP46:%.*]] = extractelement <4 x i64> [[TMP38]], i32 3
743 ; VF4-NEXT: store i64 [[TMP46]], ptr [[TMP11]], align 8
744 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
745 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
746 ; VF4-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
747 ; VF4-NEXT: br i1 [[TMP47]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
748 ; VF4: [[MIDDLE_BLOCK]]:
749 ; VF4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
750 ; VF4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
751 ; VF4: [[SCALAR_PH]]:
752 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
753 ; VF4-NEXT: br label %[[LOOP:.*]]
755 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
756 ; VF4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[IV]]
757 ; VF4-NEXT: [[L_FACTOR:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
758 ; VF4-NEXT: [[TMP48:%.*]] = shl nsw i64 [[IV]], 1
759 ; VF4-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP48]]
760 ; VF4-NEXT: [[L_0:%.*]] = load i64, ptr [[DATA_0]], align 8
761 ; VF4-NEXT: [[MUL_0:%.*]] = mul i64 [[L_FACTOR]], [[L_0]]
762 ; VF4-NEXT: [[TMP49:%.*]] = or disjoint i64 [[TMP48]], 1
763 ; VF4-NEXT: [[DATA_1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP49]]
764 ; VF4-NEXT: [[L_1:%.*]] = load i64, ptr [[DATA_1]], align 8
765 ; VF4-NEXT: [[MUL_1:%.*]] = mul i64 [[L_FACTOR]], [[L_1]]
766 ; VF4-NEXT: store i64 [[MUL_0]], ptr [[DATA_1]], align 8
767 ; VF4-NEXT: store i64 [[MUL_1]], ptr [[DATA_0]], align 8
768 ; VF4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
769 ; VF4-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
770 ; VF4-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
778 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
779 %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv
780 %l.factor = load i64, ptr %arrayidx, align 8
781 %1 = shl nsw i64 %iv, 1
782 %data.0 = getelementptr inbounds i64, ptr %data, i64 %1
783 %l.0 = load i64, ptr %data.0, align 8
784 %mul.0 = mul i64 %l.factor, %l.0
785 %3 = or disjoint i64 %1, 1
786 %data.1 = getelementptr inbounds i64, ptr %data, i64 %3
787 %l.1 = load i64, ptr %data.1, align 8
788 %mul.1 = mul i64 %l.factor, %l.1
; Crosswise write-back: the product derived from %data.0 goes to %data.1 and
; vice versa; the %data.1 slot is written before the %data.0 slot.
789 store i64 %mul.0, ptr %data.1, align 8
790 store i64 %mul.1, ptr %data.0, align 8
791 %iv.next = add nuw nsw i64 %iv, 1
792 %ec = icmp eq i64 %iv.next, %n
793 br i1 %ec, label %exit, label %loop
; Test: per iteration, two adjacent i64 slots of %data (indices 2*iv and
; 2*iv+1) are stored to, but the two stored products are built from different
; operands: the value stored to %data.0 multiplies two loads of %src.0[iv],
; while the value stored to %data.1 multiplies the old %data.1 value by
; %src.1[iv].
; The expected output for both VF=2 and VF=4 uses wide loads for %src.0 and
; %src.1 but keeps every %data access as a per-lane scalar load/store.
; NOTE(review): despite "fmul" in the name, all multiplies here are integer
; `mul` -- confirm whether the name is intentional.
799 define void @test_2xi64_different_loads_feeding_fmul(ptr noalias %data, ptr noalias %src.0, ptr noalias %src.1, i64 noundef %n) {
800 ; VF2-LABEL: define void @test_2xi64_different_loads_feeding_fmul(
801 ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[SRC_0:%.*]], ptr noalias [[SRC_1:%.*]], i64 noundef [[N:%.*]]) {
802 ; VF2-NEXT: [[ENTRY:.*]]:
803 ; VF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
804 ; VF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
805 ; VF2: [[VECTOR_PH]]:
806 ; VF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
807 ; VF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
808 ; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
809 ; VF2: [[VECTOR_BODY]]:
810 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
811 ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
812 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
813 ; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[SRC_0]], i64 [[TMP0]]
814 ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
815 ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8
816 ; VF2-NEXT: [[TMP3:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 1)
817 ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
818 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP4]]
819 ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
820 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]]
821 ; VF2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8
822 ; VF2-NEXT: [[TMP8:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]]
823 ; VF2-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP8]], i32 0
824 ; VF2-NEXT: store i64 [[TMP9]], ptr [[TMP5]], align 8
825 ; VF2-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP8]], i32 1
826 ; VF2-NEXT: store i64 [[TMP10]], ptr [[TMP7]], align 8
827 ; VF2-NEXT: [[TMP11:%.*]] = or disjoint <2 x i64> [[TMP3]], splat (i64 1)
828 ; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
829 ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP12]]
830 ; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
831 ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP14]]
832 ; VF2-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP13]], align 8
833 ; VF2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 8
834 ; VF2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[TMP16]], i32 0
835 ; VF2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP17]], i32 1
836 ; VF2-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[SRC_1]], i64 [[TMP0]]
837 ; VF2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i32 0
838 ; VF2-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i64>, ptr [[TMP21]], align 8
839 ; VF2-NEXT: [[TMP22:%.*]] = mul <2 x i64> [[WIDE_LOAD2]], [[TMP19]]
840 ; VF2-NEXT: [[TMP23:%.*]] = extractelement <2 x i64> [[TMP22]], i32 0
841 ; VF2-NEXT: store i64 [[TMP23]], ptr [[TMP13]], align 8
842 ; VF2-NEXT: [[TMP24:%.*]] = extractelement <2 x i64> [[TMP22]], i32 1
843 ; VF2-NEXT: store i64 [[TMP24]], ptr [[TMP15]], align 8
844 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
845 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
846 ; VF2-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
847 ; VF2-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
848 ; VF2: [[MIDDLE_BLOCK]]:
849 ; VF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
850 ; VF2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
851 ; VF2: [[SCALAR_PH]]:
852 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
853 ; VF2-NEXT: br label %[[LOOP:.*]]
855 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
856 ; VF2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i64, ptr [[SRC_0]], i64 [[IV]]
857 ; VF2-NEXT: [[L_SRC_0:%.*]] = load i64, ptr [[GEP_SRC_0]], align 8
858 ; VF2-NEXT: [[TMP26:%.*]] = shl nsw i64 [[IV]], 1
859 ; VF2-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP26]]
860 ; VF2-NEXT: [[L_0:%.*]] = load i64, ptr [[GEP_SRC_0]], align 8
861 ; VF2-NEXT: [[MUL_0:%.*]] = mul i64 [[L_SRC_0]], [[L_0]]
862 ; VF2-NEXT: store i64 [[MUL_0]], ptr [[DATA_0]], align 8
863 ; VF2-NEXT: [[TMP27:%.*]] = or disjoint i64 [[TMP26]], 1
864 ; VF2-NEXT: [[DATA_1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP27]]
865 ; VF2-NEXT: [[L_1:%.*]] = load i64, ptr [[DATA_1]], align 8
866 ; VF2-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i64, ptr [[SRC_1]], i64 [[IV]]
867 ; VF2-NEXT: [[L_SRC_1:%.*]] = load i64, ptr [[GEP_SRC_1]], align 8
868 ; VF2-NEXT: [[MUL_1:%.*]] = mul i64 [[L_SRC_1]], [[L_1]]
869 ; VF2-NEXT: store i64 [[MUL_1]], ptr [[DATA_1]], align 8
870 ; VF2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
871 ; VF2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
872 ; VF2-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
876 ; VF4-LABEL: define void @test_2xi64_different_loads_feeding_fmul(
877 ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[SRC_0:%.*]], ptr noalias [[SRC_1:%.*]], i64 noundef [[N:%.*]]) {
878 ; VF4-NEXT: [[ENTRY:.*]]:
879 ; VF4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
880 ; VF4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
881 ; VF4: [[VECTOR_PH]]:
882 ; VF4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
883 ; VF4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
884 ; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
885 ; VF4: [[VECTOR_BODY]]:
886 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
887 ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
888 ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
889 ; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[SRC_0]], i64 [[TMP0]]
890 ; VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
891 ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
892 ; VF4-NEXT: [[TMP3:%.*]] = shl nsw <4 x i64> [[VEC_IND]], splat (i64 1)
893 ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
894 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP4]]
895 ; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
896 ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]]
897 ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
898 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP8]]
899 ; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
900 ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP10]]
901 ; VF4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
902 ; VF4-NEXT: [[TMP12:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]]
903 ; VF4-NEXT: [[TMP13:%.*]] = extractelement <4 x i64> [[TMP12]], i32 0
904 ; VF4-NEXT: store i64 [[TMP13]], ptr [[TMP5]], align 8
905 ; VF4-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP12]], i32 1
906 ; VF4-NEXT: store i64 [[TMP14]], ptr [[TMP7]], align 8
907 ; VF4-NEXT: [[TMP15:%.*]] = extractelement <4 x i64> [[TMP12]], i32 2
908 ; VF4-NEXT: store i64 [[TMP15]], ptr [[TMP9]], align 8
909 ; VF4-NEXT: [[TMP16:%.*]] = extractelement <4 x i64> [[TMP12]], i32 3
910 ; VF4-NEXT: store i64 [[TMP16]], ptr [[TMP11]], align 8
911 ; VF4-NEXT: [[TMP17:%.*]] = or disjoint <4 x i64> [[TMP3]], splat (i64 1)
912 ; VF4-NEXT: [[TMP18:%.*]] = extractelement <4 x i64> [[TMP17]], i32 0
913 ; VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP18]]
914 ; VF4-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP17]], i32 1
915 ; VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP20]]
916 ; VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP17]], i32 2
917 ; VF4-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP22]]
918 ; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP17]], i32 3
919 ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP24]]
920 ; VF4-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP19]], align 8
921 ; VF4-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP21]], align 8
922 ; VF4-NEXT: [[TMP28:%.*]] = load i64, ptr [[TMP23]], align 8
923 ; VF4-NEXT: [[TMP29:%.*]] = load i64, ptr [[TMP25]], align 8
924 ; VF4-NEXT: [[TMP30:%.*]] = insertelement <4 x i64> poison, i64 [[TMP26]], i32 0
925 ; VF4-NEXT: [[TMP31:%.*]] = insertelement <4 x i64> [[TMP30]], i64 [[TMP27]], i32 1
926 ; VF4-NEXT: [[TMP32:%.*]] = insertelement <4 x i64> [[TMP31]], i64 [[TMP28]], i32 2
927 ; VF4-NEXT: [[TMP33:%.*]] = insertelement <4 x i64> [[TMP32]], i64 [[TMP29]], i32 3
928 ; VF4-NEXT: [[TMP34:%.*]] = getelementptr inbounds i64, ptr [[SRC_1]], i64 [[TMP0]]
929 ; VF4-NEXT: [[TMP35:%.*]] = getelementptr inbounds i64, ptr [[TMP34]], i32 0
930 ; VF4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i64>, ptr [[TMP35]], align 8
931 ; VF4-NEXT: [[TMP36:%.*]] = mul <4 x i64> [[WIDE_LOAD2]], [[TMP33]]
932 ; VF4-NEXT: [[TMP37:%.*]] = extractelement <4 x i64> [[TMP36]], i32 0
933 ; VF4-NEXT: store i64 [[TMP37]], ptr [[TMP19]], align 8
934 ; VF4-NEXT: [[TMP38:%.*]] = extractelement <4 x i64> [[TMP36]], i32 1
935 ; VF4-NEXT: store i64 [[TMP38]], ptr [[TMP21]], align 8
936 ; VF4-NEXT: [[TMP39:%.*]] = extractelement <4 x i64> [[TMP36]], i32 2
937 ; VF4-NEXT: store i64 [[TMP39]], ptr [[TMP23]], align 8
938 ; VF4-NEXT: [[TMP40:%.*]] = extractelement <4 x i64> [[TMP36]], i32 3
939 ; VF4-NEXT: store i64 [[TMP40]], ptr [[TMP25]], align 8
940 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
941 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
942 ; VF4-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
943 ; VF4-NEXT: br i1 [[TMP41]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
944 ; VF4: [[MIDDLE_BLOCK]]:
945 ; VF4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
946 ; VF4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
947 ; VF4: [[SCALAR_PH]]:
948 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
949 ; VF4-NEXT: br label %[[LOOP:.*]]
951 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
952 ; VF4-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i64, ptr [[SRC_0]], i64 [[IV]]
953 ; VF4-NEXT: [[L_SRC_0:%.*]] = load i64, ptr [[GEP_SRC_0]], align 8
954 ; VF4-NEXT: [[TMP42:%.*]] = shl nsw i64 [[IV]], 1
955 ; VF4-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP42]]
956 ; VF4-NEXT: [[L_0:%.*]] = load i64, ptr [[GEP_SRC_0]], align 8
957 ; VF4-NEXT: [[MUL_0:%.*]] = mul i64 [[L_SRC_0]], [[L_0]]
958 ; VF4-NEXT: store i64 [[MUL_0]], ptr [[DATA_0]], align 8
959 ; VF4-NEXT: [[TMP43:%.*]] = or disjoint i64 [[TMP42]], 1
960 ; VF4-NEXT: [[DATA_1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP43]]
961 ; VF4-NEXT: [[L_1:%.*]] = load i64, ptr [[DATA_1]], align 8
962 ; VF4-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i64, ptr [[SRC_1]], i64 [[IV]]
963 ; VF4-NEXT: [[L_SRC_1:%.*]] = load i64, ptr [[GEP_SRC_1]], align 8
964 ; VF4-NEXT: [[MUL_1:%.*]] = mul i64 [[L_SRC_1]], [[L_1]]
965 ; VF4-NEXT: store i64 [[MUL_1]], ptr [[DATA_1]], align 8
966 ; VF4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
967 ; VF4-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
968 ; VF4-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
976 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
977 %gep.src.0 = getelementptr inbounds i64, ptr %src.0, i64 %iv
978 %l.src.0 = load i64, ptr %gep.src.0, align 8
979 %1 = shl nsw i64 %iv, 1
980 %data.0 = getelementptr inbounds i64, ptr %data, i64 %1
; NOTE(review): %l.0 is loaded from %gep.src.0 (the same address as %l.src.0),
; not from %data.0, so %data.0 is only stored to. The generated assertions
; match this IR; confirm this is intended before changing it, since any change
; requires regenerating the assertions.
981 %l.0 = load i64, ptr %gep.src.0, align 8
982 %mul.0 = mul i64 %l.src.0, %l.0
983 store i64 %mul.0, ptr %data.0, align 8
984 %3 = or disjoint i64 %1, 1
985 %data.1 = getelementptr inbounds i64, ptr %data, i64 %3
986 %l.1 = load i64, ptr %data.1, align 8
; The second slot uses a different factor source (%src.1) than the first.
987 %gep.src.1 = getelementptr inbounds i64, ptr %src.1, i64 %iv
988 %l.src.1 = load i64, ptr %gep.src.1, align 8
989 %mul.1 = mul i64 %l.src.1, %l.1
990 store i64 %mul.1, ptr %data.1, align 8
991 %iv.next = add nuw nsw i64 %iv, 1
992 %ec = icmp eq i64 %iv.next, %n
993 br i1 %ec, label %exit, label %loop
; test_3xi64 - scalar loop that multiplies each of the three i64 members of
; the { i64, i64, i64 } element %data[%iv] by the i64 loaded from %factor[%iv]
; and stores the product back in place; %iv starts at 0 and steps by 1 until
; %iv.next equals %n.
; The expected output for both check prefixes keeps one wide load of %factor
; per vector iteration, while every %data member access stays scalarized per
; lane: scalar loads gathered with insertelement, a vector mul, then
; extractelement followed by scalar stores.
999 define void @test_3xi64(ptr noalias %data, ptr noalias %factor, i64 noundef %n) {
1000 ; VF2-LABEL: define void @test_3xi64(
1001 ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]], i64 noundef [[N:%.*]]) {
1002 ; VF2-NEXT: [[ENTRY:.*]]:
1003 ; VF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
1004 ; VF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1005 ; VF2: [[VECTOR_PH]]:
1006 ; VF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
1007 ; VF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
1008 ; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
1009 ; VF2: [[VECTOR_BODY]]:
1010 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1011 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1012 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
1013 ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]]
1014 ; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
1015 ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
1016 ; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[TMP0]], i32 0
1017 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[TMP1]], i32 0
1018 ; VF2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
1019 ; VF2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
1020 ; VF2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0
1021 ; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1
1022 ; VF2-NEXT: [[TMP10:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP9]]
1023 ; VF2-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0
1024 ; VF2-NEXT: store i64 [[TMP11]], ptr [[TMP4]], align 8
1025 ; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1
1026 ; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP5]], align 8
1027 ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[TMP0]], i32 1
1028 ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[TMP1]], i32 1
1029 ; VF2-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP13]], align 8
1030 ; VF2-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 8
1031 ; VF2-NEXT: [[TMP17:%.*]] = insertelement <2 x i64> poison, i64 [[TMP15]], i32 0
1032 ; VF2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> [[TMP17]], i64 [[TMP16]], i32 1
1033 ; VF2-NEXT: [[TMP19:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP18]]
1034 ; VF2-NEXT: [[TMP20:%.*]] = extractelement <2 x i64> [[TMP19]], i32 0
1035 ; VF2-NEXT: store i64 [[TMP20]], ptr [[TMP13]], align 8
1036 ; VF2-NEXT: [[TMP21:%.*]] = extractelement <2 x i64> [[TMP19]], i32 1
1037 ; VF2-NEXT: store i64 [[TMP21]], ptr [[TMP14]], align 8
1038 ; VF2-NEXT: [[TMP22:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[TMP0]], i32 2
1039 ; VF2-NEXT: [[TMP23:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[TMP1]], i32 2
1040 ; VF2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 8
1041 ; VF2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 8
1042 ; VF2-NEXT: [[TMP26:%.*]] = insertelement <2 x i64> poison, i64 [[TMP24]], i32 0
1043 ; VF2-NEXT: [[TMP27:%.*]] = insertelement <2 x i64> [[TMP26]], i64 [[TMP25]], i32 1
1044 ; VF2-NEXT: [[TMP28:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP27]]
1045 ; VF2-NEXT: [[TMP29:%.*]] = extractelement <2 x i64> [[TMP28]], i32 0
1046 ; VF2-NEXT: store i64 [[TMP29]], ptr [[TMP22]], align 8
1047 ; VF2-NEXT: [[TMP30:%.*]] = extractelement <2 x i64> [[TMP28]], i32 1
1048 ; VF2-NEXT: store i64 [[TMP30]], ptr [[TMP23]], align 8
1049 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1050 ; VF2-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1051 ; VF2-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
1052 ; VF2: [[MIDDLE_BLOCK]]:
1053 ; VF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
1054 ; VF2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
1055 ; VF2: [[SCALAR_PH]]:
1056 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
1057 ; VF2-NEXT: br label %[[LOOP:.*]]
1059 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1060 ; VF2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[IV]]
1061 ; VF2-NEXT: [[L_FACTOR:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
1062 ; VF2-NEXT: [[DATA_0:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[IV]], i32 0
1063 ; VF2-NEXT: [[L_0:%.*]] = load i64, ptr [[DATA_0]], align 8
1064 ; VF2-NEXT: [[MUL_0:%.*]] = mul i64 [[L_FACTOR]], [[L_0]]
1065 ; VF2-NEXT: store i64 [[MUL_0]], ptr [[DATA_0]], align 8
1066 ; VF2-NEXT: [[DATA_1:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[IV]], i32 1
1067 ; VF2-NEXT: [[L_1:%.*]] = load i64, ptr [[DATA_1]], align 8
1068 ; VF2-NEXT: [[MUL_1:%.*]] = mul i64 [[L_FACTOR]], [[L_1]]
1069 ; VF2-NEXT: store i64 [[MUL_1]], ptr [[DATA_1]], align 8
1070 ; VF2-NEXT: [[DATA_2:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[IV]], i32 2
1071 ; VF2-NEXT: [[L_2:%.*]] = load i64, ptr [[DATA_2]], align 8
1072 ; VF2-NEXT: [[MUL_2:%.*]] = mul i64 [[L_FACTOR]], [[L_2]]
1073 ; VF2-NEXT: store i64 [[MUL_2]], ptr [[DATA_2]], align 8
1074 ; VF2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1075 ; VF2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
1076 ; VF2-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
1078 ; VF2-NEXT: ret void
1080 ; VF4-LABEL: define void @test_3xi64(
1081 ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]], i64 noundef [[N:%.*]]) {
1082 ; VF4-NEXT: [[ENTRY:.*]]:
1083 ; VF4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
1084 ; VF4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1085 ; VF4: [[VECTOR_PH]]:
1086 ; VF4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
1087 ; VF4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
1088 ; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
1089 ; VF4: [[VECTOR_BODY]]:
1090 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1091 ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1092 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
1093 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
1094 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
1095 ; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]]
1096 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
1097 ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
1098 ; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[TMP0]], i32 0
1099 ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[TMP1]], i32 0
1100 ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[TMP2]], i32 0
1101 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[TMP3]], i32 0
1102 ; VF4-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 8
1103 ; VF4-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 8
1104 ; VF4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP8]], align 8
1105 ; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8
1106 ; VF4-NEXT: [[TMP14:%.*]] = insertelement <4 x i64> poison, i64 [[TMP10]], i32 0
1107 ; VF4-NEXT: [[TMP15:%.*]] = insertelement <4 x i64> [[TMP14]], i64 [[TMP11]], i32 1
1108 ; VF4-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> [[TMP15]], i64 [[TMP12]], i32 2
1109 ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 3
1110 ; VF4-NEXT: [[TMP18:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP17]]
1111 ; VF4-NEXT: [[TMP19:%.*]] = extractelement <4 x i64> [[TMP18]], i32 0
1112 ; VF4-NEXT: store i64 [[TMP19]], ptr [[TMP6]], align 8
1113 ; VF4-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP18]], i32 1
1114 ; VF4-NEXT: store i64 [[TMP20]], ptr [[TMP7]], align 8
1115 ; VF4-NEXT: [[TMP21:%.*]] = extractelement <4 x i64> [[TMP18]], i32 2
1116 ; VF4-NEXT: store i64 [[TMP21]], ptr [[TMP8]], align 8
1117 ; VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP18]], i32 3
1118 ; VF4-NEXT: store i64 [[TMP22]], ptr [[TMP9]], align 8
1119 ; VF4-NEXT: [[TMP23:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[TMP0]], i32 1
1120 ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[TMP1]], i32 1
1121 ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[TMP2]], i32 1
1122 ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[TMP3]], i32 1
1123 ; VF4-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP23]], align 8
1124 ; VF4-NEXT: [[TMP28:%.*]] = load i64, ptr [[TMP24]], align 8
1125 ; VF4-NEXT: [[TMP29:%.*]] = load i64, ptr [[TMP25]], align 8
1126 ; VF4-NEXT: [[TMP30:%.*]] = load i64, ptr [[TMP26]], align 8
1127 ; VF4-NEXT: [[TMP31:%.*]] = insertelement <4 x i64> poison, i64 [[TMP27]], i32 0
1128 ; VF4-NEXT: [[TMP32:%.*]] = insertelement <4 x i64> [[TMP31]], i64 [[TMP28]], i32 1
1129 ; VF4-NEXT: [[TMP33:%.*]] = insertelement <4 x i64> [[TMP32]], i64 [[TMP29]], i32 2
1130 ; VF4-NEXT: [[TMP34:%.*]] = insertelement <4 x i64> [[TMP33]], i64 [[TMP30]], i32 3
1131 ; VF4-NEXT: [[TMP35:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP34]]
1132 ; VF4-NEXT: [[TMP36:%.*]] = extractelement <4 x i64> [[TMP35]], i32 0
1133 ; VF4-NEXT: store i64 [[TMP36]], ptr [[TMP23]], align 8
1134 ; VF4-NEXT: [[TMP37:%.*]] = extractelement <4 x i64> [[TMP35]], i32 1
1135 ; VF4-NEXT: store i64 [[TMP37]], ptr [[TMP24]], align 8
1136 ; VF4-NEXT: [[TMP38:%.*]] = extractelement <4 x i64> [[TMP35]], i32 2
1137 ; VF4-NEXT: store i64 [[TMP38]], ptr [[TMP25]], align 8
1138 ; VF4-NEXT: [[TMP39:%.*]] = extractelement <4 x i64> [[TMP35]], i32 3
1139 ; VF4-NEXT: store i64 [[TMP39]], ptr [[TMP26]], align 8
1140 ; VF4-NEXT: [[TMP40:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[TMP0]], i32 2
1141 ; VF4-NEXT: [[TMP41:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[TMP1]], i32 2
1142 ; VF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[TMP2]], i32 2
1143 ; VF4-NEXT: [[TMP43:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[TMP3]], i32 2
1144 ; VF4-NEXT: [[TMP44:%.*]] = load i64, ptr [[TMP40]], align 8
1145 ; VF4-NEXT: [[TMP45:%.*]] = load i64, ptr [[TMP41]], align 8
1146 ; VF4-NEXT: [[TMP46:%.*]] = load i64, ptr [[TMP42]], align 8
1147 ; VF4-NEXT: [[TMP47:%.*]] = load i64, ptr [[TMP43]], align 8
1148 ; VF4-NEXT: [[TMP48:%.*]] = insertelement <4 x i64> poison, i64 [[TMP44]], i32 0
1149 ; VF4-NEXT: [[TMP49:%.*]] = insertelement <4 x i64> [[TMP48]], i64 [[TMP45]], i32 1
1150 ; VF4-NEXT: [[TMP50:%.*]] = insertelement <4 x i64> [[TMP49]], i64 [[TMP46]], i32 2
1151 ; VF4-NEXT: [[TMP51:%.*]] = insertelement <4 x i64> [[TMP50]], i64 [[TMP47]], i32 3
1152 ; VF4-NEXT: [[TMP52:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP51]]
1153 ; VF4-NEXT: [[TMP53:%.*]] = extractelement <4 x i64> [[TMP52]], i32 0
1154 ; VF4-NEXT: store i64 [[TMP53]], ptr [[TMP40]], align 8
1155 ; VF4-NEXT: [[TMP54:%.*]] = extractelement <4 x i64> [[TMP52]], i32 1
1156 ; VF4-NEXT: store i64 [[TMP54]], ptr [[TMP41]], align 8
1157 ; VF4-NEXT: [[TMP55:%.*]] = extractelement <4 x i64> [[TMP52]], i32 2
1158 ; VF4-NEXT: store i64 [[TMP55]], ptr [[TMP42]], align 8
1159 ; VF4-NEXT: [[TMP56:%.*]] = extractelement <4 x i64> [[TMP52]], i32 3
1160 ; VF4-NEXT: store i64 [[TMP56]], ptr [[TMP43]], align 8
1161 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1162 ; VF4-NEXT: [[TMP57:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1163 ; VF4-NEXT: br i1 [[TMP57]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
1164 ; VF4: [[MIDDLE_BLOCK]]:
1165 ; VF4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
1166 ; VF4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
1167 ; VF4: [[SCALAR_PH]]:
1168 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
1169 ; VF4-NEXT: br label %[[LOOP:.*]]
1171 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1172 ; VF4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[IV]]
1173 ; VF4-NEXT: [[L_FACTOR:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
1174 ; VF4-NEXT: [[DATA_0:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[IV]], i32 0
1175 ; VF4-NEXT: [[L_0:%.*]] = load i64, ptr [[DATA_0]], align 8
1176 ; VF4-NEXT: [[MUL_0:%.*]] = mul i64 [[L_FACTOR]], [[L_0]]
1177 ; VF4-NEXT: store i64 [[MUL_0]], ptr [[DATA_0]], align 8
1178 ; VF4-NEXT: [[DATA_1:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[IV]], i32 1
1179 ; VF4-NEXT: [[L_1:%.*]] = load i64, ptr [[DATA_1]], align 8
1180 ; VF4-NEXT: [[MUL_1:%.*]] = mul i64 [[L_FACTOR]], [[L_1]]
1181 ; VF4-NEXT: store i64 [[MUL_1]], ptr [[DATA_1]], align 8
1182 ; VF4-NEXT: [[DATA_2:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[IV]], i32 2
1183 ; VF4-NEXT: [[L_2:%.*]] = load i64, ptr [[DATA_2]], align 8
1184 ; VF4-NEXT: [[MUL_2:%.*]] = mul i64 [[L_FACTOR]], [[L_2]]
1185 ; VF4-NEXT: store i64 [[MUL_2]], ptr [[DATA_2]], align 8
1186 ; VF4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1187 ; VF4-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
1188 ; VF4-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
1190 ; VF4-NEXT: ret void
1196 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; One factor per iteration, reused for all three member updates below.
1197 %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv
1198 %l.factor = load i64, ptr %arrayidx, align 8
; Member 0 - load, scale by the factor, store back to the same address.
1199 %data.0 = getelementptr inbounds { i64, i64, i64 }, ptr %data, i64 %iv, i32 0
1200 %l.0 = load i64, ptr %data.0, align 8
1201 %mul.0 = mul i64 %l.factor, %l.0
1202 store i64 %mul.0, ptr %data.0, align 8
; Member 1 - same read-modify-write pattern.
1203 %data.1 = getelementptr inbounds { i64, i64, i64 }, ptr %data, i64 %iv, i32 1
1204 %l.1 = load i64, ptr %data.1, align 8
1205 %mul.1 = mul i64 %l.factor, %l.1
1206 store i64 %mul.1, ptr %data.1, align 8
; Member 2 - same read-modify-write pattern.
1207 %data.2 = getelementptr inbounds { i64, i64, i64 }, ptr %data, i64 %iv, i32 2
1208 %l.2 = load i64, ptr %data.2, align 8
1209 %mul.2 = mul i64 %l.factor, %l.2
1210 store i64 %mul.2, ptr %data.2, align 8
; Advance the induction variable and leave once it reaches %n.
1211 %iv.next = add nuw nsw i64 %iv, 1
1212 %ec = icmp eq i64 %iv.next, %n
1213 br i1 %ec, label %exit, label %loop
; test_2xi32 - scalar loop that multiplies members 0 and 1 of the
; { i32, i32, i32 } element %data[%iv] by an i32 factor and stores the
; products back in place; %iv starts at 0 and steps by 1 until %iv.next
; equals %n.
; The factor address is formed with an i64-typed GEP while the load is i32,
; so consecutive iterations read values one i64 element apart; the expected
; output for both check prefixes loads the factor with one scalar load per
; lane. The %data member accesses are likewise expected to stay scalarized
; (scalar loads gathered with insertelement, a vector mul, then
; extractelement followed by scalar stores).
; NOTE(review): the name says 2xi32 but the struct has three i32 members and
; member 2 is never accessed -- confirm this layout is intentional.
1220 define void @test_2xi32(ptr noalias %data, ptr noalias %factor, i64 noundef %n) {
1221 ; VF2-LABEL: define void @test_2xi32(
1222 ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]], i64 noundef [[N:%.*]]) {
1223 ; VF2-NEXT: [[ENTRY:.*]]:
1224 ; VF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
1225 ; VF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1226 ; VF2: [[VECTOR_PH]]:
1227 ; VF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
1228 ; VF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
1229 ; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
1230 ; VF2: [[VECTOR_BODY]]:
1231 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1232 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1233 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
1234 ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]]
1235 ; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP1]]
1236 ; VF2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 8
1237 ; VF2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 8
1238 ; VF2-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i32 0
1239 ; VF2-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP5]], i32 1
1240 ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 0
1241 ; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 0
1242 ; VF2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 8
1243 ; VF2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 8
1244 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
1245 ; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP11]], i32 1
1246 ; VF2-NEXT: [[TMP14:%.*]] = mul <2 x i32> [[TMP7]], [[TMP13]]
1247 ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP14]], i32 0
1248 ; VF2-NEXT: store i32 [[TMP15]], ptr [[TMP8]], align 8
1249 ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP14]], i32 1
1250 ; VF2-NEXT: store i32 [[TMP16]], ptr [[TMP9]], align 8
1251 ; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 1
1252 ; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 1
1253 ; VF2-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 8
1254 ; VF2-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP18]], align 8
1255 ; VF2-NEXT: [[TMP21:%.*]] = insertelement <2 x i32> poison, i32 [[TMP19]], i32 0
1256 ; VF2-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> [[TMP21]], i32 [[TMP20]], i32 1
1257 ; VF2-NEXT: [[TMP23:%.*]] = mul <2 x i32> [[TMP7]], [[TMP22]]
1258 ; VF2-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP23]], i32 0
1259 ; VF2-NEXT: store i32 [[TMP24]], ptr [[TMP17]], align 8
1260 ; VF2-NEXT: [[TMP25:%.*]] = extractelement <2 x i32> [[TMP23]], i32 1
1261 ; VF2-NEXT: store i32 [[TMP25]], ptr [[TMP18]], align 8
1262 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1263 ; VF2-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1264 ; VF2-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
1265 ; VF2: [[MIDDLE_BLOCK]]:
1266 ; VF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
1267 ; VF2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
1268 ; VF2: [[SCALAR_PH]]:
1269 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
1270 ; VF2-NEXT: br label %[[LOOP:.*]]
1272 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1273 ; VF2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[IV]]
1274 ; VF2-NEXT: [[L_FACTOR:%.*]] = load i32, ptr [[ARRAYIDX]], align 8
1275 ; VF2-NEXT: [[DATA_0:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[IV]], i32 0
1276 ; VF2-NEXT: [[L_0:%.*]] = load i32, ptr [[DATA_0]], align 8
1277 ; VF2-NEXT: [[MUL_0:%.*]] = mul i32 [[L_FACTOR]], [[L_0]]
1278 ; VF2-NEXT: store i32 [[MUL_0]], ptr [[DATA_0]], align 8
1279 ; VF2-NEXT: [[DATA_1:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[IV]], i32 1
1280 ; VF2-NEXT: [[L_1:%.*]] = load i32, ptr [[DATA_1]], align 8
1281 ; VF2-NEXT: [[MUL_1:%.*]] = mul i32 [[L_FACTOR]], [[L_1]]
1282 ; VF2-NEXT: store i32 [[MUL_1]], ptr [[DATA_1]], align 8
1283 ; VF2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1284 ; VF2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
1285 ; VF2-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
1287 ; VF2-NEXT: ret void
1289 ; VF4-LABEL: define void @test_2xi32(
1290 ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]], i64 noundef [[N:%.*]]) {
1291 ; VF4-NEXT: [[ENTRY:.*]]:
1292 ; VF4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
1293 ; VF4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1294 ; VF4: [[VECTOR_PH]]:
1295 ; VF4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
1296 ; VF4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
1297 ; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
1298 ; VF4: [[VECTOR_BODY]]:
1299 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1300 ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1301 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
1302 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
1303 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
1304 ; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]]
1305 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP1]]
1306 ; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP2]]
1307 ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP3]]
1308 ; VF4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 8
1309 ; VF4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 8
1310 ; VF4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 8
1311 ; VF4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 8
1312 ; VF4-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> poison, i32 [[TMP8]], i32 0
1313 ; VF4-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i32 1
1314 ; VF4-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i32 2
1315 ; VF4-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP11]], i32 3
1316 ; VF4-NEXT: [[TMP16:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 0
1317 ; VF4-NEXT: [[TMP17:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 0
1318 ; VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP2]], i32 0
1319 ; VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP3]], i32 0
1320 ; VF4-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP16]], align 8
1321 ; VF4-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP17]], align 8
1322 ; VF4-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP18]], align 8
1323 ; VF4-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP19]], align 8
1324 ; VF4-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> poison, i32 [[TMP20]], i32 0
1325 ; VF4-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> [[TMP24]], i32 [[TMP21]], i32 1
1326 ; VF4-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> [[TMP25]], i32 [[TMP22]], i32 2
1327 ; VF4-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP26]], i32 [[TMP23]], i32 3
1328 ; VF4-NEXT: [[TMP28:%.*]] = mul <4 x i32> [[TMP15]], [[TMP27]]
1329 ; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP28]], i32 0
1330 ; VF4-NEXT: store i32 [[TMP29]], ptr [[TMP16]], align 8
1331 ; VF4-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP28]], i32 1
1332 ; VF4-NEXT: store i32 [[TMP30]], ptr [[TMP17]], align 8
1333 ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i32> [[TMP28]], i32 2
1334 ; VF4-NEXT: store i32 [[TMP31]], ptr [[TMP18]], align 8
1335 ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i32> [[TMP28]], i32 3
1336 ; VF4-NEXT: store i32 [[TMP32]], ptr [[TMP19]], align 8
1337 ; VF4-NEXT: [[TMP33:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 1
1338 ; VF4-NEXT: [[TMP34:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 1
1339 ; VF4-NEXT: [[TMP35:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP2]], i32 1
1340 ; VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP3]], i32 1
1341 ; VF4-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP33]], align 8
1342 ; VF4-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP34]], align 8
1343 ; VF4-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP35]], align 8
1344 ; VF4-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP36]], align 8
1345 ; VF4-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> poison, i32 [[TMP37]], i32 0
1346 ; VF4-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP41]], i32 [[TMP38]], i32 1
1347 ; VF4-NEXT: [[TMP43:%.*]] = insertelement <4 x i32> [[TMP42]], i32 [[TMP39]], i32 2
1348 ; VF4-NEXT: [[TMP44:%.*]] = insertelement <4 x i32> [[TMP43]], i32 [[TMP40]], i32 3
1349 ; VF4-NEXT: [[TMP45:%.*]] = mul <4 x i32> [[TMP15]], [[TMP44]]
1350 ; VF4-NEXT: [[TMP46:%.*]] = extractelement <4 x i32> [[TMP45]], i32 0
1351 ; VF4-NEXT: store i32 [[TMP46]], ptr [[TMP33]], align 8
1352 ; VF4-NEXT: [[TMP47:%.*]] = extractelement <4 x i32> [[TMP45]], i32 1
1353 ; VF4-NEXT: store i32 [[TMP47]], ptr [[TMP34]], align 8
1354 ; VF4-NEXT: [[TMP48:%.*]] = extractelement <4 x i32> [[TMP45]], i32 2
1355 ; VF4-NEXT: store i32 [[TMP48]], ptr [[TMP35]], align 8
1356 ; VF4-NEXT: [[TMP49:%.*]] = extractelement <4 x i32> [[TMP45]], i32 3
1357 ; VF4-NEXT: store i32 [[TMP49]], ptr [[TMP36]], align 8
1358 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1359 ; VF4-NEXT: [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1360 ; VF4-NEXT: br i1 [[TMP50]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
1361 ; VF4: [[MIDDLE_BLOCK]]:
1362 ; VF4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
1363 ; VF4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
1364 ; VF4: [[SCALAR_PH]]:
1365 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
1366 ; VF4-NEXT: br label %[[LOOP:.*]]
1368 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1369 ; VF4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[IV]]
1370 ; VF4-NEXT: [[L_FACTOR:%.*]] = load i32, ptr [[ARRAYIDX]], align 8
1371 ; VF4-NEXT: [[DATA_0:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[IV]], i32 0
1372 ; VF4-NEXT: [[L_0:%.*]] = load i32, ptr [[DATA_0]], align 8
1373 ; VF4-NEXT: [[MUL_0:%.*]] = mul i32 [[L_FACTOR]], [[L_0]]
1374 ; VF4-NEXT: store i32 [[MUL_0]], ptr [[DATA_0]], align 8
1375 ; VF4-NEXT: [[DATA_1:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[IV]], i32 1
1376 ; VF4-NEXT: [[L_1:%.*]] = load i32, ptr [[DATA_1]], align 8
1377 ; VF4-NEXT: [[MUL_1:%.*]] = mul i32 [[L_FACTOR]], [[L_1]]
1378 ; VF4-NEXT: store i32 [[MUL_1]], ptr [[DATA_1]], align 8
1379 ; VF4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1380 ; VF4-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
1381 ; VF4-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
1383 ; VF4-NEXT: ret void
1389 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; i32 factor read through an i64-indexed GEP; reused for both member updates.
1390 %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv
1391 %l.factor = load i32, ptr %arrayidx, align 8
; Member 0 - load, scale by the factor, store back to the same address.
1392 %data.0 = getelementptr inbounds { i32, i32, i32 }, ptr %data, i64 %iv, i32 0
1393 %l.0 = load i32, ptr %data.0, align 8
1394 %mul.0 = mul i32 %l.factor, %l.0
1395 store i32 %mul.0, ptr %data.0, align 8
; Member 1 - same read-modify-write pattern.
1396 %data.1 = getelementptr inbounds { i32, i32, i32 }, ptr %data, i64 %iv, i32 1
1397 %l.1 = load i32, ptr %data.1, align 8
1398 %mul.1 = mul i32 %l.factor, %l.1
1399 store i32 %mul.1, ptr %data.1, align 8
; Advance the induction variable and leave once it reaches %n.
1400 %iv.next = add nuw nsw i64 %iv, 1
1401 %ec = icmp eq i64 %iv.next, %n
1402 br i1 %ec, label %exit, label %loop
1407 define void @test_3xi32(ptr noalias %data, ptr noalias %factor, i64 noundef %n) {
1408 ; VF2-LABEL: define void @test_3xi32(
1409 ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]], i64 noundef [[N:%.*]]) {
1410 ; VF2-NEXT: [[ENTRY:.*]]:
1411 ; VF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
1412 ; VF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1413 ; VF2: [[VECTOR_PH]]:
1414 ; VF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
1415 ; VF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
1416 ; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
1417 ; VF2: [[VECTOR_BODY]]:
1418 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1419 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1420 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
1421 ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]]
1422 ; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP1]]
1423 ; VF2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 8
1424 ; VF2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 8
1425 ; VF2-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i32 0
1426 ; VF2-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP5]], i32 1
1427 ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 0
1428 ; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 0
1429 ; VF2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 8
1430 ; VF2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 8
1431 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
1432 ; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP11]], i32 1
1433 ; VF2-NEXT: [[TMP14:%.*]] = mul <2 x i32> [[TMP7]], [[TMP13]]
1434 ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP14]], i32 0
1435 ; VF2-NEXT: store i32 [[TMP15]], ptr [[TMP8]], align 8
1436 ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP14]], i32 1
1437 ; VF2-NEXT: store i32 [[TMP16]], ptr [[TMP9]], align 8
1438 ; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 1
1439 ; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 1
1440 ; VF2-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 8
1441 ; VF2-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP18]], align 8
1442 ; VF2-NEXT: [[TMP21:%.*]] = insertelement <2 x i32> poison, i32 [[TMP19]], i32 0
1443 ; VF2-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> [[TMP21]], i32 [[TMP20]], i32 1
1444 ; VF2-NEXT: [[TMP23:%.*]] = mul <2 x i32> [[TMP7]], [[TMP22]]
1445 ; VF2-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP23]], i32 0
1446 ; VF2-NEXT: store i32 [[TMP24]], ptr [[TMP17]], align 8
1447 ; VF2-NEXT: [[TMP25:%.*]] = extractelement <2 x i32> [[TMP23]], i32 1
1448 ; VF2-NEXT: store i32 [[TMP25]], ptr [[TMP18]], align 8
1449 ; VF2-NEXT: [[TMP26:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 2
1450 ; VF2-NEXT: [[TMP27:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 2
1451 ; VF2-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP26]], align 8
1452 ; VF2-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP27]], align 8
1453 ; VF2-NEXT: [[TMP30:%.*]] = insertelement <2 x i32> poison, i32 [[TMP28]], i32 0
1454 ; VF2-NEXT: [[TMP31:%.*]] = insertelement <2 x i32> [[TMP30]], i32 [[TMP29]], i32 1
1455 ; VF2-NEXT: [[TMP32:%.*]] = mul <2 x i32> [[TMP7]], [[TMP31]]
1456 ; VF2-NEXT: [[TMP33:%.*]] = extractelement <2 x i32> [[TMP32]], i32 0
1457 ; VF2-NEXT: store i32 [[TMP33]], ptr [[TMP26]], align 8
1458 ; VF2-NEXT: [[TMP34:%.*]] = extractelement <2 x i32> [[TMP32]], i32 1
1459 ; VF2-NEXT: store i32 [[TMP34]], ptr [[TMP27]], align 8
1460 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1461 ; VF2-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1462 ; VF2-NEXT: br i1 [[TMP35]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
1463 ; VF2: [[MIDDLE_BLOCK]]:
1464 ; VF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
1465 ; VF2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
1466 ; VF2: [[SCALAR_PH]]:
1467 ; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
1468 ; VF2-NEXT: br label %[[LOOP:.*]]
1470 ; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1471 ; VF2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[IV]]
1472 ; VF2-NEXT: [[L_FACTOR:%.*]] = load i32, ptr [[ARRAYIDX]], align 8
1473 ; VF2-NEXT: [[DATA_0:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[IV]], i32 0
1474 ; VF2-NEXT: [[L_0:%.*]] = load i32, ptr [[DATA_0]], align 8
1475 ; VF2-NEXT: [[MUL_0:%.*]] = mul i32 [[L_FACTOR]], [[L_0]]
1476 ; VF2-NEXT: store i32 [[MUL_0]], ptr [[DATA_0]], align 8
1477 ; VF2-NEXT: [[DATA_1:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[IV]], i32 1
1478 ; VF2-NEXT: [[L_1:%.*]] = load i32, ptr [[DATA_1]], align 8
1479 ; VF2-NEXT: [[MUL_1:%.*]] = mul i32 [[L_FACTOR]], [[L_1]]
1480 ; VF2-NEXT: store i32 [[MUL_1]], ptr [[DATA_1]], align 8
1481 ; VF2-NEXT: [[DATA_2:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[IV]], i32 2
1482 ; VF2-NEXT: [[L_2:%.*]] = load i32, ptr [[DATA_2]], align 8
1483 ; VF2-NEXT: [[MUL_2:%.*]] = mul i32 [[L_FACTOR]], [[L_2]]
1484 ; VF2-NEXT: store i32 [[MUL_2]], ptr [[DATA_2]], align 8
1485 ; VF2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1486 ; VF2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
1487 ; VF2-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
1489 ; VF2-NEXT: ret void
1491 ; VF4-LABEL: define void @test_3xi32(
1492 ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]], i64 noundef [[N:%.*]]) {
1493 ; VF4-NEXT: [[ENTRY:.*]]:
1494 ; VF4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
1495 ; VF4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1496 ; VF4: [[VECTOR_PH]]:
1497 ; VF4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
1498 ; VF4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
1499 ; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
1500 ; VF4: [[VECTOR_BODY]]:
1501 ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1502 ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1503 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
1504 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
1505 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
1506 ; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]]
1507 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP1]]
1508 ; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP2]]
1509 ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP3]]
1510 ; VF4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 8
1511 ; VF4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 8
1512 ; VF4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 8
1513 ; VF4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 8
1514 ; VF4-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> poison, i32 [[TMP8]], i32 0
1515 ; VF4-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i32 1
1516 ; VF4-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i32 2
1517 ; VF4-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP11]], i32 3
1518 ; VF4-NEXT: [[TMP16:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 0
1519 ; VF4-NEXT: [[TMP17:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 0
1520 ; VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP2]], i32 0
1521 ; VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP3]], i32 0
1522 ; VF4-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP16]], align 8
1523 ; VF4-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP17]], align 8
1524 ; VF4-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP18]], align 8
1525 ; VF4-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP19]], align 8
1526 ; VF4-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> poison, i32 [[TMP20]], i32 0
1527 ; VF4-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> [[TMP24]], i32 [[TMP21]], i32 1
1528 ; VF4-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> [[TMP25]], i32 [[TMP22]], i32 2
1529 ; VF4-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP26]], i32 [[TMP23]], i32 3
1530 ; VF4-NEXT: [[TMP28:%.*]] = mul <4 x i32> [[TMP15]], [[TMP27]]
1531 ; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP28]], i32 0
1532 ; VF4-NEXT: store i32 [[TMP29]], ptr [[TMP16]], align 8
1533 ; VF4-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP28]], i32 1
1534 ; VF4-NEXT: store i32 [[TMP30]], ptr [[TMP17]], align 8
1535 ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i32> [[TMP28]], i32 2
1536 ; VF4-NEXT: store i32 [[TMP31]], ptr [[TMP18]], align 8
1537 ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i32> [[TMP28]], i32 3
1538 ; VF4-NEXT: store i32 [[TMP32]], ptr [[TMP19]], align 8
1539 ; VF4-NEXT: [[TMP33:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 1
1540 ; VF4-NEXT: [[TMP34:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 1
1541 ; VF4-NEXT: [[TMP35:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP2]], i32 1
1542 ; VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP3]], i32 1
1543 ; VF4-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP33]], align 8
1544 ; VF4-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP34]], align 8
1545 ; VF4-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP35]], align 8
1546 ; VF4-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP36]], align 8
1547 ; VF4-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> poison, i32 [[TMP37]], i32 0
1548 ; VF4-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP41]], i32 [[TMP38]], i32 1
1549 ; VF4-NEXT: [[TMP43:%.*]] = insertelement <4 x i32> [[TMP42]], i32 [[TMP39]], i32 2
1550 ; VF4-NEXT: [[TMP44:%.*]] = insertelement <4 x i32> [[TMP43]], i32 [[TMP40]], i32 3
1551 ; VF4-NEXT: [[TMP45:%.*]] = mul <4 x i32> [[TMP15]], [[TMP44]]
1552 ; VF4-NEXT: [[TMP46:%.*]] = extractelement <4 x i32> [[TMP45]], i32 0
1553 ; VF4-NEXT: store i32 [[TMP46]], ptr [[TMP33]], align 8
1554 ; VF4-NEXT: [[TMP47:%.*]] = extractelement <4 x i32> [[TMP45]], i32 1
1555 ; VF4-NEXT: store i32 [[TMP47]], ptr [[TMP34]], align 8
1556 ; VF4-NEXT: [[TMP48:%.*]] = extractelement <4 x i32> [[TMP45]], i32 2
1557 ; VF4-NEXT: store i32 [[TMP48]], ptr [[TMP35]], align 8
1558 ; VF4-NEXT: [[TMP49:%.*]] = extractelement <4 x i32> [[TMP45]], i32 3
1559 ; VF4-NEXT: store i32 [[TMP49]], ptr [[TMP36]], align 8
1560 ; VF4-NEXT: [[TMP50:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 2
1561 ; VF4-NEXT: [[TMP51:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 2
1562 ; VF4-NEXT: [[TMP52:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP2]], i32 2
1563 ; VF4-NEXT: [[TMP53:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP3]], i32 2
1564 ; VF4-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP50]], align 8
1565 ; VF4-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP51]], align 8
1566 ; VF4-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP52]], align 8
1567 ; VF4-NEXT: [[TMP57:%.*]] = load i32, ptr [[TMP53]], align 8
1568 ; VF4-NEXT: [[TMP58:%.*]] = insertelement <4 x i32> poison, i32 [[TMP54]], i32 0
1569 ; VF4-NEXT: [[TMP59:%.*]] = insertelement <4 x i32> [[TMP58]], i32 [[TMP55]], i32 1
1570 ; VF4-NEXT: [[TMP60:%.*]] = insertelement <4 x i32> [[TMP59]], i32 [[TMP56]], i32 2
1571 ; VF4-NEXT: [[TMP61:%.*]] = insertelement <4 x i32> [[TMP60]], i32 [[TMP57]], i32 3
1572 ; VF4-NEXT: [[TMP62:%.*]] = mul <4 x i32> [[TMP15]], [[TMP61]]
1573 ; VF4-NEXT: [[TMP63:%.*]] = extractelement <4 x i32> [[TMP62]], i32 0
1574 ; VF4-NEXT: store i32 [[TMP63]], ptr [[TMP50]], align 8
1575 ; VF4-NEXT: [[TMP64:%.*]] = extractelement <4 x i32> [[TMP62]], i32 1
1576 ; VF4-NEXT: store i32 [[TMP64]], ptr [[TMP51]], align 8
1577 ; VF4-NEXT: [[TMP65:%.*]] = extractelement <4 x i32> [[TMP62]], i32 2
1578 ; VF4-NEXT: store i32 [[TMP65]], ptr [[TMP52]], align 8
1579 ; VF4-NEXT: [[TMP66:%.*]] = extractelement <4 x i32> [[TMP62]], i32 3
1580 ; VF4-NEXT: store i32 [[TMP66]], ptr [[TMP53]], align 8
1581 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1582 ; VF4-NEXT: [[TMP67:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1583 ; VF4-NEXT: br i1 [[TMP67]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
1584 ; VF4: [[MIDDLE_BLOCK]]:
1585 ; VF4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
1586 ; VF4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
1587 ; VF4: [[SCALAR_PH]]:
1588 ; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
1589 ; VF4-NEXT: br label %[[LOOP:.*]]
1591 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1592 ; VF4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[IV]]
1593 ; VF4-NEXT: [[L_FACTOR:%.*]] = load i32, ptr [[ARRAYIDX]], align 8
1594 ; VF4-NEXT: [[DATA_0:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[IV]], i32 0
1595 ; VF4-NEXT: [[L_0:%.*]] = load i32, ptr [[DATA_0]], align 8
1596 ; VF4-NEXT: [[MUL_0:%.*]] = mul i32 [[L_FACTOR]], [[L_0]]
1597 ; VF4-NEXT: store i32 [[MUL_0]], ptr [[DATA_0]], align 8
1598 ; VF4-NEXT: [[DATA_1:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[IV]], i32 1
1599 ; VF4-NEXT: [[L_1:%.*]] = load i32, ptr [[DATA_1]], align 8
1600 ; VF4-NEXT: [[MUL_1:%.*]] = mul i32 [[L_FACTOR]], [[L_1]]
1601 ; VF4-NEXT: store i32 [[MUL_1]], ptr [[DATA_1]], align 8
1602 ; VF4-NEXT: [[DATA_2:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[IV]], i32 2
1603 ; VF4-NEXT: [[L_2:%.*]] = load i32, ptr [[DATA_2]], align 8
1604 ; VF4-NEXT: [[MUL_2:%.*]] = mul i32 [[L_FACTOR]], [[L_2]]
1605 ; VF4-NEXT: store i32 [[MUL_2]], ptr [[DATA_2]], align 8
1606 ; VF4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1607 ; VF4-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
1608 ; VF4-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
1610 ; VF4-NEXT: ret void
1616 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
1617 %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv
1618 %l.factor = load i32 , ptr %arrayidx, align 8
1619 %data.0 = getelementptr inbounds { i32, i32, i32 }, ptr %data, i64 %iv, i32 0
1620 %l.0 = load i32, ptr %data.0, align 8
1621 %mul.0 = mul i32 %l.factor, %l.0
1622 store i32 %mul.0, ptr %data.0, align 8
1623 %data.1 = getelementptr inbounds { i32, i32, i32 }, ptr %data, i64 %iv, i32 1
1624 %l.1 = load i32, ptr %data.1, align 8
1625 %mul.1 = mul i32 %l.factor, %l.1
1626 store i32%mul.1, ptr %data.1, align 8
1627 %data.2 = getelementptr inbounds { i32, i32, i32 }, ptr %data, i64 %iv, i32 2
1628 %l.2 = load i32, ptr %data.2, align 8
1629 %mul.2 = mul i32 %l.factor, %l.2
1630 store i32 %mul.2, ptr %data.2, align 8
1631 %iv.next = add nuw nsw i64 %iv, 1
1632 %ec = icmp eq i64 %iv.next, %n
1633 br i1 %ec, label %exit, label %loop
1639 ; VF2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
1640 ; VF2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
1641 ; VF2: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
1642 ; VF2: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
1643 ; VF2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
1644 ; VF2: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
1645 ; VF2: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
1646 ; VF2: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
1647 ; VF2: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
1648 ; VF2: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
1649 ; VF2: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
1650 ; VF2: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
1651 ; VF2: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
1652 ; VF2: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
1653 ; VF2: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
1654 ; VF2: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]}
1655 ; VF2: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
1656 ; VF2: [[LOOP17]] = distinct !{[[LOOP17]], [[META2]], [[META1]]}
1658 ; VF4: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
1659 ; VF4: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
1660 ; VF4: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
1661 ; VF4: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
1662 ; VF4: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
1663 ; VF4: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
1664 ; VF4: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
1665 ; VF4: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
1666 ; VF4: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
1667 ; VF4: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
1668 ; VF4: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
1669 ; VF4: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
1670 ; VF4: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
1671 ; VF4: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
1672 ; VF4: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
1673 ; VF4: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]}
1674 ; VF4: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
1675 ; VF4: [[LOOP17]] = distinct !{[[LOOP17]], [[META2]], [[META1]]}