; Loop-vectorizer cost-model test. Each invocation below runs only the
; loop-vectorize pass with one forced vector width (2, 4, 8 or 16), turns on
; the pass's debug output, discards the transformed module, and pipes stderr
; into FileCheck under a check prefix specific to that width.
1 ; RUN: opt -passes=loop-vectorize -force-vector-width=2 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_2
2 ; RUN: opt -passes=loop-vectorize -force-vector-width=4 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_4
3 ; RUN: opt -passes=loop-vectorize -force-vector-width=8 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_8
4 ; RUN: opt -passes=loop-vectorize -force-vector-width=16 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_16
; Module target: little-endian, 32-bit pointers, Thumb v8.1-M Mainline triple.
7 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
8 target triple = "thumbv8.1m.main-none-eabi"
; i8_factor_2: each iteration loads fields 0 and 1 of the %i8.2 element at
; index %i and stores each value back to the address it was loaded from
; (two i8 loads and two i8 stores per iteration).
13 define void @i8_factor_2(ptr %data, i64 %n) #0 {
; Expected "Found an estimated cost" debug lines for the four memory
; instructions, grouped by forced vector width.
; NOTE(review): a 0 entry presumably means the vectorizer attributed the cost
; of the whole access group to a sibling instruction - confirm against the
; cost model.
17 ; VF_2-LABEL: Checking a loop in 'i8_factor_2'
18 ; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
19 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
20 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp2, ptr %tmp0, align 1
21 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp3, ptr %tmp1, align 1
22 ; VF_4-LABEL: Checking a loop in 'i8_factor_2'
23 ; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
24 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
25 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 %tmp2, ptr %tmp0, align 1
26 ; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i8 %tmp3, ptr %tmp1, align 1
27 ; VF_8-LABEL: Checking a loop in 'i8_factor_2'
28 ; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
29 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
30 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp2, ptr %tmp0, align 1
31 ; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store i8 %tmp3, ptr %tmp1, align 1
32 ; VF_16-LABEL: Checking a loop in 'i8_factor_2'
33 ; VF_16: Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
34 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
35 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp2, ptr %tmp0, align 1
36 ; VF_16-NEXT: Found an estimated cost of 4 for VF 16 For instruction: store i8 %tmp3, ptr %tmp1, align 1
; Loop body: %i starts at 0 and advances by 1; the back edge to %for.body is
; taken while %i.next < %n (signed compare).
38 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
39 %tmp0 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 0
40 %tmp1 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 1
41 %tmp2 = load i8, ptr %tmp0, align 1
42 %tmp3 = load i8, ptr %tmp1, align 1
43 store i8 %tmp2, ptr %tmp0, align 1
44 store i8 %tmp3, ptr %tmp1, align 1
45 %i.next = add nuw nsw i64 %i, 1
46 %cond = icmp slt i64 %i.next, %n
47 br i1 %cond, label %for.body, label %for.end
; i16_factor_2: %i16.2 is a struct of two i16 fields. Each iteration loads
; both fields of the element at index %i and stores each value back to the
; address it was loaded from (two i16 loads and two i16 stores).
53 %i16.2 = type {i16, i16}
54 define void @i16_factor_2(ptr %data, i64 %n) #0 {
; Expected "Found an estimated cost" debug lines for the four memory
; instructions, grouped by forced vector width.
; NOTE(review): a 0 entry presumably means the vectorizer attributed the cost
; of the whole access group to a sibling instruction - confirm against the
; cost model.
58 ; VF_2-LABEL: Checking a loop in 'i16_factor_2'
59 ; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
60 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
61 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp2, ptr %tmp0, align 2
62 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp3, ptr %tmp1, align 2
63 ; VF_4-LABEL: Checking a loop in 'i16_factor_2'
64 ; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
65 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
66 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp2, ptr %tmp0, align 2
67 ; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i16 %tmp3, ptr %tmp1, align 2
68 ; VF_8-LABEL: Checking a loop in 'i16_factor_2'
69 ; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
70 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
71 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp2, ptr %tmp0, align 2
72 ; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store i16 %tmp3, ptr %tmp1, align 2
73 ; VF_16-LABEL: Checking a loop in 'i16_factor_2'
74 ; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
75 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
76 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp2, ptr %tmp0, align 2
77 ; VF_16-NEXT: Found an estimated cost of 8 for VF 16 For instruction: store i16 %tmp3, ptr %tmp1, align 2
; Loop body: %i starts at 0 and advances by 1; the back edge to %for.body is
; taken while %i.next < %n (signed compare).
79 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
80 %tmp0 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 0
81 %tmp1 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 1
82 %tmp2 = load i16, ptr %tmp0, align 2
83 %tmp3 = load i16, ptr %tmp1, align 2
84 store i16 %tmp2, ptr %tmp0, align 2
85 store i16 %tmp3, ptr %tmp1, align 2
86 %i.next = add nuw nsw i64 %i, 1
87 %cond = icmp slt i64 %i.next, %n
88 br i1 %cond, label %for.body, label %for.end
; i32_factor_2: %i32.2 is a struct of two i32 fields. Each iteration loads
; both fields of the element at index %i and stores each value back to the
; address it was loaded from (two i32 loads and two i32 stores).
94 %i32.2 = type {i32, i32}
95 define void @i32_factor_2(ptr %data, i64 %n) #0 {
; Expected "Found an estimated cost" debug lines for the four memory
; instructions, grouped by forced vector width.
; NOTE(review): a 0 entry presumably means the vectorizer attributed the cost
; of the whole access group to a sibling instruction - confirm against the
; cost model.
99 ; VF_2-LABEL: Checking a loop in 'i32_factor_2'
100 ; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
101 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
102 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp2, ptr %tmp0, align 4
103 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp3, ptr %tmp1, align 4
104 ; VF_4-LABEL: Checking a loop in 'i32_factor_2'
105 ; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
106 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
107 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp2, ptr %tmp0, align 4
108 ; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i32 %tmp3, ptr %tmp1, align 4
109 ; VF_8-LABEL: Checking a loop in 'i32_factor_2'
110 ; VF_8: Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
111 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
112 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp2, ptr %tmp0, align 4
113 ; VF_8-NEXT: Found an estimated cost of 8 for VF 8 For instruction: store i32 %tmp3, ptr %tmp1, align 4
114 ; VF_16-LABEL: Checking a loop in 'i32_factor_2'
115 ; VF_16: Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
116 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
117 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp2, ptr %tmp0, align 4
118 ; VF_16-NEXT: Found an estimated cost of 16 for VF 16 For instruction: store i32 %tmp3, ptr %tmp1, align 4
; Loop body: %i starts at 0 and advances by 1; the back edge to %for.body is
; taken while %i.next < %n (signed compare).
120 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
121 %tmp0 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 0
122 %tmp1 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 1
123 %tmp2 = load i32, ptr %tmp0, align 4
124 %tmp3 = load i32, ptr %tmp1, align 4
125 store i32 %tmp2, ptr %tmp0, align 4
126 store i32 %tmp3, ptr %tmp1, align 4
127 %i.next = add nuw nsw i64 %i, 1
128 %cond = icmp slt i64 %i.next, %n
129 br i1 %cond, label %for.body, label %for.end
; i64_factor_2: %i64.2 is a struct of two i64 fields. Each iteration loads
; both fields of the element at index %i and stores each value back to the
; address it was loaded from (two i64 loads and two i64 stores).
135 %i64.2 = type {i64, i64}
136 define void @i64_factor_2(ptr %data, i64 %n) #0 {
; Expected "Found an estimated cost" debug lines for the four memory
; instructions, grouped by forced vector width. Every instruction is expected
; to report its own non-zero cost at all four widths, and each expected value
; doubles from one width to the next.
140 ; VF_2-LABEL: Checking a loop in 'i64_factor_2'
141 ; VF_2: Found an estimated cost of 22 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
142 ; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
143 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp2, ptr %tmp0, align 8
144 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp3, ptr %tmp1, align 8
145 ; VF_4-LABEL: Checking a loop in 'i64_factor_2'
146 ; VF_4: Found an estimated cost of 44 for VF 4 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
147 ; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
148 ; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp2, ptr %tmp0, align 8
149 ; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp3, ptr %tmp1, align 8
150 ; VF_8-LABEL: Checking a loop in 'i64_factor_2'
151 ; VF_8: Found an estimated cost of 88 for VF 8 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
152 ; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
153 ; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp2, ptr %tmp0, align 8
154 ; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp3, ptr %tmp1, align 8
155 ; VF_16-LABEL: Checking a loop in 'i64_factor_2'
156 ; VF_16: Found an estimated cost of 176 for VF 16 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
157 ; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
158 ; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp2, ptr %tmp0, align 8
159 ; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp3, ptr %tmp1, align 8
; Loop body: %i starts at 0 and advances by 1; the back edge to %for.body is
; taken while %i.next < %n (signed compare).
161 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
162 %tmp0 = getelementptr inbounds %i64.2, ptr %data, i64 %i, i32 0
163 %tmp1 = getelementptr inbounds %i64.2, ptr %data, i64 %i, i32 1
164 %tmp2 = load i64, ptr %tmp0, align 8
165 %tmp3 = load i64, ptr %tmp1, align 8
166 store i64 %tmp2, ptr %tmp0, align 8
167 store i64 %tmp3, ptr %tmp1, align 8
168 %i.next = add nuw nsw i64 %i, 1
169 %cond = icmp slt i64 %i.next, %n
170 br i1 %cond, label %for.body, label %for.end
; f16_factor_2: %f16.2 is a struct of two half fields. Each iteration loads
; both fields of the element at index %i and stores each value back to the
; address it was loaded from (two half loads and two half stores).
176 %f16.2 = type {half, half}
177 define void @f16_factor_2(ptr %data, i64 %n) #0 {
; Expected "Found an estimated cost" debug lines for the four memory
; instructions, grouped by forced vector width.
; NOTE(review): a 0 entry presumably means the vectorizer attributed the cost
; of the whole access group to a sibling instruction - confirm against the
; cost model.
181 ; VF_2-LABEL: Checking a loop in 'f16_factor_2'
182 ; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp2 = load half, ptr %tmp0, align 2
183 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp3 = load half, ptr %tmp1, align 2
184 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store half %tmp2, ptr %tmp0, align 2
185 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store half %tmp3, ptr %tmp1, align 2
186 ; VF_4-LABEL: Checking a loop in 'f16_factor_2'
187 ; VF_4: Found an estimated cost of 18 for VF 4 For instruction: %tmp2 = load half, ptr %tmp0, align 2
188 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, ptr %tmp1, align 2
189 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half %tmp2, ptr %tmp0, align 2
190 ; VF_4-NEXT: Found an estimated cost of 18 for VF 4 For instruction: store half %tmp3, ptr %tmp1, align 2
191 ; VF_8-LABEL: Checking a loop in 'f16_factor_2'
192 ; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load half, ptr %tmp0, align 2
193 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, ptr %tmp1, align 2
194 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half %tmp2, ptr %tmp0, align 2
195 ; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store half %tmp3, ptr %tmp1, align 2
196 ; VF_16-LABEL: Checking a loop in 'f16_factor_2'
197 ; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load half, ptr %tmp0, align 2
198 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load half, ptr %tmp1, align 2
199 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half %tmp2, ptr %tmp0, align 2
200 ; VF_16-NEXT: Found an estimated cost of 8 for VF 16 For instruction: store half %tmp3, ptr %tmp1, align 2
; Loop body: %i starts at 0 and advances by 1; the back edge to %for.body is
; taken while %i.next < %n (signed compare).
202 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
203 %tmp0 = getelementptr inbounds %f16.2, ptr %data, i64 %i, i32 0
204 %tmp1 = getelementptr inbounds %f16.2, ptr %data, i64 %i, i32 1
205 %tmp2 = load half, ptr %tmp0, align 2
206 %tmp3 = load half, ptr %tmp1, align 2
207 store half %tmp2, ptr %tmp0, align 2
208 store half %tmp3, ptr %tmp1, align 2
209 %i.next = add nuw nsw i64 %i, 1
210 %cond = icmp slt i64 %i.next, %n
211 br i1 %cond, label %for.body, label %for.end
; f32_factor_2: %f32.2 is a struct of two float fields. Each iteration loads
; both fields of the element at index %i and stores each value back to the
; address it was loaded from (two float loads and two float stores).
217 %f32.2 = type {float, float}
218 define void @f32_factor_2(ptr %data, i64 %n) #0 {
; Expected "Found an estimated cost" debug lines for the four memory
; instructions, grouped by forced vector width. Here the 0 entries appear at
; every width, including 2.
; NOTE(review): a 0 entry presumably means the vectorizer attributed the cost
; of the whole access group to a sibling instruction - confirm against the
; cost model.
222 ; VF_2-LABEL: Checking a loop in 'f32_factor_2'
223 ; VF_2: Found an estimated cost of 10 for VF 2 For instruction: %tmp2 = load float, ptr %tmp0, align 4
224 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load float, ptr %tmp1, align 4
225 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float %tmp2, ptr %tmp0, align 4
226 ; VF_2-NEXT: Found an estimated cost of 10 for VF 2 For instruction: store float %tmp3, ptr %tmp1, align 4
227 ; VF_4-LABEL: Checking a loop in 'f32_factor_2'
228 ; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load float, ptr %tmp0, align 4
229 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load float, ptr %tmp1, align 4
230 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float %tmp2, ptr %tmp0, align 4
231 ; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store float %tmp3, ptr %tmp1, align 4
232 ; VF_8-LABEL: Checking a loop in 'f32_factor_2'
233 ; VF_8: Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load float, ptr %tmp0, align 4
234 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load float, ptr %tmp1, align 4
235 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float %tmp2, ptr %tmp0, align 4
236 ; VF_8-NEXT: Found an estimated cost of 8 for VF 8 For instruction: store float %tmp3, ptr %tmp1, align 4
237 ; VF_16-LABEL: Checking a loop in 'f32_factor_2'
238 ; VF_16: Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load float, ptr %tmp0, align 4
239 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load float, ptr %tmp1, align 4
240 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float %tmp2, ptr %tmp0, align 4
241 ; VF_16-NEXT: Found an estimated cost of 16 for VF 16 For instruction: store float %tmp3, ptr %tmp1, align 4
; Loop body: %i starts at 0 and advances by 1; the back edge to %for.body is
; taken while %i.next < %n (signed compare).
243 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
244 %tmp0 = getelementptr inbounds %f32.2, ptr %data, i64 %i, i32 0
245 %tmp1 = getelementptr inbounds %f32.2, ptr %data, i64 %i, i32 1
246 %tmp2 = load float, ptr %tmp0, align 4
247 %tmp3 = load float, ptr %tmp1, align 4
248 store float %tmp2, ptr %tmp0, align 4
249 store float %tmp3, ptr %tmp1, align 4
250 %i.next = add nuw nsw i64 %i, 1
251 %cond = icmp slt i64 %i.next, %n
252 br i1 %cond, label %for.body, label %for.end
; f64_factor_2: %f64.2 is a struct of two double fields. Each iteration loads
; both fields of the element at index %i and stores each value back to the
; address it was loaded from (two double loads and two double stores).
258 %f64.2 = type {double, double}
259 define void @f64_factor_2(ptr %data, i64 %n) #0 {
; Expected "Found an estimated cost" debug lines for the four memory
; instructions, grouped by forced vector width. Every instruction is expected
; to report its own non-zero cost at all four widths, and each expected value
; doubles from one width to the next.
263 ; VF_2-LABEL: Checking a loop in 'f64_factor_2'
264 ; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp2 = load double, ptr %tmp0, align 8
265 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp3 = load double, ptr %tmp1, align 8
266 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp2, ptr %tmp0, align 8
267 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp3, ptr %tmp1, align 8
268 ; VF_4-LABEL: Checking a loop in 'f64_factor_2'
269 ; VF_4: Found an estimated cost of 12 for VF 4 For instruction: %tmp2 = load double, ptr %tmp0, align 8
270 ; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp3 = load double, ptr %tmp1, align 8
271 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp2, ptr %tmp0, align 8
272 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp3, ptr %tmp1, align 8
273 ; VF_8-LABEL: Checking a loop in 'f64_factor_2'
274 ; VF_8: Found an estimated cost of 24 for VF 8 For instruction: %tmp2 = load double, ptr %tmp0, align 8
275 ; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp3 = load double, ptr %tmp1, align 8
276 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp2, ptr %tmp0, align 8
277 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp3, ptr %tmp1, align 8
278 ; VF_16-LABEL: Checking a loop in 'f64_factor_2'
279 ; VF_16: Found an estimated cost of 48 for VF 16 For instruction: %tmp2 = load double, ptr %tmp0, align 8
280 ; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp3 = load double, ptr %tmp1, align 8
281 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp2, ptr %tmp0, align 8
282 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp3, ptr %tmp1, align 8
; Loop body: %i starts at 0 and advances by 1; the back edge to %for.body is
; taken while %i.next < %n (signed compare).
284 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
285 %tmp0 = getelementptr inbounds %f64.2, ptr %data, i64 %i, i32 0
286 %tmp1 = getelementptr inbounds %f64.2, ptr %data, i64 %i, i32 1
287 %tmp2 = load double, ptr %tmp0, align 8
288 %tmp3 = load double, ptr %tmp1, align 8
289 store double %tmp2, ptr %tmp0, align 8
290 store double %tmp3, ptr %tmp1, align 8
291 %i.next = add nuw nsw i64 %i, 1
292 %cond = icmp slt i64 %i.next, %n
293 br i1 %cond, label %for.body, label %for.end
; i8_factor_3: %i8.3 is a struct of three i8 fields. Each iteration loads all
; three fields of the element at index %i and stores each value back to the
; address it was loaded from (three i8 loads and three i8 stores).
303 %i8.3 = type {i8, i8, i8}
304 define void @i8_factor_3(ptr %data, i64 %n) #0 {
; Expected "Found an estimated cost" debug lines for the six memory
; instructions, grouped by forced vector width. Every instruction is expected
; to report its own non-zero cost at all four widths, and each expected value
; doubles from one width to the next.
308 ; VF_2-LABEL: Checking a loop in 'i8_factor_3'
309 ; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i8, ptr %tmp0, align 1
310 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i8, ptr %tmp1, align 1
311 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i8, ptr %tmp2, align 1
312 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp3, ptr %tmp0, align 1
313 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp4, ptr %tmp1, align 1
314 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp5, ptr %tmp2, align 1
315 ; VF_4-LABEL: Checking a loop in 'i8_factor_3'
316 ; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load i8, ptr %tmp0, align 1
317 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp4 = load i8, ptr %tmp1, align 1
318 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp5 = load i8, ptr %tmp2, align 1
319 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i8 %tmp3, ptr %tmp0, align 1
320 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i8 %tmp4, ptr %tmp1, align 1
321 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i8 %tmp5, ptr %tmp2, align 1
322 ; VF_8-LABEL: Checking a loop in 'i8_factor_3'
323 ; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp0, align 1
324 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i8, ptr %tmp1, align 1
325 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i8, ptr %tmp2, align 1
326 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i8 %tmp3, ptr %tmp0, align 1
327 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i8 %tmp4, ptr %tmp1, align 1
328 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i8 %tmp5, ptr %tmp2, align 1
329 ; VF_16-LABEL: Checking a loop in 'i8_factor_3'
330 ; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp0, align 1
331 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i8, ptr %tmp1, align 1
332 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i8, ptr %tmp2, align 1
333 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i8 %tmp3, ptr %tmp0, align 1
334 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i8 %tmp4, ptr %tmp1, align 1
335 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i8 %tmp5, ptr %tmp2, align 1
; Loop body: %i starts at 0 and advances by 1; the back edge to %for.body is
; taken while %i.next < %n (signed compare).
337 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
338 %tmp0 = getelementptr inbounds %i8.3, ptr %data, i64 %i, i32 0
339 %tmp1 = getelementptr inbounds %i8.3, ptr %data, i64 %i, i32 1
340 %tmp2 = getelementptr inbounds %i8.3, ptr %data, i64 %i, i32 2
341 %tmp3 = load i8, ptr %tmp0, align 1
342 %tmp4 = load i8, ptr %tmp1, align 1
343 %tmp5 = load i8, ptr %tmp2, align 1
344 store i8 %tmp3, ptr %tmp0, align 1
345 store i8 %tmp4, ptr %tmp1, align 1
346 store i8 %tmp5, ptr %tmp2, align 1
347 %i.next = add nuw nsw i64 %i, 1
348 %cond = icmp slt i64 %i.next, %n
349 br i1 %cond, label %for.body, label %for.end
; i16_factor_3: %i16.3 is a struct of three i16 fields. Each iteration loads
; all three fields of the element at index %i and stores each value back to
; the address it was loaded from (three i16 loads and three i16 stores).
355 %i16.3 = type {i16, i16, i16}
356 define void @i16_factor_3(ptr %data, i64 %n) #0 {
; Expected "Found an estimated cost" debug lines for the six memory
; instructions, grouped by forced vector width. Every instruction is expected
; to report its own non-zero cost at all four widths, and each expected value
; doubles from one width to the next.
360 ; VF_2-LABEL: Checking a loop in 'i16_factor_3'
361 ; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i16, ptr %tmp0, align 2
362 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i16, ptr %tmp1, align 2
363 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i16, ptr %tmp2, align 2
364 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp3, ptr %tmp0, align 2
365 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp4, ptr %tmp1, align 2
366 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp5, ptr %tmp2, align 2
367 ; VF_4-LABEL: Checking a loop in 'i16_factor_3'
368 ; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp0, align 2
369 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp4 = load i16, ptr %tmp1, align 2
370 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp5 = load i16, ptr %tmp2, align 2
371 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i16 %tmp3, ptr %tmp0, align 2
372 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i16 %tmp4, ptr %tmp1, align 2
373 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i16 %tmp5, ptr %tmp2, align 2
374 ; VF_8-LABEL: Checking a loop in 'i16_factor_3'
375 ; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp0, align 2
376 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i16, ptr %tmp1, align 2
377 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i16, ptr %tmp2, align 2
378 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i16 %tmp3, ptr %tmp0, align 2
379 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i16 %tmp4, ptr %tmp1, align 2
380 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i16 %tmp5, ptr %tmp2, align 2
381 ; VF_16-LABEL: Checking a loop in 'i16_factor_3'
382 ; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp0, align 2
383 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i16, ptr %tmp1, align 2
384 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i16, ptr %tmp2, align 2
385 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i16 %tmp3, ptr %tmp0, align 2
386 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i16 %tmp4, ptr %tmp1, align 2
387 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i16 %tmp5, ptr %tmp2, align 2
; Loop body: %i starts at 0 and advances by 1; the back edge to %for.body is
; taken while %i.next < %n (signed compare).
389 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
390 %tmp0 = getelementptr inbounds %i16.3, ptr %data, i64 %i, i32 0
391 %tmp1 = getelementptr inbounds %i16.3, ptr %data, i64 %i, i32 1
392 %tmp2 = getelementptr inbounds %i16.3, ptr %data, i64 %i, i32 2
393 %tmp3 = load i16, ptr %tmp0, align 2
394 %tmp4 = load i16, ptr %tmp1, align 2
395 %tmp5 = load i16, ptr %tmp2, align 2
396 store i16 %tmp3, ptr %tmp0, align 2
397 store i16 %tmp4, ptr %tmp1, align 2
398 store i16 %tmp5, ptr %tmp2, align 2
399 %i.next = add nuw nsw i64 %i, 1
400 %cond = icmp slt i64 %i.next, %n
401 br i1 %cond, label %for.body, label %for.end
; i32_factor_3: %i32.3 is a struct of three i32 fields. Each iteration loads
; all three fields of the element at index %i and stores each value back to
; the address it was loaded from (three i32 loads and three i32 stores).
407 %i32.3 = type {i32, i32, i32}
408 define void @i32_factor_3(ptr %data, i64 %n) #0 {
; Expected "Found an estimated cost" debug lines for the six memory
; instructions, grouped by forced vector width. Every instruction is expected
; to report its own non-zero cost at all four widths. Width 4 is the one case
; where loads and stores are expected to cost the same (8 each), which is also
; lower than the expected load cost at width 2.
412 ; VF_2-LABEL: Checking a loop in 'i32_factor_3'
413 ; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp0, align 4
414 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i32, ptr %tmp1, align 4
415 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i32, ptr %tmp2, align 4
416 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp3, ptr %tmp0, align 4
417 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp4, ptr %tmp1, align 4
418 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp5, ptr %tmp2, align 4
419 ; VF_4-LABEL: Checking a loop in 'i32_factor_3'
420 ; VF_4: Found an estimated cost of 8 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp0, align 4
421 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp4 = load i32, ptr %tmp1, align 4
422 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp5 = load i32, ptr %tmp2, align 4
423 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp3, ptr %tmp0, align 4
424 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp4, ptr %tmp1, align 4
425 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp5, ptr %tmp2, align 4
426 ; VF_8-LABEL: Checking a loop in 'i32_factor_3'
427 ; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp0, align 4
428 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i32, ptr %tmp1, align 4
429 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i32, ptr %tmp2, align 4
430 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 %tmp3, ptr %tmp0, align 4
431 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 %tmp4, ptr %tmp1, align 4
432 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 %tmp5, ptr %tmp2, align 4
433 ; VF_16-LABEL: Checking a loop in 'i32_factor_3'
434 ; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp0, align 4
435 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i32, ptr %tmp1, align 4
436 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i32, ptr %tmp2, align 4
437 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 %tmp3, ptr %tmp0, align 4
438 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 %tmp4, ptr %tmp1, align 4
439 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 %tmp5, ptr %tmp2, align 4
; Loop body: %i starts at 0 and advances by 1; the back edge to %for.body is
; taken while %i.next < %n (signed compare).
441 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
442 %tmp0 = getelementptr inbounds %i32.3, ptr %data, i64 %i, i32 0
443 %tmp1 = getelementptr inbounds %i32.3, ptr %data, i64 %i, i32 1
444 %tmp2 = getelementptr inbounds %i32.3, ptr %data, i64 %i, i32 2
445 %tmp3 = load i32, ptr %tmp0, align 4
446 %tmp4 = load i32, ptr %tmp1, align 4
447 %tmp5 = load i32, ptr %tmp2, align 4
448 store i32 %tmp3, ptr %tmp0, align 4
449 store i32 %tmp4, ptr %tmp1, align 4
450 store i32 %tmp5, ptr %tmp2, align 4
451 %i.next = add nuw nsw i64 %i, 1
452 %cond = icmp slt i64 %i.next, %n
453 br i1 %cond, label %for.body, label %for.end
; i64_factor_3: %i64.3 is a struct of three i64 fields. Each iteration loads
; all three fields of the element at index %i and stores each value back to
; the address it was loaded from (three i64 loads and three i64 stores).
459 %i64.3 = type {i64, i64, i64}
460 define void @i64_factor_3(ptr %data, i64 %n) #0 {
; Expected "Found an estimated cost" debug lines for the six memory
; instructions, grouped by forced vector width. Every instruction is expected
; to report its own non-zero cost at all four widths, and each expected value
; doubles from one width to the next.
464 ; VF_2-LABEL: Checking a loop in 'i64_factor_3'
465 ; VF_2: Found an estimated cost of 22 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp0, align 8
466 ; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp4 = load i64, ptr %tmp1, align 8
467 ; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp5 = load i64, ptr %tmp2, align 8
468 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp3, ptr %tmp0, align 8
469 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp4, ptr %tmp1, align 8
470 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp5, ptr %tmp2, align 8
471 ; VF_4-LABEL: Checking a loop in 'i64_factor_3'
472 ; VF_4: Found an estimated cost of 44 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp0, align 8
473 ; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp4 = load i64, ptr %tmp1, align 8
474 ; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp5 = load i64, ptr %tmp2, align 8
475 ; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp3, ptr %tmp0, align 8
476 ; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp4, ptr %tmp1, align 8
477 ; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp5, ptr %tmp2, align 8
478 ; VF_8-LABEL: Checking a loop in 'i64_factor_3'
479 ; VF_8: Found an estimated cost of 88 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp0, align 8
480 ; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp4 = load i64, ptr %tmp1, align 8
481 ; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp5 = load i64, ptr %tmp2, align 8
482 ; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp3, ptr %tmp0, align 8
483 ; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp4, ptr %tmp1, align 8
484 ; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp5, ptr %tmp2, align 8
485 ; VF_16-LABEL: Checking a loop in 'i64_factor_3'
486 ; VF_16: Found an estimated cost of 176 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp0, align 8
487 ; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp4 = load i64, ptr %tmp1, align 8
488 ; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp5 = load i64, ptr %tmp2, align 8
489 ; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp3, ptr %tmp0, align 8
490 ; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp4, ptr %tmp1, align 8
491 ; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp5, ptr %tmp2, align 8
; Loop body: %i starts at 0 and advances by 1; the back edge to %for.body is
; taken while %i.next < %n (signed compare).
493 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
494 %tmp0 = getelementptr inbounds %i64.3, ptr %data, i64 %i, i32 0
495 %tmp1 = getelementptr inbounds %i64.3, ptr %data, i64 %i, i32 1
496 %tmp2 = getelementptr inbounds %i64.3, ptr %data, i64 %i, i32 2
497 %tmp3 = load i64, ptr %tmp0, align 8
498 %tmp4 = load i64, ptr %tmp1, align 8
499 %tmp5 = load i64, ptr %tmp2, align 8
500 store i64 %tmp3, ptr %tmp0, align 8
501 store i64 %tmp4, ptr %tmp1, align 8
502 store i64 %tmp5, ptr %tmp2, align 8
503 %i.next = add nuw nsw i64 %i, 1
504 %cond = icmp slt i64 %i.next, %n
505 br i1 %cond, label %for.body, label %for.end
; f16_factor_3: each iteration reads fields 0, 1 and 2 of the %f16.3 element at
; index %i and writes every value back to the address it was loaded from.
; The FileCheck directives below pin the per-instruction costs printed in the
; loop vectorizer's debug output for forced vector widths 2, 4, 8 and 16 (one
; check prefix per RUN line at the top of the file).
; Expected pattern: at width 2 every load costs 6 and every store costs 4; at
; widths 4, 8 and 16 the cost (28, 56, 112) is reported on the first load and
; on the last store only, with 0 for the other accesses.
; NOTE(review): the "first load / last store" pattern presumably means the
; accesses are costed together as one group - confirm against the cost model.
511 %f16.3 = type {half, half, half}
512 define void @f16_factor_3(ptr %data, i64 %n) #0 {
516 ; VF_2-LABEL: Checking a loop in 'f16_factor_3'
517 ; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp3 = load half, ptr %tmp0, align 2
518 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp4 = load half, ptr %tmp1, align 2
519 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp5 = load half, ptr %tmp2, align 2
520 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store half %tmp3, ptr %tmp0, align 2
521 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store half %tmp4, ptr %tmp1, align 2
522 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store half %tmp5, ptr %tmp2, align 2
523 ; VF_4-LABEL: Checking a loop in 'f16_factor_3'
524 ; VF_4: Found an estimated cost of 28 for VF 4 For instruction: %tmp3 = load half, ptr %tmp0, align 2
525 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load half, ptr %tmp1, align 2
526 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load half, ptr %tmp2, align 2
527 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half %tmp3, ptr %tmp0, align 2
528 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half %tmp4, ptr %tmp1, align 2
529 ; VF_4-NEXT: Found an estimated cost of 28 for VF 4 For instruction: store half %tmp5, ptr %tmp2, align 2
530 ; VF_8-LABEL: Checking a loop in 'f16_factor_3'
531 ; VF_8: Found an estimated cost of 56 for VF 8 For instruction: %tmp3 = load half, ptr %tmp0, align 2
532 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load half, ptr %tmp1, align 2
533 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load half, ptr %tmp2, align 2
534 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half %tmp3, ptr %tmp0, align 2
535 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half %tmp4, ptr %tmp1, align 2
536 ; VF_8-NEXT: Found an estimated cost of 56 for VF 8 For instruction: store half %tmp5, ptr %tmp2, align 2
537 ; VF_16-LABEL: Checking a loop in 'f16_factor_3'
538 ; VF_16: Found an estimated cost of 112 for VF 16 For instruction: %tmp3 = load half, ptr %tmp0, align 2
539 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load half, ptr %tmp1, align 2
540 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load half, ptr %tmp2, align 2
541 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half %tmp3, ptr %tmp0, align 2
542 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half %tmp4, ptr %tmp1, align 2
543 ; VF_16-NEXT: Found an estimated cost of 112 for VF 16 For instruction: store half %tmp5, ptr %tmp2, align 2
; Loop body: %i starts at 0 and advances by one per iteration; the loop repeats
; while %i.next is (signed) less than %n. The instruction names and order must
; stay exactly as written because the directives above match them textually.
545 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
546 %tmp0 = getelementptr inbounds %f16.3, ptr %data, i64 %i, i32 0
547 %tmp1 = getelementptr inbounds %f16.3, ptr %data, i64 %i, i32 1
548 %tmp2 = getelementptr inbounds %f16.3, ptr %data, i64 %i, i32 2
549 %tmp3 = load half, ptr %tmp0, align 2
550 %tmp4 = load half, ptr %tmp1, align 2
551 %tmp5 = load half, ptr %tmp2, align 2
552 store half %tmp3, ptr %tmp0, align 2
553 store half %tmp4, ptr %tmp1, align 2
554 store half %tmp5, ptr %tmp2, align 2
555 %i.next = add nuw nsw i64 %i, 1
556 %cond = icmp slt i64 %i.next, %n
557 br i1 %cond, label %for.body, label %for.end
; f32_factor_3: each iteration reads fields 0, 1 and 2 of the %f32.3 element at
; index %i and writes every value back to the address it was loaded from.
; The FileCheck directives below pin the per-instruction costs printed in the
; loop vectorizer's debug output for forced vector widths 2, 4, 8 and 16.
; Expected pattern: at widths 2, 8 and 16 the cost (16, 64, 128) is reported on
; the first load and on the last store only, with 0 for the other accesses; at
; width 4 every load and every store is reported with cost 8.
563 %f32.3 = type {float, float, float}
564 define void @f32_factor_3(ptr %data, i64 %n) #0 {
568 ; VF_2-LABEL: Checking a loop in 'f32_factor_3'
569 ; VF_2: Found an estimated cost of 16 for VF 2 For instruction: %tmp3 = load float, ptr %tmp0, align 4
570 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load float, ptr %tmp1, align 4
571 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load float, ptr %tmp2, align 4
572 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float %tmp3, ptr %tmp0, align 4
573 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float %tmp4, ptr %tmp1, align 4
574 ; VF_2-NEXT: Found an estimated cost of 16 for VF 2 For instruction: store float %tmp5, ptr %tmp2, align 4
575 ; VF_4-LABEL: Checking a loop in 'f32_factor_3'
576 ; VF_4: Found an estimated cost of 8 for VF 4 For instruction: %tmp3 = load float, ptr %tmp0, align 4
577 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp4 = load float, ptr %tmp1, align 4
578 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp5 = load float, ptr %tmp2, align 4
579 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp3, ptr %tmp0, align 4
580 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp4, ptr %tmp1, align 4
581 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp5, ptr %tmp2, align 4
582 ; VF_8-LABEL: Checking a loop in 'f32_factor_3'
583 ; VF_8: Found an estimated cost of 64 for VF 8 For instruction: %tmp3 = load float, ptr %tmp0, align 4
584 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load float, ptr %tmp1, align 4
585 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, ptr %tmp2, align 4
586 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float %tmp3, ptr %tmp0, align 4
587 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float %tmp4, ptr %tmp1, align 4
588 ; VF_8-NEXT: Found an estimated cost of 64 for VF 8 For instruction: store float %tmp5, ptr %tmp2, align 4
589 ; VF_16-LABEL: Checking a loop in 'f32_factor_3'
590 ; VF_16: Found an estimated cost of 128 for VF 16 For instruction: %tmp3 = load float, ptr %tmp0, align 4
591 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load float, ptr %tmp1, align 4
592 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load float, ptr %tmp2, align 4
593 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float %tmp3, ptr %tmp0, align 4
594 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float %tmp4, ptr %tmp1, align 4
595 ; VF_16-NEXT: Found an estimated cost of 128 for VF 16 For instruction: store float %tmp5, ptr %tmp2, align 4
; Loop body: %i starts at 0 and advances by one per iteration; the loop repeats
; while %i.next is (signed) less than %n. The instruction names and order must
; stay exactly as written because the directives above match them textually.
597 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
598 %tmp0 = getelementptr inbounds %f32.3, ptr %data, i64 %i, i32 0
599 %tmp1 = getelementptr inbounds %f32.3, ptr %data, i64 %i, i32 1
600 %tmp2 = getelementptr inbounds %f32.3, ptr %data, i64 %i, i32 2
601 %tmp3 = load float, ptr %tmp0, align 4
602 %tmp4 = load float, ptr %tmp1, align 4
603 %tmp5 = load float, ptr %tmp2, align 4
604 store float %tmp3, ptr %tmp0, align 4
605 store float %tmp4, ptr %tmp1, align 4
606 store float %tmp5, ptr %tmp2, align 4
607 %i.next = add nuw nsw i64 %i, 1
608 %cond = icmp slt i64 %i.next, %n
609 br i1 %cond, label %for.body, label %for.end
; f64_factor_3: each iteration reads fields 0, 1 and 2 of the %f64.3 element at
; index %i and writes every value back to the address it was loaded from.
; The FileCheck directives below pin the per-instruction costs printed in the
; loop vectorizer's debug output for forced vector widths 2, 4, 8 and 16.
; Expected pattern: every load and every store carries its own cost at every
; width (loads 6, 12, 24, 48; stores 4, 8, 16, 32), doubling with the width.
615 %f64.3 = type {double, double, double}
616 define void @f64_factor_3(ptr %data, i64 %n) #0 {
620 ; VF_2-LABEL: Checking a loop in 'f64_factor_3'
621 ; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp3 = load double, ptr %tmp0, align 8
622 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp4 = load double, ptr %tmp1, align 8
623 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp5 = load double, ptr %tmp2, align 8
624 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp3, ptr %tmp0, align 8
625 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp4, ptr %tmp1, align 8
626 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp5, ptr %tmp2, align 8
627 ; VF_4-LABEL: Checking a loop in 'f64_factor_3'
628 ; VF_4: Found an estimated cost of 12 for VF 4 For instruction: %tmp3 = load double, ptr %tmp0, align 8
629 ; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp4 = load double, ptr %tmp1, align 8
630 ; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp5 = load double, ptr %tmp2, align 8
631 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp3, ptr %tmp0, align 8
632 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp4, ptr %tmp1, align 8
633 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp5, ptr %tmp2, align 8
634 ; VF_8-LABEL: Checking a loop in 'f64_factor_3'
635 ; VF_8: Found an estimated cost of 24 for VF 8 For instruction: %tmp3 = load double, ptr %tmp0, align 8
636 ; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp4 = load double, ptr %tmp1, align 8
637 ; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp5 = load double, ptr %tmp2, align 8
638 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp3, ptr %tmp0, align 8
639 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp4, ptr %tmp1, align 8
640 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp5, ptr %tmp2, align 8
641 ; VF_16-LABEL: Checking a loop in 'f64_factor_3'
642 ; VF_16: Found an estimated cost of 48 for VF 16 For instruction: %tmp3 = load double, ptr %tmp0, align 8
643 ; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp4 = load double, ptr %tmp1, align 8
644 ; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp5 = load double, ptr %tmp2, align 8
645 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp3, ptr %tmp0, align 8
646 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp4, ptr %tmp1, align 8
647 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp5, ptr %tmp2, align 8
; Loop body: %i starts at 0 and advances by one per iteration; the loop repeats
; while %i.next is (signed) less than %n. The instruction names and order must
; stay exactly as written because the directives above match them textually.
649 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
650 %tmp0 = getelementptr inbounds %f64.3, ptr %data, i64 %i, i32 0
651 %tmp1 = getelementptr inbounds %f64.3, ptr %data, i64 %i, i32 1
652 %tmp2 = getelementptr inbounds %f64.3, ptr %data, i64 %i, i32 2
653 %tmp3 = load double, ptr %tmp0, align 8
654 %tmp4 = load double, ptr %tmp1, align 8
655 %tmp5 = load double, ptr %tmp2, align 8
656 store double %tmp3, ptr %tmp0, align 8
657 store double %tmp4, ptr %tmp1, align 8
658 store double %tmp5, ptr %tmp2, align 8
659 %i.next = add nuw nsw i64 %i, 1
660 %cond = icmp slt i64 %i.next, %n
661 br i1 %cond, label %for.body, label %for.end
; i8_factor_4: each iteration reads fields 0 through 3 of the %i8.4 element at
; index %i and writes every value back to the address it was loaded from.
; The FileCheck directives below pin the per-instruction costs printed in the
; loop vectorizer's debug output for forced vector widths 2, 4, 8 and 16.
; Expected pattern: every load and every store carries its own cost at every
; width (loads 12, 24, 48, 96; stores 4, 8, 16, 32), doubling with the width.
670 %i8.4 = type {i8, i8, i8, i8}
671 define void @i8_factor_4(ptr %data, i64 %n) #0 {
675 ; VF_2-LABEL: Checking a loop in 'i8_factor_4'
676 ; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
677 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
678 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
679 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
680 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp4, ptr %tmp0, align 1
681 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp5, ptr %tmp1, align 1
682 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp6, ptr %tmp2, align 1
683 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp7, ptr %tmp3, align 1
684 ; VF_4-LABEL: Checking a loop in 'i8_factor_4'
685 ; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
686 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
687 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
688 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
689 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i8 %tmp4, ptr %tmp0, align 1
690 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i8 %tmp5, ptr %tmp1, align 1
691 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i8 %tmp6, ptr %tmp2, align 1
692 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i8 %tmp7, ptr %tmp3, align 1
693 ; VF_8-LABEL: Checking a loop in 'i8_factor_4'
694 ; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
695 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
696 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
697 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
698 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i8 %tmp4, ptr %tmp0, align 1
699 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i8 %tmp5, ptr %tmp1, align 1
700 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i8 %tmp6, ptr %tmp2, align 1
701 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i8 %tmp7, ptr %tmp3, align 1
702 ; VF_16-LABEL: Checking a loop in 'i8_factor_4'
703 ; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
704 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
705 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
706 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
707 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i8 %tmp4, ptr %tmp0, align 1
708 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i8 %tmp5, ptr %tmp1, align 1
709 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i8 %tmp6, ptr %tmp2, align 1
710 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i8 %tmp7, ptr %tmp3, align 1
; Loop body: %i starts at 0 and advances by one per iteration; the loop repeats
; while %i.next is (signed) less than %n. The instruction names and order must
; stay exactly as written because the directives above match them textually.
712 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
713 %tmp0 = getelementptr inbounds %i8.4, ptr %data, i64 %i, i32 0
714 %tmp1 = getelementptr inbounds %i8.4, ptr %data, i64 %i, i32 1
715 %tmp2 = getelementptr inbounds %i8.4, ptr %data, i64 %i, i32 2
716 %tmp3 = getelementptr inbounds %i8.4, ptr %data, i64 %i, i32 3
717 %tmp4 = load i8, ptr %tmp0, align 1
718 %tmp5 = load i8, ptr %tmp1, align 1
719 %tmp6 = load i8, ptr %tmp2, align 1
720 %tmp7 = load i8, ptr %tmp3, align 1
721 store i8 %tmp4, ptr %tmp0, align 1
722 store i8 %tmp5, ptr %tmp1, align 1
723 store i8 %tmp6, ptr %tmp2, align 1
724 store i8 %tmp7, ptr %tmp3, align 1
725 %i.next = add nuw nsw i64 %i, 1
726 %cond = icmp slt i64 %i.next, %n
727 br i1 %cond, label %for.body, label %for.end
; i16_factor_4: each iteration reads fields 0 through 3 of the %i16.4 element
; at index %i and writes every value back to the address it was loaded from.
; The FileCheck directives below pin the per-instruction costs printed in the
; loop vectorizer's debug output for forced vector widths 2, 4, 8 and 16.
; Expected pattern: every load and every store carries its own cost at every
; width (loads 12, 24, 48, 96; stores 4, 8, 16, 32), doubling with the width.
733 %i16.4 = type {i16, i16, i16, i16}
734 define void @i16_factor_4(ptr %data, i64 %n) #0 {
738 ; VF_2-LABEL: Checking a loop in 'i16_factor_4'
739 ; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
740 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
741 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
742 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
743 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp4, ptr %tmp0, align 2
744 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp5, ptr %tmp1, align 2
745 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp6, ptr %tmp2, align 2
746 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp7, ptr %tmp3, align 2
747 ; VF_4-LABEL: Checking a loop in 'i16_factor_4'
748 ; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
749 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
750 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
751 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
752 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i16 %tmp4, ptr %tmp0, align 2
753 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i16 %tmp5, ptr %tmp1, align 2
754 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i16 %tmp6, ptr %tmp2, align 2
755 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i16 %tmp7, ptr %tmp3, align 2
756 ; VF_8-LABEL: Checking a loop in 'i16_factor_4'
757 ; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
758 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
759 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
760 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
761 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i16 %tmp4, ptr %tmp0, align 2
762 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i16 %tmp5, ptr %tmp1, align 2
763 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i16 %tmp6, ptr %tmp2, align 2
764 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i16 %tmp7, ptr %tmp3, align 2
765 ; VF_16-LABEL: Checking a loop in 'i16_factor_4'
766 ; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
767 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
768 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
769 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
770 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i16 %tmp4, ptr %tmp0, align 2
771 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i16 %tmp5, ptr %tmp1, align 2
772 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i16 %tmp6, ptr %tmp2, align 2
773 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i16 %tmp7, ptr %tmp3, align 2
; Loop body: %i starts at 0 and advances by one per iteration; the loop repeats
; while %i.next is (signed) less than %n. The instruction names and order must
; stay exactly as written because the directives above match them textually.
775 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
776 %tmp0 = getelementptr inbounds %i16.4, ptr %data, i64 %i, i32 0
777 %tmp1 = getelementptr inbounds %i16.4, ptr %data, i64 %i, i32 1
778 %tmp2 = getelementptr inbounds %i16.4, ptr %data, i64 %i, i32 2
779 %tmp3 = getelementptr inbounds %i16.4, ptr %data, i64 %i, i32 3
780 %tmp4 = load i16, ptr %tmp0, align 2
781 %tmp5 = load i16, ptr %tmp1, align 2
782 %tmp6 = load i16, ptr %tmp2, align 2
783 %tmp7 = load i16, ptr %tmp3, align 2
784 store i16 %tmp4, ptr %tmp0, align 2
785 store i16 %tmp5, ptr %tmp1, align 2
786 store i16 %tmp6, ptr %tmp2, align 2
787 store i16 %tmp7, ptr %tmp3, align 2
788 %i.next = add nuw nsw i64 %i, 1
789 %cond = icmp slt i64 %i.next, %n
790 br i1 %cond, label %for.body, label %for.end
; i32_factor_4: each iteration reads fields 0 through 3 of the %i32.4 element
; at index %i and writes every value back to the address it was loaded from.
; The FileCheck directives below pin the per-instruction costs printed in the
; loop vectorizer's debug output for forced vector widths 2, 4, 8 and 16.
; Expected pattern: at widths 2, 8 and 16 loads cost 12, 48, 96 and stores cost
; 4, 16, 32; width 4 is the exception, with every load and store costing 8.
796 %i32.4 = type {i32, i32, i32, i32}
797 define void @i32_factor_4(ptr %data, i64 %n) #0 {
801 ; VF_2-LABEL: Checking a loop in 'i32_factor_4'
802 ; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
803 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
804 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
805 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
806 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp4, ptr %tmp0, align 4
807 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp5, ptr %tmp1, align 4
808 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp6, ptr %tmp2, align 4
809 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp7, ptr %tmp3, align 4
810 ; VF_4-LABEL: Checking a loop in 'i32_factor_4'
811 ; VF_4: Found an estimated cost of 8 for VF 4 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
812 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
813 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
814 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
815 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp4, ptr %tmp0, align 4
816 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp5, ptr %tmp1, align 4
817 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp6, ptr %tmp2, align 4
818 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp7, ptr %tmp3, align 4
819 ; VF_8-LABEL: Checking a loop in 'i32_factor_4'
820 ; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
821 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
822 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
823 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
824 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 %tmp4, ptr %tmp0, align 4
825 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 %tmp5, ptr %tmp1, align 4
826 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 %tmp6, ptr %tmp2, align 4
827 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 %tmp7, ptr %tmp3, align 4
828 ; VF_16-LABEL: Checking a loop in 'i32_factor_4'
829 ; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
830 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
831 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
832 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
833 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 %tmp4, ptr %tmp0, align 4
834 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 %tmp5, ptr %tmp1, align 4
835 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 %tmp6, ptr %tmp2, align 4
836 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 %tmp7, ptr %tmp3, align 4
; Loop body: %i starts at 0 and advances by one per iteration; the loop repeats
; while %i.next is (signed) less than %n. The instruction names and order must
; stay exactly as written because the directives above match them textually.
838 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
839 %tmp0 = getelementptr inbounds %i32.4, ptr %data, i64 %i, i32 0
840 %tmp1 = getelementptr inbounds %i32.4, ptr %data, i64 %i, i32 1
841 %tmp2 = getelementptr inbounds %i32.4, ptr %data, i64 %i, i32 2
842 %tmp3 = getelementptr inbounds %i32.4, ptr %data, i64 %i, i32 3
843 %tmp4 = load i32, ptr %tmp0, align 4
844 %tmp5 = load i32, ptr %tmp1, align 4
845 %tmp6 = load i32, ptr %tmp2, align 4
846 %tmp7 = load i32, ptr %tmp3, align 4
847 store i32 %tmp4, ptr %tmp0, align 4
848 store i32 %tmp5, ptr %tmp1, align 4
849 store i32 %tmp6, ptr %tmp2, align 4
850 store i32 %tmp7, ptr %tmp3, align 4
851 %i.next = add nuw nsw i64 %i, 1
852 %cond = icmp slt i64 %i.next, %n
853 br i1 %cond, label %for.body, label %for.end
; i64_factor_4: each iteration reads fields 0 through 3 of the %i64.4 element
; at index %i and writes every value back to the address it was loaded from.
; The FileCheck directives below pin the per-instruction costs printed in the
; loop vectorizer's debug output for forced vector widths 2, 4, 8 and 16.
; Expected pattern: every load and every store carries its own cost at every
; width (loads 22, 44, 88, 176; stores 6, 12, 24, 48), doubling with the width.
859 %i64.4 = type {i64, i64, i64, i64}
860 define void @i64_factor_4(ptr %data, i64 %n) #0 {
864 ; VF_2-LABEL: Checking a loop in 'i64_factor_4'
865 ; VF_2: Found an estimated cost of 22 for VF 2 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
866 ; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
867 ; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
868 ; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
869 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp4, ptr %tmp0, align 8
870 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp5, ptr %tmp1, align 8
871 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp6, ptr %tmp2, align 8
872 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp7, ptr %tmp3, align 8
873 ; VF_4-LABEL: Checking a loop in 'i64_factor_4'
874 ; VF_4: Found an estimated cost of 44 for VF 4 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
875 ; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
876 ; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
877 ; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
878 ; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp4, ptr %tmp0, align 8
879 ; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp5, ptr %tmp1, align 8
880 ; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp6, ptr %tmp2, align 8
881 ; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp7, ptr %tmp3, align 8
882 ; VF_8-LABEL: Checking a loop in 'i64_factor_4'
883 ; VF_8: Found an estimated cost of 88 for VF 8 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
884 ; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
885 ; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
886 ; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
887 ; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp4, ptr %tmp0, align 8
888 ; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp5, ptr %tmp1, align 8
889 ; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp6, ptr %tmp2, align 8
890 ; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp7, ptr %tmp3, align 8
891 ; VF_16-LABEL: Checking a loop in 'i64_factor_4'
892 ; VF_16: Found an estimated cost of 176 for VF 16 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
893 ; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
894 ; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
895 ; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
896 ; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp4, ptr %tmp0, align 8
897 ; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp5, ptr %tmp1, align 8
898 ; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp6, ptr %tmp2, align 8
899 ; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp7, ptr %tmp3, align 8
; Loop body: %i starts at 0 and advances by one per iteration; the loop repeats
; while %i.next is (signed) less than %n. The instruction names and order must
; stay exactly as written because the directives above match them textually.
901 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
902 %tmp0 = getelementptr inbounds %i64.4, ptr %data, i64 %i, i32 0
903 %tmp1 = getelementptr inbounds %i64.4, ptr %data, i64 %i, i32 1
904 %tmp2 = getelementptr inbounds %i64.4, ptr %data, i64 %i, i32 2
905 %tmp3 = getelementptr inbounds %i64.4, ptr %data, i64 %i, i32 3
906 %tmp4 = load i64, ptr %tmp0, align 8
907 %tmp5 = load i64, ptr %tmp1, align 8
908 %tmp6 = load i64, ptr %tmp2, align 8
909 %tmp7 = load i64, ptr %tmp3, align 8
910 store i64 %tmp4, ptr %tmp0, align 8
911 store i64 %tmp5, ptr %tmp1, align 8
912 store i64 %tmp6, ptr %tmp2, align 8
913 store i64 %tmp7, ptr %tmp3, align 8
914 %i.next = add nuw nsw i64 %i, 1
915 %cond = icmp slt i64 %i.next, %n
916 br i1 %cond, label %for.body, label %for.end
; f16_factor_4: each iteration reads fields 0 through 3 of the %f16.4 element
; at index %i and writes every value back to the address it was loaded from.
; The FileCheck directives below pin the per-instruction costs printed in the
; loop vectorizer's debug output for forced vector widths 2, 4, 8 and 16.
; Expected pattern: at every width the cost (18, 36, 72, 144) is reported on
; the first load and on the last store only, with 0 for the other accesses.
; NOTE(review): the "first load / last store" pattern presumably means the
; accesses are costed together as one group - confirm against the cost model.
922 %f16.4 = type {half, half, half, half}
923 define void @f16_factor_4(ptr %data, i64 %n) #0 {
927 ; VF_2-LABEL: Checking a loop in 'f16_factor_4'
928 ; VF_2: Found an estimated cost of 18 for VF 2 For instruction: %tmp4 = load half, ptr %tmp0, align 2
929 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load half, ptr %tmp1, align 2
930 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load half, ptr %tmp2, align 2
931 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load half, ptr %tmp3, align 2
932 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half %tmp4, ptr %tmp0, align 2
933 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half %tmp5, ptr %tmp1, align 2
934 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half %tmp6, ptr %tmp2, align 2
935 ; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store half %tmp7, ptr %tmp3, align 2
936 ; VF_4-LABEL: Checking a loop in 'f16_factor_4'
937 ; VF_4: Found an estimated cost of 36 for VF 4 For instruction: %tmp4 = load half, ptr %tmp0, align 2
938 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load half, ptr %tmp1, align 2
939 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load half, ptr %tmp2, align 2
940 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load half, ptr %tmp3, align 2
941 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half %tmp4, ptr %tmp0, align 2
942 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half %tmp5, ptr %tmp1, align 2
943 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half %tmp6, ptr %tmp2, align 2
944 ; VF_4-NEXT: Found an estimated cost of 36 for VF 4 For instruction: store half %tmp7, ptr %tmp3, align 2
945 ; VF_8-LABEL: Checking a loop in 'f16_factor_4'
946 ; VF_8: Found an estimated cost of 72 for VF 8 For instruction: %tmp4 = load half, ptr %tmp0, align 2
947 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load half, ptr %tmp1, align 2
948 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load half, ptr %tmp2, align 2
949 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load half, ptr %tmp3, align 2
950 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half %tmp4, ptr %tmp0, align 2
951 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half %tmp5, ptr %tmp1, align 2
952 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half %tmp6, ptr %tmp2, align 2
953 ; VF_8-NEXT: Found an estimated cost of 72 for VF 8 For instruction: store half %tmp7, ptr %tmp3, align 2
954 ; VF_16-LABEL: Checking a loop in 'f16_factor_4'
955 ; VF_16: Found an estimated cost of 144 for VF 16 For instruction: %tmp4 = load half, ptr %tmp0, align 2
956 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load half, ptr %tmp1, align 2
957 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load half, ptr %tmp2, align 2
958 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load half, ptr %tmp3, align 2
959 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half %tmp4, ptr %tmp0, align 2
960 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half %tmp5, ptr %tmp1, align 2
961 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half %tmp6, ptr %tmp2, align 2
962 ; VF_16-NEXT: Found an estimated cost of 144 for VF 16 For instruction: store half %tmp7, ptr %tmp3, align 2
; Loop body: %i starts at 0 and advances by one per iteration; the loop repeats
; while %i.next is (signed) less than %n. The instruction names and order must
; stay exactly as written because the directives above match them textually.
964 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
965 %tmp0 = getelementptr inbounds %f16.4, ptr %data, i64 %i, i32 0
966 %tmp1 = getelementptr inbounds %f16.4, ptr %data, i64 %i, i32 1
967 %tmp2 = getelementptr inbounds %f16.4, ptr %data, i64 %i, i32 2
968 %tmp3 = getelementptr inbounds %f16.4, ptr %data, i64 %i, i32 3
969 %tmp4 = load half, ptr %tmp0, align 2
970 %tmp5 = load half, ptr %tmp1, align 2
971 %tmp6 = load half, ptr %tmp2, align 2
972 %tmp7 = load half, ptr %tmp3, align 2
973 store half %tmp4, ptr %tmp0, align 2
974 store half %tmp5, ptr %tmp1, align 2
975 store half %tmp6, ptr %tmp2, align 2
976 store half %tmp7, ptr %tmp3, align 2
977 %i.next = add nuw nsw i64 %i, 1
978 %cond = icmp slt i64 %i.next, %n
979 br i1 %cond, label %for.body, label %for.end
; Interleave factor 4 with float elements.
; Each loop iteration reads the four fields of element %i of a %f32.4 array
; at %data and writes every value back to the address it was loaded from.
; The directives below pin the per-instruction cost lines printed by
; -debug-only=loop-vectorize for forced vector widths 2, 4, 8 and 16 (one
; check prefix per width, as set up by the run lines at the top of the file).
985 %f32.4 = type {float, float, float, float}
986 define void @f32_factor_4(ptr %data, i64 %n) #0 {
; Width 2: only the first load and the last store carry a non-zero cost.
; NOTE(review): presumably the zero-cost entries are members of an interleave
; group whose whole cost is reported on a single member - confirm against the
; vectorizer's debug output.
990 ; VF_2-LABEL: Checking a loop in 'f32_factor_4'
991 ; VF_2: Found an estimated cost of 20 for VF 2 For instruction: %tmp4 = load float, ptr %tmp0, align 4
992 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load float, ptr %tmp1, align 4
993 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load float, ptr %tmp2, align 4
994 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load float, ptr %tmp3, align 4
995 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float %tmp4, ptr %tmp0, align 4
996 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float %tmp5, ptr %tmp1, align 4
997 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float %tmp6, ptr %tmp2, align 4
998 ; VF_2-NEXT: Found an estimated cost of 20 for VF 2 For instruction: store float %tmp7, ptr %tmp3, align 4
; Width 4: unlike the other widths, every load and every store is expected
; to be reported with its own cost of 8.
999 ; VF_4-LABEL: Checking a loop in 'f32_factor_4'
1000 ; VF_4: Found an estimated cost of 8 for VF 4 For instruction: %tmp4 = load float, ptr %tmp0, align 4
1001 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp5 = load float, ptr %tmp1, align 4
1002 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp6 = load float, ptr %tmp2, align 4
1003 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp7 = load float, ptr %tmp3, align 4
1004 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp4, ptr %tmp0, align 4
1005 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp5, ptr %tmp1, align 4
1006 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp6, ptr %tmp2, align 4
1007 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp7, ptr %tmp3, align 4
; Width 8: same shape as width 2, with a cost of 80 on the first load and
; on the last store.
1008 ; VF_8-LABEL: Checking a loop in 'f32_factor_4'
1009 ; VF_8: Found an estimated cost of 80 for VF 8 For instruction: %tmp4 = load float, ptr %tmp0, align 4
1010 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, ptr %tmp1, align 4
1011 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load float, ptr %tmp2, align 4
1012 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load float, ptr %tmp3, align 4
1013 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float %tmp4, ptr %tmp0, align 4
1014 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float %tmp5, ptr %tmp1, align 4
1015 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float %tmp6, ptr %tmp2, align 4
1016 ; VF_8-NEXT: Found an estimated cost of 80 for VF 8 For instruction: store float %tmp7, ptr %tmp3, align 4
; Width 16: same shape again, with the expected cost doubled to 160.
1017 ; VF_16-LABEL: Checking a loop in 'f32_factor_4'
1018 ; VF_16: Found an estimated cost of 160 for VF 16 For instruction: %tmp4 = load float, ptr %tmp0, align 4
1019 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load float, ptr %tmp1, align 4
1020 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load float, ptr %tmp2, align 4
1021 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load float, ptr %tmp3, align 4
1022 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float %tmp4, ptr %tmp0, align 4
1023 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float %tmp5, ptr %tmp1, align 4
1024 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float %tmp6, ptr %tmp2, align 4
1025 ; VF_16-NEXT: Found an estimated cost of 160 for VF 16 For instruction: store float %tmp7, ptr %tmp3, align 4
; Loop body. %i is the induction variable: 0 on entry, %i.next on the back
; edge.
1027 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
; %tmp0..%tmp3 are the addresses of fields 0..3 of element %i.
1028 %tmp0 = getelementptr inbounds %f32.4, ptr %data, i64 %i, i32 0
1029 %tmp1 = getelementptr inbounds %f32.4, ptr %data, i64 %i, i32 1
1030 %tmp2 = getelementptr inbounds %f32.4, ptr %data, i64 %i, i32 2
1031 %tmp3 = getelementptr inbounds %f32.4, ptr %data, i64 %i, i32 3
; All four fields are loaded before any of them is stored. The value names
; and the order of these instructions are matched by the directives above.
1032 %tmp4 = load float, ptr %tmp0, align 4
1033 %tmp5 = load float, ptr %tmp1, align 4
1034 %tmp6 = load float, ptr %tmp2, align 4
1035 %tmp7 = load float, ptr %tmp3, align 4
1036 store float %tmp4, ptr %tmp0, align 4
1037 store float %tmp5, ptr %tmp1, align 4
1038 store float %tmp6, ptr %tmp2, align 4
1039 store float %tmp7, ptr %tmp3, align 4
; Advance by one element and keep looping while %i.next < %n (signed).
1040 %i.next = add nuw nsw i64 %i, 1
1041 %cond = icmp slt i64 %i.next, %n
1042 br i1 %cond, label %for.body, label %for.end
; Interleave factor 4 with double elements.
; Each loop iteration reads the four fields of element %i of a %f64.4 array
; at %data and writes every value back to the address it was loaded from.
; For every forced vector width the directives below expect each load and
; each store to be reported with its own non-zero cost; no entry is 0.
; The expected costs double with the width: loads 6/12/24/48 and stores
; 4/8/16/32 for widths 2/4/8/16.
; NOTE(review): presumably these 64-bit accesses are costed individually
; rather than as one interleave group - confirm against the cost model.
1048 %f64.4 = type {double, double, double, double}
1049 define void @f64_factor_4(ptr %data, i64 %n) #0 {
1053 ; VF_2-LABEL: Checking a loop in 'f64_factor_4'
1054 ; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp4 = load double, ptr %tmp0, align 8
1055 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp5 = load double, ptr %tmp1, align 8
1056 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp6 = load double, ptr %tmp2, align 8
1057 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp7 = load double, ptr %tmp3, align 8
1058 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp4, ptr %tmp0, align 8
1059 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp5, ptr %tmp1, align 8
1060 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp6, ptr %tmp2, align 8
1061 ; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp7, ptr %tmp3, align 8
1062 ; VF_4-LABEL: Checking a loop in 'f64_factor_4'
1063 ; VF_4: Found an estimated cost of 12 for VF 4 For instruction: %tmp4 = load double, ptr %tmp0, align 8
1064 ; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp5 = load double, ptr %tmp1, align 8
1065 ; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp6 = load double, ptr %tmp2, align 8
1066 ; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp7 = load double, ptr %tmp3, align 8
1067 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp4, ptr %tmp0, align 8
1068 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp5, ptr %tmp1, align 8
1069 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp6, ptr %tmp2, align 8
1070 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp7, ptr %tmp3, align 8
1071 ; VF_8-LABEL: Checking a loop in 'f64_factor_4'
1072 ; VF_8: Found an estimated cost of 24 for VF 8 For instruction: %tmp4 = load double, ptr %tmp0, align 8
1073 ; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp5 = load double, ptr %tmp1, align 8
1074 ; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp6 = load double, ptr %tmp2, align 8
1075 ; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp7 = load double, ptr %tmp3, align 8
1076 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp4, ptr %tmp0, align 8
1077 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp5, ptr %tmp1, align 8
1078 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp6, ptr %tmp2, align 8
1079 ; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp7, ptr %tmp3, align 8
1080 ; VF_16-LABEL: Checking a loop in 'f64_factor_4'
1081 ; VF_16: Found an estimated cost of 48 for VF 16 For instruction: %tmp4 = load double, ptr %tmp0, align 8
1082 ; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp5 = load double, ptr %tmp1, align 8
1083 ; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp6 = load double, ptr %tmp2, align 8
1084 ; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp7 = load double, ptr %tmp3, align 8
1085 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp4, ptr %tmp0, align 8
1086 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp5, ptr %tmp1, align 8
1087 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp6, ptr %tmp2, align 8
1088 ; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp7, ptr %tmp3, align 8
; Loop body. %i is the induction variable: 0 on entry, %i.next on the back
; edge.
1090 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
; %tmp0..%tmp3 are the addresses of fields 0..3 of element %i.
1091 %tmp0 = getelementptr inbounds %f64.4, ptr %data, i64 %i, i32 0
1092 %tmp1 = getelementptr inbounds %f64.4, ptr %data, i64 %i, i32 1
1093 %tmp2 = getelementptr inbounds %f64.4, ptr %data, i64 %i, i32 2
1094 %tmp3 = getelementptr inbounds %f64.4, ptr %data, i64 %i, i32 3
; All four fields are loaded before any of them is stored. The value names
; and the order of these instructions are matched by the directives above.
1095 %tmp4 = load double, ptr %tmp0, align 8
1096 %tmp5 = load double, ptr %tmp1, align 8
1097 %tmp6 = load double, ptr %tmp2, align 8
1098 %tmp7 = load double, ptr %tmp3, align 8
1099 store double %tmp4, ptr %tmp0, align 8
1100 store double %tmp5, ptr %tmp1, align 8
1101 store double %tmp6, ptr %tmp2, align 8
1102 store double %tmp7, ptr %tmp3, align 8
; Advance by one element and keep looping while %i.next < %n (signed).
1103 %i.next = add nuw nsw i64 %i, 1
1104 %cond = icmp slt i64 %i.next, %n
1105 br i1 %cond, label %for.body, label %for.end
; Attribute group #0, referenced by the test functions above (for example
; @f32_factor_4 and @f64_factor_4): sets the "+mve.fp" target feature.
1111 attributes #0 = { "target-features"="+mve.fp" }