; Runs the loop vectorizer four times, forcing vector widths 2, 4, 8 and 16,
; and pipes the vectorizer's debug output (stderr merged into stdout) into
; FileCheck. Each invocation is verified with its own check prefix, so the
; expected per-instruction costs are listed separately for every width.
1 ; RUN: opt -loop-vectorize -force-vector-width=2 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_2
2 ; RUN: opt -loop-vectorize -force-vector-width=4 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_4
3 ; RUN: opt -loop-vectorize -force-vector-width=8 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_8
4 ; RUN: opt -loop-vectorize -force-vector-width=16 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_16
; Target description: little-endian layout with 32-bit pointers and 32-bit
; native integers, for a Thumb v8.1-M Mainline bare-metal EABI triple.
5 7 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
6 8 target triple = "thumbv8.1-m.main-none-eabi"
; Two-field i8 struct test: every iteration loads both fields of element %i
; and then stores zero to both fields. The expected debug output is checked
; for VF 8 and VF 16; the nonzero cost appears only on the first load and on
; the last store, while the remaining accesses are expected to cost 0.
; NOTE(review): presumably the zero-cost accesses are members of an
; interleaved access group whose cost is reported once per group - confirm
; against the vectorizer's cost model.
11 define void @i8_factor_2(%i8.2* %data, i64 %n) #0 {
15 ; VF_8-LABEL: Checking a loop in "i8_factor_2"
16 ; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
17 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, i8* %tmp1, align 1
18 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1
19 ; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1
20 ; VF_16-LABEL: Checking a loop in "i8_factor_2"
21 ; VF_16: Found an estimated cost of 2 for VF 16 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
22 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, i8* %tmp1, align 1
23 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1
24 ; VF_16-NEXT: Found an estimated cost of 2 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1
; Loop body: %i is the element index; %tmp0/%tmp1 address fields 0 and 1 of
; element %i. The loop repeats while %i.next is (signed) less than %n.
26 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
27 %tmp0 = getelementptr inbounds %i8.2, %i8.2* %data, i64 %i, i32 0
28 %tmp1 = getelementptr inbounds %i8.2, %i8.2* %data, i64 %i, i32 1
29 %tmp2 = load i8, i8* %tmp0, align 1
30 %tmp3 = load i8, i8* %tmp1, align 1
31 store i8 0, i8* %tmp0, align 1
32 store i8 0, i8* %tmp1, align 1
33 %i.next = add nuw nsw i64 %i, 1
34 %cond = icmp slt i64 %i.next, %n
35 br i1 %cond, label %for.body, label %for.end
; Two-field i16 struct test: every iteration loads both fields of element %i
; and then stores zero to both fields. The expected debug output is checked
; for VF 4, VF 8 and VF 16; the nonzero cost appears only on the first load
; and on the last store (2 at VF 4 and VF 8, 4 at VF 16).
; NOTE(review): presumably the zero-cost accesses are members of an
; interleaved access group whose cost is reported once per group - confirm
; against the vectorizer's cost model.
41 %i16.2 = type {i16, i16}
42 define void @i16_factor_2(%i16.2* %data, i64 %n) #0 {
46 ; VF_4-LABEL: Checking a loop in "i16_factor_2"
47 ; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
48 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
49 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2
50 ; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2
51 ; VF_8-LABEL: Checking a loop in "i16_factor_2"
52 ; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
53 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
54 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2
55 ; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2
56 ; VF_16-LABEL: Checking a loop in "i16_factor_2"
57 ; VF_16: Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
58 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
59 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp0, align 2
60 ; VF_16-NEXT: Found an estimated cost of 4 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2
; Loop body: %i is the element index; %tmp0/%tmp1 address fields 0 and 1 of
; element %i. The loop repeats while %i.next is (signed) less than %n.
62 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
63 %tmp0 = getelementptr inbounds %i16.2, %i16.2* %data, i64 %i, i32 0
64 %tmp1 = getelementptr inbounds %i16.2, %i16.2* %data, i64 %i, i32 1
65 %tmp2 = load i16, i16* %tmp0, align 2
66 %tmp3 = load i16, i16* %tmp1, align 2
67 store i16 0, i16* %tmp0, align 2
68 store i16 0, i16* %tmp1, align 2
69 %i.next = add nuw nsw i64 %i, 1
70 %cond = icmp slt i64 %i.next, %n
71 br i1 %cond, label %for.body, label %for.end
; Two-field i32 struct test: every iteration loads both fields of element %i
; and then stores zero to both fields. The expected debug output is checked
; for VF 2, 4, 8 and 16; the nonzero cost appears only on the first load and
; on the last store (2 at VF 2 and VF 4, 4 at VF 8, 8 at VF 16).
; NOTE(review): presumably the zero-cost accesses are members of an
; interleaved access group whose cost is reported once per group - confirm
; against the vectorizer's cost model.
77 %i32.2 = type {i32, i32}
78 define void @i32_factor_2(%i32.2* %data, i64 %n) #0 {
82 ; VF_2-LABEL: Checking a loop in "i32_factor_2"
83 ; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
84 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
85 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4
86 ; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4
87 ; VF_4-LABEL: Checking a loop in "i32_factor_2"
88 ; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
89 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
90 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4
91 ; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4
92 ; VF_8-LABEL: Checking a loop in "i32_factor_2"
93 ; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
94 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
95 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4
96 ; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4
97 ; VF_16-LABEL: Checking a loop in "i32_factor_2"
98 ; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
99 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
100 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4
101 ; VF_16-NEXT: Found an estimated cost of 8 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4
; Loop body: %i is the element index; %tmp0/%tmp1 address fields 0 and 1 of
; element %i. The loop repeats while %i.next is (signed) less than %n.
103 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
104 %tmp0 = getelementptr inbounds %i32.2, %i32.2* %data, i64 %i, i32 0
105 %tmp1 = getelementptr inbounds %i32.2, %i32.2* %data, i64 %i, i32 1
106 %tmp2 = load i32, i32* %tmp0, align 4
107 %tmp3 = load i32, i32* %tmp1, align 4
108 store i32 0, i32* %tmp0, align 4
109 store i32 0, i32* %tmp1, align 4
110 %i.next = add nuw nsw i64 %i, 1
111 %cond = icmp slt i64 %i.next, %n
112 br i1 %cond, label %for.body, label %for.end
; Three-field i8 struct test: every iteration loads all three fields of
; element %i and then stores zero to all three. The expected debug output is
; checked for VF 8 and VF 16; the nonzero cost appears only on the first load
; and on the last store, and the other accesses are expected to cost 0.
; NOTE(review): the large expected costs (408/216 and 1584/816) suggest the
; three-field accesses are costed as scalarized rather than as a native
; interleaved access - confirm against the target cost model.
118 %i8.3 = type {i8, i8, i8}
119 define void @i8_factor_3(%i8.3* %data, i64 %n) #0 {
123 ; VF_8-LABEL: Checking a loop in "i8_factor_3"
124 ; VF_8: Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load i8, i8* %tmp0, align 1
125 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i8, i8* %tmp1, align 1
126 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i8, i8* %tmp2, align 1
127 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1
128 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1
129 ; VF_8-NEXT: Found an estimated cost of 216 for VF 8 For instruction: store i8 0, i8* %tmp2, align 1
130 ; VF_16-LABEL: Checking a loop in "i8_factor_3"
131 ; VF_16: Found an estimated cost of 1584 for VF 16 For instruction: %tmp3 = load i8, i8* %tmp0, align 1
132 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i8, i8* %tmp1, align 1
133 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i8, i8* %tmp2, align 1
134 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1
135 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1
136 ; VF_16-NEXT: Found an estimated cost of 816 for VF 16 For instruction: store i8 0, i8* %tmp2, align 1
; Loop body: %i is the element index; %tmp0..%tmp2 address fields 0..2 of
; element %i. The loop repeats while %i.next is (signed) less than %n.
138 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
139 %tmp0 = getelementptr inbounds %i8.3, %i8.3* %data, i64 %i, i32 0
140 %tmp1 = getelementptr inbounds %i8.3, %i8.3* %data, i64 %i, i32 1
141 %tmp2 = getelementptr inbounds %i8.3, %i8.3* %data, i64 %i, i32 2
142 %tmp3 = load i8, i8* %tmp0, align 1
143 %tmp4 = load i8, i8* %tmp1, align 1
144 %tmp5 = load i8, i8* %tmp2, align 1
145 store i8 0, i8* %tmp0, align 1
146 store i8 0, i8* %tmp1, align 1
147 store i8 0, i8* %tmp2, align 1
148 %i.next = add nuw nsw i64 %i, 1
149 %cond = icmp slt i64 %i.next, %n
150 br i1 %cond, label %for.body, label %for.end
; Three-field i16 struct test: every iteration loads all three fields of
; element %i and then stores zero to all three. The expected debug output is
; checked for VF 4, VF 8 and VF 16; the nonzero cost appears only on the first
; load and on the last store, and the other accesses are expected to cost 0.
; NOTE(review): the large expected costs (108/60, 408/216, 1584/816) suggest
; the three-field accesses are costed as scalarized rather than as a native
; interleaved access - confirm against the target cost model.
156 %i16.3 = type {i16, i16, i16}
157 define void @i16_factor_3(%i16.3* %data, i64 %n) #0 {
161 ; VF_4-LABEL: Checking a loop in "i16_factor_3"
162 ; VF_4: Found an estimated cost of 108 for VF 4 For instruction: %tmp3 = load i16, i16* %tmp0, align 2
163 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i16, i16* %tmp1, align 2
164 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i16, i16* %tmp2, align 2
165 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2
166 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2
167 ; VF_4-NEXT: Found an estimated cost of 60 for VF 4 For instruction: store i16 0, i16* %tmp2, align 2
168 ; VF_8-LABEL: Checking a loop in "i16_factor_3"
169 ; VF_8: Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load i16, i16* %tmp0, align 2
170 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i16, i16* %tmp1, align 2
171 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i16, i16* %tmp2, align 2
172 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2
173 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2
174 ; VF_8-NEXT: Found an estimated cost of 216 for VF 8 For instruction: store i16 0, i16* %tmp2, align 2
175 ; VF_16-LABEL: Checking a loop in "i16_factor_3"
176 ; VF_16: Found an estimated cost of 1584 for VF 16 For instruction: %tmp3 = load i16, i16* %tmp0, align 2
177 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i16, i16* %tmp1, align 2
178 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i16, i16* %tmp2, align 2
179 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp0, align 2
180 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2
181 ; VF_16-NEXT: Found an estimated cost of 816 for VF 16 For instruction: store i16 0, i16* %tmp2, align 2
; Loop body: %i is the element index; %tmp0..%tmp2 address fields 0..2 of
; element %i. The loop repeats while %i.next is (signed) less than %n.
183 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
184 %tmp0 = getelementptr inbounds %i16.3, %i16.3* %data, i64 %i, i32 0
185 %tmp1 = getelementptr inbounds %i16.3, %i16.3* %data, i64 %i, i32 1
186 %tmp2 = getelementptr inbounds %i16.3, %i16.3* %data, i64 %i, i32 2
187 %tmp3 = load i16, i16* %tmp0, align 2
188 %tmp4 = load i16, i16* %tmp1, align 2
189 %tmp5 = load i16, i16* %tmp2, align 2
190 store i16 0, i16* %tmp0, align 2
191 store i16 0, i16* %tmp1, align 2
192 store i16 0, i16* %tmp2, align 2
193 %i.next = add nuw nsw i64 %i, 1
194 %cond = icmp slt i64 %i.next, %n
195 br i1 %cond, label %for.body, label %for.end
; Three-field i32 struct test: every iteration loads all three fields of
; element %i and then stores zero to all three. The expected debug output is
; checked for VF 2, 4, 8 and 16; the nonzero cost appears only on the first
; load and on the last store, and the other accesses are expected to cost 0.
; NOTE(review): the large expected costs (30/18, 108/60, 408/216, 1584/816)
; suggest the three-field accesses are costed as scalarized rather than as a
; native interleaved access - confirm against the target cost model.
201 %i32.3 = type {i32, i32, i32}
202 define void @i32_factor_3(%i32.3* %data, i64 %n) #0 {
206 ; VF_2-LABEL: Checking a loop in "i32_factor_3"
207 ; VF_2: Found an estimated cost of 30 for VF 2 For instruction: %tmp3 = load i32, i32* %tmp0, align 4
208 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i32, i32* %tmp1, align 4
209 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i32, i32* %tmp2, align 4
210 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4
211 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4
212 ; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store i32 0, i32* %tmp2, align 4
213 ; VF_4-LABEL: Checking a loop in "i32_factor_3"
214 ; VF_4: Found an estimated cost of 108 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp0, align 4
215 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i32, i32* %tmp1, align 4
216 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, i32* %tmp2, align 4
217 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4
218 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4
219 ; VF_4-NEXT: Found an estimated cost of 60 for VF 4 For instruction: store i32 0, i32* %tmp2, align 4
220 ; VF_8-LABEL: Checking a loop in "i32_factor_3"
221 ; VF_8: Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load i32, i32* %tmp0, align 4
222 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i32, i32* %tmp1, align 4
223 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, i32* %tmp2, align 4
224 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4
225 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4
226 ; VF_8-NEXT: Found an estimated cost of 216 for VF 8 For instruction: store i32 0, i32* %tmp2, align 4
227 ; VF_16-LABEL: Checking a loop in "i32_factor_3"
228 ; VF_16: Found an estimated cost of 1584 for VF 16 For instruction: %tmp3 = load i32, i32* %tmp0, align 4
229 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i32, i32* %tmp1, align 4
230 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i32, i32* %tmp2, align 4
231 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4
232 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4
233 ; VF_16-NEXT: Found an estimated cost of 816 for VF 16 For instruction: store i32 0, i32* %tmp2, align 4
; Loop body: %i is the element index; %tmp0..%tmp2 address fields 0..2 of
; element %i. The loop repeats while %i.next is (signed) less than %n.
235 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
236 %tmp0 = getelementptr inbounds %i32.3, %i32.3* %data, i64 %i, i32 0
237 %tmp1 = getelementptr inbounds %i32.3, %i32.3* %data, i64 %i, i32 1
238 %tmp2 = getelementptr inbounds %i32.3, %i32.3* %data, i64 %i, i32 2
239 %tmp3 = load i32, i32* %tmp0, align 4
240 %tmp4 = load i32, i32* %tmp1, align 4
241 %tmp5 = load i32, i32* %tmp2, align 4
242 store i32 0, i32* %tmp0, align 4
243 store i32 0, i32* %tmp1, align 4
244 store i32 0, i32* %tmp2, align 4
245 %i.next = add nuw nsw i64 %i, 1
246 %cond = icmp slt i64 %i.next, %n
247 br i1 %cond, label %for.body, label %for.end
; Four-field i8 struct test: every iteration loads all four fields of element
; %i and then stores zero to all four. The expected debug output is checked
; for VF 8 and VF 16; the nonzero cost appears only on the first load and on
; the last store, and the other accesses are expected to cost 0.
; NOTE(review): the large expected costs (544/288 and 2112/1088) suggest the
; four-field accesses are costed as scalarized rather than as a native
; interleaved access - confirm against the target cost model.
254 %i8.4 = type {i8, i8, i8, i8}
255 define void @i8_factor_4(%i8.4* %data, i64 %n) #0 {
259 ; VF_8-LABEL: Checking a loop in "i8_factor_4"
260 ; VF_8: Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load i8, i8* %tmp0, align 1
261 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i8, i8* %tmp1, align 1
262 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i8, i8* %tmp2, align 1
263 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i8, i8* %tmp3, align 1
264 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1
265 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1
266 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp2, align 1
267 ; VF_8-NEXT: Found an estimated cost of 288 for VF 8 For instruction: store i8 0, i8* %tmp3, align 1
268 ; VF_16-LABEL: Checking a loop in "i8_factor_4"
269 ; VF_16: Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load i8, i8* %tmp0, align 1
270 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i8, i8* %tmp1, align 1
271 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i8, i8* %tmp2, align 1
272 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i8, i8* %tmp3, align 1
273 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1
274 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1
275 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp2, align 1
276 ; VF_16-NEXT: Found an estimated cost of 1088 for VF 16 For instruction: store i8 0, i8* %tmp3, align 1
; Loop body: %i is the element index; %tmp0..%tmp3 address fields 0..3 of
; element %i. The loop repeats while %i.next is (signed) less than %n.
278 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
279 %tmp0 = getelementptr inbounds %i8.4, %i8.4* %data, i64 %i, i32 0
280 %tmp1 = getelementptr inbounds %i8.4, %i8.4* %data, i64 %i, i32 1
281 %tmp2 = getelementptr inbounds %i8.4, %i8.4* %data, i64 %i, i32 2
282 %tmp3 = getelementptr inbounds %i8.4, %i8.4* %data, i64 %i, i32 3
283 %tmp4 = load i8, i8* %tmp0, align 1
284 %tmp5 = load i8, i8* %tmp1, align 1
285 %tmp6 = load i8, i8* %tmp2, align 1
286 %tmp7 = load i8, i8* %tmp3, align 1
287 store i8 0, i8* %tmp0, align 1
288 store i8 0, i8* %tmp1, align 1
289 store i8 0, i8* %tmp2, align 1
290 store i8 0, i8* %tmp3, align 1
291 %i.next = add nuw nsw i64 %i, 1
292 %cond = icmp slt i64 %i.next, %n
293 br i1 %cond, label %for.body, label %for.end
; Four-field i16 struct test: every iteration loads all four fields of element
; %i and then stores zero to all four. The expected debug output is checked
; for VF 4, VF 8 and VF 16; the nonzero cost appears only on the first load
; and on the last store, and the other accesses are expected to cost 0.
; NOTE(review): the large expected costs (144/80, 544/288, 2112/1088) suggest
; the four-field accesses are costed as scalarized rather than as a native
; interleaved access - confirm against the target cost model.
299 %i16.4 = type {i16, i16, i16, i16}
300 define void @i16_factor_4(%i16.4* %data, i64 %n) #0 {
304 ; VF_4-LABEL: Checking a loop in "i16_factor_4"
305 ; VF_4: Found an estimated cost of 144 for VF 4 For instruction: %tmp4 = load i16, i16* %tmp0, align 2
306 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i16, i16* %tmp1, align 2
307 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i16, i16* %tmp2, align 2
308 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i16, i16* %tmp3, align 2
309 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2
310 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2
311 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp2, align 2
312 ; VF_4-NEXT: Found an estimated cost of 80 for VF 4 For instruction: store i16 0, i16* %tmp3, align 2
313 ; VF_8-LABEL: Checking a loop in "i16_factor_4"
314 ; VF_8: Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load i16, i16* %tmp0, align 2
315 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i16, i16* %tmp1, align 2
316 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i16, i16* %tmp2, align 2
317 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i16, i16* %tmp3, align 2
318 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2
319 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2
320 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp2, align 2
321 ; VF_8-NEXT: Found an estimated cost of 288 for VF 8 For instruction: store i16 0, i16* %tmp3, align 2
322 ; VF_16-LABEL: Checking a loop in "i16_factor_4"
323 ; VF_16: Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load i16, i16* %tmp0, align 2
324 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i16, i16* %tmp1, align 2
325 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i16, i16* %tmp2, align 2
326 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i16, i16* %tmp3, align 2
327 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp0, align 2
328 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2
329 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp2, align 2
330 ; VF_16-NEXT: Found an estimated cost of 1088 for VF 16 For instruction: store i16 0, i16* %tmp3, align 2
; Loop body: %i is the element index; %tmp0..%tmp3 address fields 0..3 of
; element %i. The loop repeats while %i.next is (signed) less than %n.
332 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
333 %tmp0 = getelementptr inbounds %i16.4, %i16.4* %data, i64 %i, i32 0
334 %tmp1 = getelementptr inbounds %i16.4, %i16.4* %data, i64 %i, i32 1
335 %tmp2 = getelementptr inbounds %i16.4, %i16.4* %data, i64 %i, i32 2
336 %tmp3 = getelementptr inbounds %i16.4, %i16.4* %data, i64 %i, i32 3
337 %tmp4 = load i16, i16* %tmp0, align 2
338 %tmp5 = load i16, i16* %tmp1, align 2
339 %tmp6 = load i16, i16* %tmp2, align 2
340 %tmp7 = load i16, i16* %tmp3, align 2
341 store i16 0, i16* %tmp0, align 2
342 store i16 0, i16* %tmp1, align 2
343 store i16 0, i16* %tmp2, align 2
344 store i16 0, i16* %tmp3, align 2
345 %i.next = add nuw nsw i64 %i, 1
346 %cond = icmp slt i64 %i.next, %n
347 br i1 %cond, label %for.body, label %for.end
; Four-field i32 struct test: every iteration loads all four fields of element
; %i and then stores zero to all four. The expected debug output is checked
; for VF 2, 4, 8 and 16; the nonzero cost appears only on the first load and
; on the last store, and the other accesses are expected to cost 0.
; NOTE(review): the large expected costs (40/24, 144/80, 544/288, 2112/1088)
; suggest the four-field accesses are costed as scalarized rather than as a
; native interleaved access - confirm against the target cost model.
353 %i32.4 = type {i32, i32, i32, i32}
354 define void @i32_factor_4(%i32.4* %data, i64 %n) #0 {
358 ; VF_2-LABEL: Checking a loop in "i32_factor_4"
359 ; VF_2: Found an estimated cost of 40 for VF 2 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
360 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i32, i32* %tmp1, align 4
361 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i32, i32* %tmp2, align 4
362 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i32, i32* %tmp3, align 4
363 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4
364 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4
365 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp2, align 4
366 ; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store i32 0, i32* %tmp3, align 4
367 ; VF_4-LABEL: Checking a loop in "i32_factor_4"
368 ; VF_4: Found an estimated cost of 144 for VF 4 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
369 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, i32* %tmp1, align 4
370 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i32, i32* %tmp2, align 4
371 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i32, i32* %tmp3, align 4
372 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4
373 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4
374 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp2, align 4
375 ; VF_4-NEXT: Found an estimated cost of 80 for VF 4 For instruction: store i32 0, i32* %tmp3, align 4
376 ; VF_8-LABEL: Checking a loop in "i32_factor_4"
377 ; VF_8: Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
378 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, i32* %tmp1, align 4
379 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i32, i32* %tmp2, align 4
380 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i32, i32* %tmp3, align 4
381 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4
382 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4
383 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp2, align 4
384 ; VF_8-NEXT: Found an estimated cost of 288 for VF 8 For instruction: store i32 0, i32* %tmp3, align 4
385 ; VF_16-LABEL: Checking a loop in "i32_factor_4"
386 ; VF_16: Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
387 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i32, i32* %tmp1, align 4
388 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i32, i32* %tmp2, align 4
389 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i32, i32* %tmp3, align 4
390 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4
391 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4
392 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp2, align 4
393 ; VF_16-NEXT: Found an estimated cost of 1088 for VF 16 For instruction: store i32 0, i32* %tmp3, align 4
; Loop body: %i is the element index; %tmp0..%tmp3 address fields 0..3 of
; element %i. The loop repeats while %i.next is (signed) less than %n.
395 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
396 %tmp0 = getelementptr inbounds %i32.4, %i32.4* %data, i64 %i, i32 0
397 %tmp1 = getelementptr inbounds %i32.4, %i32.4* %data, i64 %i, i32 1
398 %tmp2 = getelementptr inbounds %i32.4, %i32.4* %data, i64 %i, i32 2
399 %tmp3 = getelementptr inbounds %i32.4, %i32.4* %data, i64 %i, i32 3
400 %tmp4 = load i32, i32* %tmp0, align 4
401 %tmp5 = load i32, i32* %tmp1, align 4
402 %tmp6 = load i32, i32* %tmp2, align 4
403 %tmp7 = load i32, i32* %tmp3, align 4
404 store i32 0, i32* %tmp0, align 4
405 store i32 0, i32* %tmp1, align 4
406 store i32 0, i32* %tmp2, align 4
407 store i32 0, i32* %tmp3, align 4
408 %i.next = add nuw nsw i64 %i, 1
409 %cond = icmp slt i64 %i.next, %n
410 br i1 %cond, label %for.body, label %for.end
; Attribute group #0, referenced by the test functions in this file: sets the
; "target-features" string to "+mve.fp" for every function that uses it.
416 attributes #0 = { "target-features"="+mve.fp" }