1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
3 ; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr8 \
4 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \
5 ; RUN: -check-prefix=CHECK-P8 %s
7 ; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr8 -disable-ppc-vsx-swap-removal \
8 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \
9 ; RUN: -check-prefix=NOOPTSWAP-P8 %s
11 ; RUN: llc -O3 -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
12 ; RUN: -verify-machineinstrs -ppc-vsr-nums-as-vr < %s | FileCheck \
13 ; RUN: -check-prefix=CHECK-P9 --implicit-check-not xxswapd %s
15 ; RUN: llc -O3 -mcpu=pwr9 -disable-ppc-vsx-swap-removal -mattr=-power9-vector \
16 ; RUN: -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s \
17 ; RUN: | FileCheck -check-prefix=NOOPTSWAP-P9 %s
20 ; Updated align attribute from 16 to 8 to keep the swap instruction tests.
21 ; Changes have been made on little-endian to use lvx and stvx
22 ; instructions instead of lxvd2x/xxswapd and xxswapd/stxvd2x for
23 ; aligned vectors with elements up to 4 bytes.
25 ; This test was generated from the following source:
28 ; int ca[N] __attribute__((aligned(16)));
29 ; int cb[N] __attribute__((aligned(16)));
30 ; int cc[N] __attribute__((aligned(16)));
31 ; int cd[N] __attribute__((aligned(16)));
36 ; for (i = 0; i < N; i++) {
37 ; ca[i] = (cb[i] + cc[i]) * cd[i];
; Arrays accessed by the vectorized loop: @cb, @cc and @cd are loaded, @ca is
; stored. They are declared with align 8 rather than the aligned(16) of the C
; source so that the swap-instruction checks in this test still apply.
41 @cb = common global [4096 x i32] zeroinitializer, align 8
42 @cc = common global [4096 x i32] zeroinitializer, align 8
43 @cd = common global [4096 x i32] zeroinitializer, align 8
44 @ca = common global [4096 x i32] zeroinitializer, align 8
47 ; CHECK-P8-LABEL: foo:
48 ; CHECK-P8: # %bb.0: # %entry
49 ; CHECK-P8-NEXT: li 3, 256
50 ; CHECK-P8-NEXT: std 30, -16(1) # 8-byte Folded Spill
51 ; CHECK-P8-NEXT: addis 4, 2, .LC0@toc@ha
52 ; CHECK-P8-NEXT: addis 5, 2, .LC1@toc@ha
53 ; CHECK-P8-NEXT: addis 6, 2, .LC2@toc@ha
54 ; CHECK-P8-NEXT: addis 7, 2, .LC3@toc@ha
55 ; CHECK-P8-NEXT: li 8, 16
56 ; CHECK-P8-NEXT: li 9, 32
57 ; CHECK-P8-NEXT: mtctr 3
58 ; CHECK-P8-NEXT: ld 4, .LC0@toc@l(4)
59 ; CHECK-P8-NEXT: ld 5, .LC1@toc@l(5)
60 ; CHECK-P8-NEXT: ld 6, .LC2@toc@l(6)
61 ; CHECK-P8-NEXT: ld 7, .LC3@toc@l(7)
62 ; CHECK-P8-NEXT: li 3, 0
63 ; CHECK-P8-NEXT: li 10, 48
64 ; CHECK-P8-NEXT: .p2align 4
65 ; CHECK-P8-NEXT: .LBB0_1: # %vector.body
67 ; CHECK-P8-NEXT: lxvd2x 34, 4, 3
68 ; CHECK-P8-NEXT: lxvd2x 35, 5, 3
69 ; CHECK-P8-NEXT: add 11, 4, 3
70 ; CHECK-P8-NEXT: add 12, 5, 3
71 ; CHECK-P8-NEXT: lxvd2x 36, 6, 3
72 ; CHECK-P8-NEXT: add 30, 6, 3
73 ; CHECK-P8-NEXT: lxvd2x 37, 11, 8
74 ; CHECK-P8-NEXT: lxvd2x 32, 12, 10
75 ; CHECK-P8-NEXT: vadduwm 2, 3, 2
76 ; CHECK-P8-NEXT: lxvd2x 35, 12, 8
77 ; CHECK-P8-NEXT: vmuluwm 2, 2, 4
78 ; CHECK-P8-NEXT: lxvd2x 36, 11, 9
79 ; CHECK-P8-NEXT: vadduwm 3, 3, 5
80 ; CHECK-P8-NEXT: lxvd2x 37, 12, 9
81 ; CHECK-P8-NEXT: stxvd2x 34, 7, 3
82 ; CHECK-P8-NEXT: lxvd2x 34, 30, 10
83 ; CHECK-P8-NEXT: vadduwm 4, 5, 4
84 ; CHECK-P8-NEXT: lxvd2x 37, 11, 10
85 ; CHECK-P8-NEXT: add 11, 7, 3
86 ; CHECK-P8-NEXT: addi 3, 3, 64
87 ; CHECK-P8-NEXT: vadduwm 5, 0, 5
88 ; CHECK-P8-NEXT: lxvd2x 32, 30, 8
89 ; CHECK-P8-NEXT: vmuluwm 2, 5, 2
90 ; CHECK-P8-NEXT: vmuluwm 3, 3, 0
91 ; CHECK-P8-NEXT: lxvd2x 32, 30, 9
92 ; CHECK-P8-NEXT: stxvd2x 34, 11, 10
93 ; CHECK-P8-NEXT: vmuluwm 4, 4, 0
94 ; CHECK-P8-NEXT: stxvd2x 35, 11, 8
95 ; CHECK-P8-NEXT: stxvd2x 36, 11, 9
96 ; CHECK-P8-NEXT: bdnz .LBB0_1
97 ; CHECK-P8-NEXT: # %bb.2: # %for.end
98 ; CHECK-P8-NEXT: ld 30, -16(1) # 8-byte Folded Reload
101 ; NOOPTSWAP-P8-LABEL: foo:
102 ; NOOPTSWAP-P8: # %bb.0: # %entry
103 ; NOOPTSWAP-P8-NEXT: li 3, 256
104 ; NOOPTSWAP-P8-NEXT: std 30, -16(1) # 8-byte Folded Spill
105 ; NOOPTSWAP-P8-NEXT: addis 4, 2, .LC0@toc@ha
106 ; NOOPTSWAP-P8-NEXT: addis 5, 2, .LC1@toc@ha
107 ; NOOPTSWAP-P8-NEXT: addis 6, 2, .LC2@toc@ha
108 ; NOOPTSWAP-P8-NEXT: addis 7, 2, .LC3@toc@ha
109 ; NOOPTSWAP-P8-NEXT: li 8, 16
110 ; NOOPTSWAP-P8-NEXT: li 9, 32
111 ; NOOPTSWAP-P8-NEXT: mtctr 3
112 ; NOOPTSWAP-P8-NEXT: ld 4, .LC0@toc@l(4)
113 ; NOOPTSWAP-P8-NEXT: ld 5, .LC1@toc@l(5)
114 ; NOOPTSWAP-P8-NEXT: ld 6, .LC2@toc@l(6)
115 ; NOOPTSWAP-P8-NEXT: ld 7, .LC3@toc@l(7)
116 ; NOOPTSWAP-P8-NEXT: li 3, 0
117 ; NOOPTSWAP-P8-NEXT: li 10, 48
118 ; NOOPTSWAP-P8-NEXT: .p2align 4
119 ; NOOPTSWAP-P8-NEXT: .LBB0_1: # %vector.body
120 ; NOOPTSWAP-P8-NEXT: #
121 ; NOOPTSWAP-P8-NEXT: lxvd2x 0, 4, 3
122 ; NOOPTSWAP-P8-NEXT: lxvd2x 1, 5, 3
123 ; NOOPTSWAP-P8-NEXT: add 30, 6, 3
124 ; NOOPTSWAP-P8-NEXT: add 11, 4, 3
125 ; NOOPTSWAP-P8-NEXT: add 12, 5, 3
126 ; NOOPTSWAP-P8-NEXT: lxvd2x 2, 11, 8
127 ; NOOPTSWAP-P8-NEXT: lxvd2x 3, 12, 8
128 ; NOOPTSWAP-P8-NEXT: lxvd2x 4, 11, 9
129 ; NOOPTSWAP-P8-NEXT: lxvd2x 5, 12, 9
130 ; NOOPTSWAP-P8-NEXT: lxvd2x 6, 11, 10
131 ; NOOPTSWAP-P8-NEXT: add 11, 7, 3
132 ; NOOPTSWAP-P8-NEXT: lxvd2x 7, 12, 10
133 ; NOOPTSWAP-P8-NEXT: xxswapd 34, 0
134 ; NOOPTSWAP-P8-NEXT: lxvd2x 0, 6, 3
135 ; NOOPTSWAP-P8-NEXT: xxswapd 35, 1
136 ; NOOPTSWAP-P8-NEXT: lxvd2x 1, 30, 8
137 ; NOOPTSWAP-P8-NEXT: vadduwm 2, 3, 2
138 ; NOOPTSWAP-P8-NEXT: xxswapd 36, 2
139 ; NOOPTSWAP-P8-NEXT: xxswapd 32, 4
140 ; NOOPTSWAP-P8-NEXT: xxswapd 38, 6
141 ; NOOPTSWAP-P8-NEXT: xxswapd 37, 3
142 ; NOOPTSWAP-P8-NEXT: xxswapd 33, 5
143 ; NOOPTSWAP-P8-NEXT: xxswapd 39, 7
144 ; NOOPTSWAP-P8-NEXT: vadduwm 3, 5, 4
145 ; NOOPTSWAP-P8-NEXT: vadduwm 4, 1, 0
146 ; NOOPTSWAP-P8-NEXT: xxswapd 40, 0
147 ; NOOPTSWAP-P8-NEXT: xxswapd 41, 1
148 ; NOOPTSWAP-P8-NEXT: lxvd2x 0, 30, 9
149 ; NOOPTSWAP-P8-NEXT: lxvd2x 1, 30, 10
150 ; NOOPTSWAP-P8-NEXT: vmuluwm 2, 2, 8
151 ; NOOPTSWAP-P8-NEXT: vmuluwm 3, 3, 9
152 ; NOOPTSWAP-P8-NEXT: xxswapd 42, 0
153 ; NOOPTSWAP-P8-NEXT: xxswapd 43, 1
154 ; NOOPTSWAP-P8-NEXT: vmuluwm 4, 4, 10
155 ; NOOPTSWAP-P8-NEXT: xxswapd 0, 34
156 ; NOOPTSWAP-P8-NEXT: vadduwm 2, 7, 6
157 ; NOOPTSWAP-P8-NEXT: xxswapd 1, 35
158 ; NOOPTSWAP-P8-NEXT: vmuluwm 2, 2, 11
159 ; NOOPTSWAP-P8-NEXT: stxvd2x 0, 7, 3
160 ; NOOPTSWAP-P8-NEXT: addi 3, 3, 64
161 ; NOOPTSWAP-P8-NEXT: stxvd2x 1, 11, 8
162 ; NOOPTSWAP-P8-NEXT: xxswapd 2, 36
163 ; NOOPTSWAP-P8-NEXT: stxvd2x 2, 11, 9
164 ; NOOPTSWAP-P8-NEXT: xxswapd 3, 34
165 ; NOOPTSWAP-P8-NEXT: stxvd2x 3, 11, 10
166 ; NOOPTSWAP-P8-NEXT: bdnz .LBB0_1
167 ; NOOPTSWAP-P8-NEXT: # %bb.2: # %for.end
168 ; NOOPTSWAP-P8-NEXT: ld 30, -16(1) # 8-byte Folded Reload
169 ; NOOPTSWAP-P8-NEXT: blr
171 ; CHECK-P9-LABEL: foo:
172 ; CHECK-P9: # %bb.0: # %entry
173 ; CHECK-P9-NEXT: li 6, 256
174 ; CHECK-P9-NEXT: addis 3, 2, .LC0@toc@ha
175 ; CHECK-P9-NEXT: addis 4, 2, .LC1@toc@ha
176 ; CHECK-P9-NEXT: addis 5, 2, .LC2@toc@ha
177 ; CHECK-P9-NEXT: mtctr 6
178 ; CHECK-P9-NEXT: addis 6, 2, .LC3@toc@ha
179 ; CHECK-P9-NEXT: ld 3, .LC0@toc@l(3)
180 ; CHECK-P9-NEXT: ld 4, .LC1@toc@l(4)
181 ; CHECK-P9-NEXT: ld 5, .LC2@toc@l(5)
182 ; CHECK-P9-NEXT: ld 6, .LC3@toc@l(6)
183 ; CHECK-P9-NEXT: addi 3, 3, 32
184 ; CHECK-P9-NEXT: addi 4, 4, 32
185 ; CHECK-P9-NEXT: addi 5, 5, 32
186 ; CHECK-P9-NEXT: addi 6, 6, 32
187 ; CHECK-P9-NEXT: .p2align 4
188 ; CHECK-P9-NEXT: .LBB0_1: # %vector.body
190 ; CHECK-P9-NEXT: lxv 2, -32(6)
191 ; CHECK-P9-NEXT: lxv 3, -32(5)
192 ; CHECK-P9-NEXT: lxv 4, -16(5)
193 ; CHECK-P9-NEXT: vadduwm 2, 3, 2
194 ; CHECK-P9-NEXT: lxv 3, -32(4)
195 ; CHECK-P9-NEXT: vmuluwm 2, 2, 3
196 ; CHECK-P9-NEXT: lxv 3, -16(6)
197 ; CHECK-P9-NEXT: vadduwm 3, 4, 3
198 ; CHECK-P9-NEXT: lxv 4, 0(5)
199 ; CHECK-P9-NEXT: stxv 2, -32(3)
200 ; CHECK-P9-NEXT: lxv 2, -16(4)
201 ; CHECK-P9-NEXT: vmuluwm 2, 3, 2
202 ; CHECK-P9-NEXT: lxv 3, 0(6)
203 ; CHECK-P9-NEXT: vadduwm 3, 4, 3
204 ; CHECK-P9-NEXT: lxv 4, 16(5)
205 ; CHECK-P9-NEXT: addi 5, 5, 64
206 ; CHECK-P9-NEXT: stxv 2, -16(3)
207 ; CHECK-P9-NEXT: lxv 2, 0(4)
208 ; CHECK-P9-NEXT: vmuluwm 2, 3, 2
209 ; CHECK-P9-NEXT: lxv 3, 16(6)
210 ; CHECK-P9-NEXT: addi 6, 6, 64
211 ; CHECK-P9-NEXT: vadduwm 3, 4, 3
212 ; CHECK-P9-NEXT: stxv 2, 0(3)
213 ; CHECK-P9-NEXT: lxv 2, 16(4)
214 ; CHECK-P9-NEXT: addi 4, 4, 64
215 ; CHECK-P9-NEXT: vmuluwm 2, 3, 2
216 ; CHECK-P9-NEXT: stxv 2, 16(3)
217 ; CHECK-P9-NEXT: addi 3, 3, 64
218 ; CHECK-P9-NEXT: bdnz .LBB0_1
219 ; CHECK-P9-NEXT: # %bb.2: # %for.end
222 ; NOOPTSWAP-P9-LABEL: foo:
223 ; NOOPTSWAP-P9: # %bb.0: # %entry
224 ; NOOPTSWAP-P9-NEXT: addis 4, 2, .LC0@toc@ha
225 ; NOOPTSWAP-P9-NEXT: addis 5, 2, .LC1@toc@ha
226 ; NOOPTSWAP-P9-NEXT: addis 6, 2, .LC2@toc@ha
227 ; NOOPTSWAP-P9-NEXT: addis 7, 2, .LC3@toc@ha
228 ; NOOPTSWAP-P9-NEXT: std 30, -16(1) # 8-byte Folded Spill
229 ; NOOPTSWAP-P9-NEXT: ld 4, .LC0@toc@l(4)
230 ; NOOPTSWAP-P9-NEXT: li 3, 256
231 ; NOOPTSWAP-P9-NEXT: ld 5, .LC1@toc@l(5)
232 ; NOOPTSWAP-P9-NEXT: ld 6, .LC2@toc@l(6)
233 ; NOOPTSWAP-P9-NEXT: ld 7, .LC3@toc@l(7)
234 ; NOOPTSWAP-P9-NEXT: mtctr 3
235 ; NOOPTSWAP-P9-NEXT: li 3, 0
236 ; NOOPTSWAP-P9-NEXT: li 8, 16
237 ; NOOPTSWAP-P9-NEXT: li 9, 32
238 ; NOOPTSWAP-P9-NEXT: li 10, 48
239 ; NOOPTSWAP-P9-NEXT: .p2align 4
240 ; NOOPTSWAP-P9-NEXT: .LBB0_1: # %vector.body
241 ; NOOPTSWAP-P9-NEXT: #
242 ; NOOPTSWAP-P9-NEXT: lxvd2x 0, 4, 3
243 ; NOOPTSWAP-P9-NEXT: lxvd2x 1, 5, 3
244 ; NOOPTSWAP-P9-NEXT: lxvd2x 2, 6, 3
245 ; NOOPTSWAP-P9-NEXT: add 12, 5, 3
246 ; NOOPTSWAP-P9-NEXT: add 11, 4, 3
247 ; NOOPTSWAP-P9-NEXT: add 30, 6, 3
248 ; NOOPTSWAP-P9-NEXT: lxvd2x 3, 11, 8
249 ; NOOPTSWAP-P9-NEXT: xxswapd 34, 0
250 ; NOOPTSWAP-P9-NEXT: xxswapd 35, 1
251 ; NOOPTSWAP-P9-NEXT: lxvd2x 0, 12, 8
252 ; NOOPTSWAP-P9-NEXT: xxswapd 36, 2
253 ; NOOPTSWAP-P9-NEXT: lxvd2x 1, 11, 9
254 ; NOOPTSWAP-P9-NEXT: vadduwm 2, 3, 2
255 ; NOOPTSWAP-P9-NEXT: xxswapd 35, 3
256 ; NOOPTSWAP-P9-NEXT: vmuluwm 2, 2, 4
257 ; NOOPTSWAP-P9-NEXT: xxswapd 36, 0
258 ; NOOPTSWAP-P9-NEXT: lxvd2x 0, 12, 9
259 ; NOOPTSWAP-P9-NEXT: vadduwm 3, 4, 3
260 ; NOOPTSWAP-P9-NEXT: xxswapd 36, 1
261 ; NOOPTSWAP-P9-NEXT: lxvd2x 1, 12, 10
262 ; NOOPTSWAP-P9-NEXT: xxswapd 37, 0
263 ; NOOPTSWAP-P9-NEXT: lxvd2x 0, 11, 10
264 ; NOOPTSWAP-P9-NEXT: add 11, 7, 3
265 ; NOOPTSWAP-P9-NEXT: vadduwm 4, 5, 4
266 ; NOOPTSWAP-P9-NEXT: xxswapd 32, 1
267 ; NOOPTSWAP-P9-NEXT: xxswapd 37, 0
268 ; NOOPTSWAP-P9-NEXT: lxvd2x 0, 30, 8
269 ; NOOPTSWAP-P9-NEXT: vadduwm 5, 0, 5
270 ; NOOPTSWAP-P9-NEXT: xxswapd 32, 0
271 ; NOOPTSWAP-P9-NEXT: lxvd2x 0, 30, 9
272 ; NOOPTSWAP-P9-NEXT: vmuluwm 3, 3, 0
273 ; NOOPTSWAP-P9-NEXT: xxswapd 32, 0
274 ; NOOPTSWAP-P9-NEXT: xxswapd 0, 34
275 ; NOOPTSWAP-P9-NEXT: vmuluwm 4, 4, 0
276 ; NOOPTSWAP-P9-NEXT: stxvd2x 0, 7, 3
277 ; NOOPTSWAP-P9-NEXT: addi 3, 3, 64
278 ; NOOPTSWAP-P9-NEXT: xxswapd 1, 35
279 ; NOOPTSWAP-P9-NEXT: stxvd2x 1, 11, 8
280 ; NOOPTSWAP-P9-NEXT: xxswapd 0, 36
281 ; NOOPTSWAP-P9-NEXT: stxvd2x 0, 11, 9
282 ; NOOPTSWAP-P9-NEXT: lxvd2x 0, 30, 10
283 ; NOOPTSWAP-P9-NEXT: xxswapd 34, 0
284 ; NOOPTSWAP-P9-NEXT: vmuluwm 2, 5, 2
285 ; NOOPTSWAP-P9-NEXT: xxswapd 0, 34
286 ; NOOPTSWAP-P9-NEXT: stxvd2x 0, 11, 10
287 ; NOOPTSWAP-P9-NEXT: bdnz .LBB0_1
288 ; NOOPTSWAP-P9-NEXT: # %bb.2: # %for.end
289 ; NOOPTSWAP-P9-NEXT: ld 30, -16(1) # 8-byte Folded Reload
290 ; NOOPTSWAP-P9-NEXT: blr
292 br label %vector.body
295 %index = phi i64 [ 0, %entry ], [ %index.next.3, %vector.body ]
296 %0 = getelementptr inbounds [4096 x i32], ptr @cb, i64 0, i64 %index
297 %wide.load = load <4 x i32>, ptr %0, align 8
298 %1 = getelementptr inbounds [4096 x i32], ptr @cc, i64 0, i64 %index
299 %wide.load13 = load <4 x i32>, ptr %1, align 8
300 %2 = add nsw <4 x i32> %wide.load13, %wide.load
301 %3 = getelementptr inbounds [4096 x i32], ptr @cd, i64 0, i64 %index
302 %wide.load14 = load <4 x i32>, ptr %3, align 8
303 %4 = mul nsw <4 x i32> %2, %wide.load14
304 %5 = getelementptr inbounds [4096 x i32], ptr @ca, i64 0, i64 %index
305 store <4 x i32> %4, ptr %5, align 8
306 %index.next = add nuw nsw i64 %index, 4
307 %6 = getelementptr inbounds [4096 x i32], ptr @cb, i64 0, i64 %index.next
308 %wide.load.1 = load <4 x i32>, ptr %6, align 8
309 %7 = getelementptr inbounds [4096 x i32], ptr @cc, i64 0, i64 %index.next
310 %wide.load13.1 = load <4 x i32>, ptr %7, align 8
311 %8 = add nsw <4 x i32> %wide.load13.1, %wide.load.1
312 %9 = getelementptr inbounds [4096 x i32], ptr @cd, i64 0, i64 %index.next
313 %wide.load14.1 = load <4 x i32>, ptr %9, align 8
314 %10 = mul nsw <4 x i32> %8, %wide.load14.1
315 %11 = getelementptr inbounds [4096 x i32], ptr @ca, i64 0, i64 %index.next
316 store <4 x i32> %10, ptr %11, align 8
317 %index.next.1 = add nuw nsw i64 %index.next, 4
318 %12 = getelementptr inbounds [4096 x i32], ptr @cb, i64 0, i64 %index.next.1
319 %wide.load.2 = load <4 x i32>, ptr %12, align 8
320 %13 = getelementptr inbounds [4096 x i32], ptr @cc, i64 0, i64 %index.next.1
321 %wide.load13.2 = load <4 x i32>, ptr %13, align 8
322 %14 = add nsw <4 x i32> %wide.load13.2, %wide.load.2
323 %15 = getelementptr inbounds [4096 x i32], ptr @cd, i64 0, i64 %index.next.1
324 %wide.load14.2 = load <4 x i32>, ptr %15, align 8
325 %16 = mul nsw <4 x i32> %14, %wide.load14.2
326 %17 = getelementptr inbounds [4096 x i32], ptr @ca, i64 0, i64 %index.next.1
327 store <4 x i32> %16, ptr %17, align 8
328 %index.next.2 = add nuw nsw i64 %index.next.1, 4
329 %18 = getelementptr inbounds [4096 x i32], ptr @cb, i64 0, i64 %index.next.2
330 %wide.load.3 = load <4 x i32>, ptr %18, align 8
331 %19 = getelementptr inbounds [4096 x i32], ptr @cc, i64 0, i64 %index.next.2
332 %wide.load13.3 = load <4 x i32>, ptr %19, align 8
333 %20 = add nsw <4 x i32> %wide.load13.3, %wide.load.3
334 %21 = getelementptr inbounds [4096 x i32], ptr @cd, i64 0, i64 %index.next.2
335 %wide.load14.3 = load <4 x i32>, ptr %21, align 8
336 %22 = mul nsw <4 x i32> %20, %wide.load14.3
337 %23 = getelementptr inbounds [4096 x i32], ptr @ca, i64 0, i64 %index.next.2
338 store <4 x i32> %22, ptr %23, align 8
339 %index.next.3 = add nuw nsw i64 %index.next.2, 4
340 %24 = icmp eq i64 %index.next.3, 4096
341 br i1 %24, label %for.end, label %vector.body