1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -o - %s | FileCheck %s
4 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
5 target triple = "arm64-apple-ios"
7 ; CHECK-LABEL: lCPI0_0:
8 ; CHECK-NEXT: .byte 0 ; 0x0
9 ; CHECK-NEXT: .byte 4 ; 0x4
10 ; CHECK-NEXT: .byte 8 ; 0x8
11 ; CHECK-NEXT: .byte 12 ; 0xc
12 ; CHECK-NEXT: .byte 16 ; 0x10
13 ; CHECK-NEXT: .byte 20 ; 0x14
14 ; CHECK-NEXT: .byte 24 ; 0x18
15 ; CHECK-NEXT: .byte 28 ; 0x1c
16 ; CHECK-NEXT: .byte 255 ; 0xff
17 ; CHECK-NEXT: .byte 255 ; 0xff
18 ; CHECK-NEXT: .byte 255 ; 0xff
19 ; CHECK-NEXT: .byte 255 ; 0xff
20 ; CHECK-NEXT: .byte 255 ; 0xff
21 ; CHECK-NEXT: .byte 255 ; 0xff
22 ; CHECK-NEXT: .byte 255 ; 0xff
23 ; CHECK-NEXT: .byte 255 ; 0xff
25 ; It's profitable to convert the fptoui float -> i8 to first convert from
26 ; float -> i32 and then use tbl for the truncate in a loop, so the mask can be
27 ; materialized outside the loop.
; Loop test: each iteration loads <8 x float> from %A at index %iv, converts it
; with fptoui to <8 x i8>, and stores the 8-byte result through %dst. The
; expected output loads the tbl mask (lCPI0_0) once before the loop label and
; narrows both converted registers with a single two-register tbl in the loop.
28 define void @fptoui_v8f32_to_v8i8_in_loop(ptr %A, ptr %dst) {
29 ; CHECK-LABEL: fptoui_v8f32_to_v8i8_in_loop:
30 ; CHECK: ; %bb.0: ; %entry
32 ; CHECK-NEXT: adrp x8, lCPI0_0@PAGE
34 ; CHECK-NEXT: ldr q0, [x8, lCPI0_0@PAGEOFF]
35 ; CHECK-NEXT: mov x8, xzr
36 ; CHECK-NEXT: LBB0_1: ; %loop
37 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
38 ; CHECK-NEXT: add x9, x0, x8, lsl #5
39 ; CHECK-NEXT: add x8, x8, #1
40 ; CHECK-NEXT: cmp x8, #1000
41 ; CHECK-NEXT: ldp q2, q1, [x9]
42 ; CHECK-NEXT: fcvtzu.4s v4, v1
43 ; CHECK-NEXT: fcvtzu.4s v3, v2
44 ; CHECK-NEXT: tbl.16b v1, { v3, v4 }, v0
45 ; CHECK-NEXT: str d1, [x1], #16
46 ; CHECK-NEXT: b.eq LBB0_1
47 ; CHECK-NEXT: ; %bb.2: ; %exit
49 ; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1
; The destination GEP steps by <16 x i8> (16 bytes) although only <8 x i8> is
; stored; this matches the post-indexed `str d1, [x1], #16` expected above.
54 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
55 %gep.A = getelementptr inbounds <8 x float>, ptr %A, i64 %iv
56 %l.A = load <8 x float>, ptr %gep.A
57 %c = fptoui <8 x float> %l.A to <8 x i8>
58 %gep.dst = getelementptr inbounds <16 x i8>, ptr %dst, i64 %iv
59 store <8 x i8> %c, ptr %gep.dst
60 %iv.next = add i64 %iv, 1
61 %ec = icmp eq i64 %iv.next, 1000
; NOTE(review): the true successor is %loop, so the back edge is taken when
; %iv.next == 1000 (hence the expected b.eq); confirm this is intentional.
62 br i1 %ec, label %loop, label %exit
68 ; Not profitable to use tbl, as materializing the masks requires more instructions.
; Straight-line variant: one <8 x float> load from %A, fptoui to <8 x i8>, and
; one store to %dst. The expected output narrows with uzp1 followed by xtn and
; contains neither a tbl nor a constant-pool load.
; NOTE(review): the expected conversions are fcvtzs although the IR operation
; is fptoui; presumably acceptable because results that fit in i8 are the same
; for signed and unsigned conversion -- confirm.
70 define void @fptoui_v8f32_to_v8i8_no_loop(ptr %A, ptr %dst) {
71 ; CHECK-LABEL: fptoui_v8f32_to_v8i8_no_loop:
72 ; CHECK: ; %bb.0: ; %entry
73 ; CHECK-NEXT: ldp q0, q1, [x0]
74 ; CHECK-NEXT: fcvtzs.4s v1, v1
75 ; CHECK-NEXT: fcvtzs.4s v0, v0
76 ; CHECK-NEXT: uzp1.8h v0, v0, v1
77 ; CHECK-NEXT: xtn.8b v0, v0
78 ; CHECK-NEXT: str d0, [x1]
81 %l.A = load <8 x float>, ptr %A
82 %c = fptoui <8 x float> %l.A to <8 x i8>
83 store <8 x i8> %c, ptr %dst
87 ; CHECK-LABEL: lCPI2_0:
88 ; CHECK-NEXT: .byte 0 ; 0x0
89 ; CHECK-NEXT: .byte 4 ; 0x4
90 ; CHECK-NEXT: .byte 8 ; 0x8
91 ; CHECK-NEXT: .byte 12 ; 0xc
92 ; CHECK-NEXT: .byte 16 ; 0x10
93 ; CHECK-NEXT: .byte 20 ; 0x14
94 ; CHECK-NEXT: .byte 24 ; 0x18
95 ; CHECK-NEXT: .byte 28 ; 0x1c
96 ; CHECK-NEXT: .byte 32 ; 0x20
97 ; CHECK-NEXT: .byte 36 ; 0x24
98 ; CHECK-NEXT: .byte 40 ; 0x28
99 ; CHECK-NEXT: .byte 44 ; 0x2c
100 ; CHECK-NEXT: .byte 48 ; 0x30
101 ; CHECK-NEXT: .byte 52 ; 0x34
102 ; CHECK-NEXT: .byte 56 ; 0x38
103 ; CHECK-NEXT: .byte 60 ; 0x3c
105 ; Tbl can also be used when combining multiple fptoui using a shuffle. The loop
106 ; vectorizer may create such patterns.
; Each iteration loads one <8 x float> from %A and one from %B, converts both
; with fptoui to <8 x i8>, and concatenates them with an identity
; shufflevector into a single <16 x i8> store. The expected output loads the
; mask (lCPI2_0) once before the loop and does the whole narrowing with one
; four-register tbl.
107 define void @fptoui_2x_v8f32_to_v8i8_in_loop(ptr %A, ptr %B, ptr %dst) {
108 ; CHECK-LABEL: fptoui_2x_v8f32_to_v8i8_in_loop:
109 ; CHECK: ; %bb.0: ; %entry
111 ; CHECK-NEXT: adrp x8, lCPI2_0@PAGE
113 ; CHECK-NEXT: ldr q0, [x8, lCPI2_0@PAGEOFF]
114 ; CHECK-NEXT: mov x8, xzr
115 ; CHECK-NEXT: LBB2_1: ; %loop
116 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
117 ; CHECK-NEXT: lsl x9, x8, #5
118 ; CHECK-NEXT: add x10, x0, x9
119 ; CHECK-NEXT: add x9, x1, x9
120 ; CHECK-NEXT: ldp q2, q1, [x10]
121 ; CHECK-NEXT: fcvtzu.4s v5, v1
122 ; CHECK-NEXT: ldp q1, q3, [x9]
123 ; CHECK-NEXT: fcvtzu.4s v4, v2
124 ; CHECK-NEXT: fcvtzu.4s v7, v3
125 ; CHECK-NEXT: fcvtzu.4s v6, v1
126 ; CHECK-NEXT: tbl.16b v1, { v4, v5, v6, v7 }, v0
127 ; CHECK-NEXT: str q1, [x2, x8, lsl #4]
128 ; CHECK-NEXT: add x8, x8, #1
129 ; CHECK-NEXT: cmp x8, #1000
130 ; CHECK-NEXT: b.eq LBB2_1
131 ; CHECK-NEXT: ; %bb.2: ; %exit
133 ; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh3
; Both sources are indexed by the same %iv; the shuffle mask 0..15 simply
; places %c1 in the low eight lanes and %c2 in the high eight lanes.
138 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
139 %gep.A = getelementptr inbounds <8 x float>, ptr %A, i64 %iv
140 %gep.B = getelementptr inbounds <8x float>, ptr %B, i64 %iv
141 %l.A = load <8 x float>, ptr %gep.A
142 %l.B = load <8 x float>, ptr %gep.B
143 %c1 = fptoui <8 x float> %l.A to <8 x i8>
144 %c2 = fptoui <8 x float> %l.B to <8 x i8>
145 %s = shufflevector <8 x i8> %c1, <8 x i8> %c2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
146 %gep.dst = getelementptr inbounds <16 x i8>, ptr %dst, i64 %iv
147 store <16 x i8> %s, ptr %gep.dst
148 %iv.next = add i64 %iv, 1
149 %ec = icmp eq i64 %iv.next, 1000
; The back edge is taken when %iv.next == 1000 (true successor is %loop).
150 br i1 %ec, label %loop, label %exit
156 ; CHECK-LABEL: lCPI3_0:
157 ; CHECK-NEXT: .byte 0 ; 0x0
158 ; CHECK-NEXT: .byte 36 ; 0x24
159 ; CHECK-NEXT: .byte 8 ; 0x8
160 ; CHECK-NEXT: .byte 12 ; 0xc
161 ; CHECK-NEXT: .byte 16 ; 0x10
162 ; CHECK-NEXT: .byte 20 ; 0x14
163 ; CHECK-NEXT: .byte 24 ; 0x18
164 ; CHECK-NEXT: .byte 44 ; 0x2c
165 ; CHECK-NEXT: .byte 32 ; 0x20
166 ; CHECK-NEXT: .byte 36 ; 0x24
167 ; CHECK-NEXT: .byte 40 ; 0x28
168 ; CHECK-NEXT: .byte 44 ; 0x2c
169 ; CHECK-NEXT: .byte 48 ; 0x30
170 ; CHECK-NEXT: .byte 12 ; 0xc
171 ; CHECK-NEXT: .byte 56 ; 0x38
172 ; CHECK-NEXT: .byte 60 ; 0x3c
; Two-input conversion loop whose shufflevector mask is not a plain
; concatenation: it selects lanes 0, 9, 2, 3, 4, 5, 6, 11, 8, 9, 10, 11, 12,
; 3, 14, 15. The expected output still uses one four-register tbl; each byte
; of the lCPI3_0 mask asserted before this function is four times the
; corresponding shuffle index (e.g. lane 9 -> byte 36, lane 11 -> byte 44).
174 define void @fptoui_2x_v8f32_to_v8i8_in_loop_no_concat_shuffle(ptr %A, ptr %B, ptr %dst) {
175 ; CHECK-LABEL: fptoui_2x_v8f32_to_v8i8_in_loop_no_concat_shuffle:
176 ; CHECK: ; %bb.0: ; %entry
178 ; CHECK-NEXT: adrp x8, lCPI3_0@PAGE
180 ; CHECK-NEXT: ldr q0, [x8, lCPI3_0@PAGEOFF]
181 ; CHECK-NEXT: mov x8, xzr
182 ; CHECK-NEXT: LBB3_1: ; %loop
183 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
184 ; CHECK-NEXT: lsl x9, x8, #5
185 ; CHECK-NEXT: add x10, x0, x9
186 ; CHECK-NEXT: add x9, x1, x9
187 ; CHECK-NEXT: ldp q2, q1, [x10]
188 ; CHECK-NEXT: fcvtzu.4s v5, v1
189 ; CHECK-NEXT: ldp q1, q3, [x9]
190 ; CHECK-NEXT: fcvtzu.4s v4, v2
191 ; CHECK-NEXT: fcvtzu.4s v7, v3
192 ; CHECK-NEXT: fcvtzu.4s v6, v1
193 ; CHECK-NEXT: tbl.16b v1, { v4, v5, v6, v7 }, v0
194 ; CHECK-NEXT: str q1, [x2, x8, lsl #4]
195 ; CHECK-NEXT: add x8, x8, #1
196 ; CHECK-NEXT: cmp x8, #1000
197 ; CHECK-NEXT: b.eq LBB3_1
198 ; CHECK-NEXT: ; %bb.2: ; %exit
200 ; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh5
205 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
206 %gep.A = getelementptr inbounds <8 x float>, ptr %A, i64 %iv
207 %gep.B = getelementptr inbounds <8x float>, ptr %B, i64 %iv
208 %l.A = load <8 x float>, ptr %gep.A
209 %l.B = load <8 x float>, ptr %gep.B
210 %c1 = fptoui <8 x float> %l.A to <8 x i8>
211 %c2 = fptoui <8 x float> %l.B to <8 x i8>
; Lanes 1 and 7 are taken from %c2 and lane 13 from %c1, so the two halves are
; interleaved rather than concatenated.
212 %s = shufflevector <8 x i8> %c1, <8 x i8> %c2, <16 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 11, i32 12, i32 3, i32 14, i32 15>
213 %gep.dst = getelementptr inbounds <16 x i8>, ptr %dst, i64 %iv
214 store <16 x i8> %s, ptr %gep.dst
215 %iv.next = add i64 %iv, 1
216 %ec = icmp eq i64 %iv.next, 1000
; The back edge is taken when %iv.next == 1000 (true successor is %loop).
217 br i1 %ec, label %loop, label %exit
223 ; CHECK-LABEL: lCPI4_0:
224 ; CHECK-NEXT: .byte 0 ; 0x0
225 ; CHECK-NEXT: .byte 4 ; 0x4
226 ; CHECK-NEXT: .byte 8 ; 0x8
227 ; CHECK-NEXT: .byte 12 ; 0xc
228 ; CHECK-NEXT: .byte 16 ; 0x10
229 ; CHECK-NEXT: .byte 20 ; 0x14
230 ; CHECK-NEXT: .byte 24 ; 0x18
231 ; CHECK-NEXT: .byte 28 ; 0x1c
232 ; CHECK-NEXT: .byte 32 ; 0x20
233 ; CHECK-NEXT: .byte 36 ; 0x24
234 ; CHECK-NEXT: .byte 40 ; 0x28
235 ; CHECK-NEXT: .byte 44 ; 0x2c
236 ; CHECK-NEXT: .byte 48 ; 0x30
237 ; CHECK-NEXT: .byte 52 ; 0x34
238 ; CHECK-NEXT: .byte 56 ; 0x38
239 ; CHECK-NEXT: .byte 60 ; 0x3c
; Each iteration loads <16 x float> from %A, converts it with fptoui to
; <16 x i8>, and stores the 16-byte result through %dst. The expected output
; converts four q registers and narrows them with one four-register tbl whose
; mask (lCPI4_0) is loaded before the loop.
241 define void @fptoui_v16f32_to_v16i8_in_loop(ptr %A, ptr %dst) {
242 ; CHECK-LABEL: fptoui_v16f32_to_v16i8_in_loop:
243 ; CHECK: ; %bb.0: ; %entry
245 ; CHECK-NEXT: adrp x8, lCPI4_0@PAGE
247 ; CHECK-NEXT: ldr q0, [x8, lCPI4_0@PAGEOFF]
248 ; CHECK-NEXT: mov x8, xzr
249 ; CHECK-NEXT: LBB4_1: ; %loop
250 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
251 ; CHECK-NEXT: add x9, x0, x8, lsl #6
252 ; CHECK-NEXT: add x8, x8, #1
253 ; CHECK-NEXT: cmp x8, #1000
254 ; CHECK-NEXT: ldp q2, q1, [x9, #32]
255 ; CHECK-NEXT: fcvtzu.4s v7, v1
256 ; CHECK-NEXT: ldp q1, q3, [x9]
257 ; CHECK-NEXT: fcvtzu.4s v6, v2
258 ; CHECK-NEXT: fcvtzu.4s v5, v3
259 ; CHECK-NEXT: fcvtzu.4s v4, v1
260 ; CHECK-NEXT: tbl.16b v1, { v4, v5, v6, v7 }, v0
261 ; CHECK-NEXT: str q1, [x1], #32
262 ; CHECK-NEXT: b.eq LBB4_1
263 ; CHECK-NEXT: ; %bb.2: ; %exit
265 ; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh7
; The destination GEP steps by <32 x i8> (32 bytes) although only <16 x i8> is
; stored; this matches the post-indexed `str q1, [x1], #32` expected above.
270 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
271 %gep.A = getelementptr inbounds <16 x float>, ptr %A, i64 %iv
272 %l.A = load <16 x float>, ptr %gep.A
273 %c = fptoui <16 x float> %l.A to <16 x i8>
274 %gep.dst = getelementptr inbounds <32 x i8>, ptr %dst, i64 %iv
275 store <16 x i8> %c, ptr %gep.dst
276 %iv.next = add i64 %iv, 1
277 %ec = icmp eq i64 %iv.next, 1000
; The back edge is taken when %iv.next == 1000 (true successor is %loop).
278 br i1 %ec, label %loop, label %exit
284 ; CHECK-LABEL: lCPI5_0:
285 ; CHECK-NEXT: .byte 0 ; 0x0
286 ; CHECK-NEXT: .byte 4 ; 0x4
287 ; CHECK-NEXT: .byte 8 ; 0x8
288 ; CHECK-NEXT: .byte 12 ; 0xc
289 ; CHECK-NEXT: .byte 16 ; 0x10
290 ; CHECK-NEXT: .byte 20 ; 0x14
291 ; CHECK-NEXT: .byte 24 ; 0x18
292 ; CHECK-NEXT: .byte 28 ; 0x1c
293 ; CHECK-NEXT: .byte 32 ; 0x20
294 ; CHECK-NEXT: .byte 36 ; 0x24
295 ; CHECK-NEXT: .byte 40 ; 0x28
296 ; CHECK-NEXT: .byte 44 ; 0x2c
297 ; CHECK-NEXT: .byte 48 ; 0x30
298 ; CHECK-NEXT: .byte 52 ; 0x34
299 ; CHECK-NEXT: .byte 56 ; 0x38
300 ; CHECK-NEXT: .byte 60 ; 0x3c
; Each iteration loads <16 x float> from both %A and %B, converts each with
; fptoui to <16 x i8>, and concatenates the results into one <32 x i8> store.
; The expected output uses two four-register tbl instructions sharing the same
; mask register (v0, loaded from lCPI5_0 before the loop) and one stp.
302 define void @fptoui_2x_v16f32_to_v16i8_in_loop(ptr %A, ptr %B, ptr %dst) {
303 ; CHECK-LABEL: fptoui_2x_v16f32_to_v16i8_in_loop:
304 ; CHECK: ; %bb.0: ; %entry
306 ; CHECK-NEXT: adrp x8, lCPI5_0@PAGE
308 ; CHECK-NEXT: ldr q0, [x8, lCPI5_0@PAGEOFF]
309 ; CHECK-NEXT: mov x8, xzr
310 ; CHECK-NEXT: LBB5_1: ; %loop
311 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
312 ; CHECK-NEXT: lsl x9, x8, #6
313 ; CHECK-NEXT: add x10, x1, x9
314 ; CHECK-NEXT: add x9, x0, x9
315 ; CHECK-NEXT: ldp q2, q1, [x10, #32]
316 ; CHECK-NEXT: ldp q3, q4, [x9, #32]
317 ; CHECK-NEXT: ldp q5, q6, [x10]
318 ; CHECK-NEXT: fcvtzu.4s v19, v1
319 ; CHECK-NEXT: fcvtzu.4s v18, v2
320 ; CHECK-NEXT: ldp q2, q1, [x9]
321 ; CHECK-NEXT: fcvtzu.4s v23, v4
322 ; CHECK-NEXT: fcvtzu.4s v17, v6
323 ; CHECK-NEXT: add x9, x2, x8, lsl #5
324 ; CHECK-NEXT: fcvtzu.4s v22, v3
325 ; CHECK-NEXT: fcvtzu.4s v16, v5
326 ; CHECK-NEXT: add x8, x8, #1
327 ; CHECK-NEXT: fcvtzu.4s v21, v1
328 ; CHECK-NEXT: cmp x8, #1000
329 ; CHECK-NEXT: fcvtzu.4s v20, v2
330 ; CHECK-NEXT: tbl.16b v1, { v16, v17, v18, v19 }, v0
331 ; CHECK-NEXT: tbl.16b v2, { v20, v21, v22, v23 }, v0
332 ; CHECK-NEXT: stp q2, q1, [x9]
333 ; CHECK-NEXT: b.eq LBB5_1
334 ; CHECK-NEXT: ; %bb.2: ; %exit
336 ; CHECK-NEXT: .loh AdrpLdr Lloh8, Lloh9
341 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
342 %gep.A = getelementptr inbounds <16 x float>, ptr %A, i64 %iv
343 %gep.B = getelementptr inbounds <16 x float>, ptr %B, i64 %iv
344 %l.A = load <16 x float>, ptr %gep.A
345 %l.B = load <16 x float>, ptr %gep.B
346 %c1 = fptoui <16 x float> %l.A to <16 x i8>
347 %c2 = fptoui <16 x float> %l.B to <16 x i8>
; Identity mask 0..31: %c1 fills the low 16 lanes, %c2 the high 16 lanes.
348 %s = shufflevector <16 x i8> %c1, <16 x i8> %c2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
349 %gep.dst = getelementptr inbounds <32 x i8>, ptr %dst, i64 %iv
350 store <32 x i8> %s, ptr %gep.dst
351 %iv.next = add i64 %iv, 1
352 %ec = icmp eq i64 %iv.next, 1000
; The back edge is taken when %iv.next == 1000 (true successor is %loop).
353 br i1 %ec, label %loop, label %exit
; Loop converting <8 x float> to <8 x i16> with fptoui. The expected output
; narrows with uzp1.8h and has no constant-pool load before the loop and no
; tbl inside it.
359 define void @fptoui_v8f32_to_v8i16_in_loop(ptr %A, ptr %dst) {
360 ; CHECK-LABEL: fptoui_v8f32_to_v8i16_in_loop:
361 ; CHECK: ; %bb.0: ; %entry
362 ; CHECK-NEXT: mov x8, xzr
363 ; CHECK-NEXT: LBB6_1: ; %loop
364 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
365 ; CHECK-NEXT: add x9, x0, x8, lsl #5
366 ; CHECK-NEXT: ldp q0, q1, [x9]
367 ; CHECK-NEXT: fcvtzu.4s v1, v1
368 ; CHECK-NEXT: fcvtzu.4s v0, v0
369 ; CHECK-NEXT: uzp1.8h v0, v0, v1
370 ; CHECK-NEXT: str q0, [x1, x8, lsl #4]
371 ; CHECK-NEXT: add x8, x8, #1
372 ; CHECK-NEXT: cmp x8, #1000
373 ; CHECK-NEXT: b.eq LBB6_1
374 ; CHECK-NEXT: ; %bb.2: ; %exit
380 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
381 %gep.A = getelementptr inbounds <8 x float>, ptr %A, i64 %iv
382 %l.A = load <8 x float>, ptr %gep.A
383 %c = fptoui <8 x float> %l.A to <8 x i16>
384 %gep.dst = getelementptr inbounds <8 x i16>, ptr %dst, i64 %iv
385 store <8 x i16> %c, ptr %gep.dst
386 %iv.next = add i64 %iv, 1
387 %ec = icmp eq i64 %iv.next, 1000
; The back edge is taken when %iv.next == 1000 (true successor is %loop).
388 br i1 %ec, label %loop, label %exit
; Two-input variant of the <8 x float> -> <8 x i16> loop: the results from %A
; and %B are concatenated into one <16 x i16> store. The expected output uses
; one uzp1.8h per input and a single stp, with no tbl and no mask load.
394 define void @fptoui_2x_v8f32_to_v8i16_in_loop(ptr %A, ptr %B, ptr %dst) {
395 ; CHECK-LABEL: fptoui_2x_v8f32_to_v8i16_in_loop:
396 ; CHECK: ; %bb.0: ; %entry
397 ; CHECK-NEXT: mov x8, xzr
398 ; CHECK-NEXT: LBB7_1: ; %loop
399 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
400 ; CHECK-NEXT: lsl x9, x8, #5
401 ; CHECK-NEXT: add x8, x8, #1
402 ; CHECK-NEXT: cmp x8, #1000
403 ; CHECK-NEXT: add x10, x0, x9
404 ; CHECK-NEXT: add x11, x1, x9
405 ; CHECK-NEXT: add x9, x2, x9
406 ; CHECK-NEXT: ldp q0, q1, [x10]
407 ; CHECK-NEXT: ldp q2, q3, [x11]
408 ; CHECK-NEXT: fcvtzu.4s v1, v1
409 ; CHECK-NEXT: fcvtzu.4s v0, v0
410 ; CHECK-NEXT: fcvtzu.4s v3, v3
411 ; CHECK-NEXT: fcvtzu.4s v2, v2
412 ; CHECK-NEXT: uzp1.8h v0, v0, v1
413 ; CHECK-NEXT: uzp1.8h v1, v2, v3
414 ; CHECK-NEXT: stp q0, q1, [x9]
415 ; CHECK-NEXT: b.eq LBB7_1
416 ; CHECK-NEXT: ; %bb.2: ; %exit
422 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
423 %gep.A = getelementptr inbounds <8 x float>, ptr %A, i64 %iv
424 %gep.B = getelementptr inbounds <8 x float>, ptr %B, i64 %iv
425 %l.A = load <8 x float>, ptr %gep.A
426 %l.B = load <8 x float>, ptr %gep.B
427 %c1 = fptoui <8 x float> %l.A to <8 x i16>
428 %c2 = fptoui <8 x float> %l.B to <8 x i16>
; Identity mask 0..15: %c1 fills the low eight lanes, %c2 the high eight.
429 %s = shufflevector <8 x i16> %c1, <8 x i16> %c2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
430 %gep.dst = getelementptr inbounds <16 x i16>, ptr %dst, i64 %iv
431 store <16 x i16> %s, ptr %gep.dst
432 %iv.next = add i64 %iv, 1
433 %ec = icmp eq i64 %iv.next, 1000
; The back edge is taken when %iv.next == 1000 (true successor is %loop).
434 br i1 %ec, label %loop, label %exit
440 ; CHECK-LABEL: lCPI8_0:
441 ; CHECK-NEXT: .byte 4 ; 0x4
442 ; CHECK-NEXT: .byte 255 ; 0xff
443 ; CHECK-NEXT: .byte 255 ; 0xff
444 ; CHECK-NEXT: .byte 255 ; 0xff
445 ; CHECK-NEXT: .byte 5 ; 0x5
446 ; CHECK-NEXT: .byte 255 ; 0xff
447 ; CHECK-NEXT: .byte 255 ; 0xff
448 ; CHECK-NEXT: .byte 255 ; 0xff
449 ; CHECK-NEXT: .byte 6 ; 0x6
450 ; CHECK-NEXT: .byte 255 ; 0xff
451 ; CHECK-NEXT: .byte 255 ; 0xff
452 ; CHECK-NEXT: .byte 255 ; 0xff
453 ; CHECK-NEXT: .byte 7 ; 0x7
454 ; CHECK-NEXT: .byte 255 ; 0xff
455 ; CHECK-NEXT: .byte 255 ; 0xff
456 ; CHECK-NEXT: .byte 255 ; 0xff
457 ; CHECK-NEXT: lCPI8_1:
458 ; CHECK-NEXT: .byte 0 ; 0x0
459 ; CHECK-NEXT: .byte 255 ; 0xff
460 ; CHECK-NEXT: .byte 255 ; 0xff
461 ; CHECK-NEXT: .byte 255 ; 0xff
462 ; CHECK-NEXT: .byte 1 ; 0x1
463 ; CHECK-NEXT: .byte 255 ; 0xff
464 ; CHECK-NEXT: .byte 255 ; 0xff
465 ; CHECK-NEXT: .byte 255 ; 0xff
466 ; CHECK-NEXT: .byte 2 ; 0x2
467 ; CHECK-NEXT: .byte 255 ; 0xff
468 ; CHECK-NEXT: .byte 255 ; 0xff
469 ; CHECK-NEXT: .byte 255 ; 0xff
470 ; CHECK-NEXT: .byte 3 ; 0x3
471 ; CHECK-NEXT: .byte 255 ; 0xff
472 ; CHECK-NEXT: .byte 255 ; 0xff
473 ; CHECK-NEXT: .byte 255 ; 0xff
; Loop converting <8 x i8> to <8 x float> with uitofp. The expected output
; loads two masks (lCPI8_0 and lCPI8_1) before the loop, applies one
; single-register tbl per mask to the loaded d register, and converts each
; result with ucvtf.4s before a paired store.
475 define void @uitofp_v8i8_to_v8f32(ptr %src, ptr %dst) {
476 ; CHECK-LABEL: uitofp_v8i8_to_v8f32:
477 ; CHECK: ; %bb.0: ; %entry
478 ; CHECK-NEXT: Lloh10:
479 ; CHECK-NEXT: adrp x8, lCPI8_0@PAGE
480 ; CHECK-NEXT: Lloh11:
481 ; CHECK-NEXT: adrp x9, lCPI8_1@PAGE
482 ; CHECK-NEXT: Lloh12:
483 ; CHECK-NEXT: ldr q0, [x8, lCPI8_0@PAGEOFF]
484 ; CHECK-NEXT: Lloh13:
485 ; CHECK-NEXT: ldr q1, [x9, lCPI8_1@PAGEOFF]
486 ; CHECK-NEXT: mov x8, xzr
487 ; CHECK-NEXT: LBB8_1: ; %loop
488 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
489 ; CHECK-NEXT: ldr d2, [x0, x8, lsl #3]
490 ; CHECK-NEXT: add x9, x1, x8, lsl #5
491 ; CHECK-NEXT: add x8, x8, #1
492 ; CHECK-NEXT: cmp x8, #1000
493 ; CHECK-NEXT: tbl.16b v3, { v2 }, v0
494 ; CHECK-NEXT: tbl.16b v2, { v2 }, v1
495 ; CHECK-NEXT: ucvtf.4s v3, v3
496 ; CHECK-NEXT: ucvtf.4s v2, v2
497 ; CHECK-NEXT: stp q2, q3, [x9]
498 ; CHECK-NEXT: b.eq LBB8_1
499 ; CHECK-NEXT: ; %bb.2: ; %exit
501 ; CHECK-NEXT: .loh AdrpLdr Lloh11, Lloh13
502 ; CHECK-NEXT: .loh AdrpLdr Lloh10, Lloh12
507 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
508 %gep.src = getelementptr inbounds <8 x i8>, ptr %src, i64 %iv
509 %l = load <8 x i8>, ptr %gep.src
510 %conv = uitofp <8 x i8> %l to <8 x float>
511 %gep.dst = getelementptr inbounds <8 x float>, ptr %dst, i64 %iv
512 store <8 x float> %conv, ptr %gep.dst
513 %iv.next = add i64 %iv, 1
514 %ec = icmp eq i64 %iv.next, 1000
; The back edge is taken when %iv.next == 1000 (true successor is %loop).
515 br i1 %ec, label %loop, label %exit
521 ; CHECK-LABEL: lCPI9_0:
522 ; CHECK-NEXT: .byte 12 ; 0xc
523 ; CHECK-NEXT: .byte 255 ; 0xff
524 ; CHECK-NEXT: .byte 255 ; 0xff
525 ; CHECK-NEXT: .byte 255 ; 0xff
526 ; CHECK-NEXT: .byte 13 ; 0xd
527 ; CHECK-NEXT: .byte 255 ; 0xff
528 ; CHECK-NEXT: .byte 255 ; 0xff
529 ; CHECK-NEXT: .byte 255 ; 0xff
530 ; CHECK-NEXT: .byte 14 ; 0xe
531 ; CHECK-NEXT: .byte 255 ; 0xff
532 ; CHECK-NEXT: .byte 255 ; 0xff
533 ; CHECK-NEXT: .byte 255 ; 0xff
534 ; CHECK-NEXT: .byte 15 ; 0xf
535 ; CHECK-NEXT: .byte 255 ; 0xff
536 ; CHECK-NEXT: .byte 255 ; 0xff
537 ; CHECK-NEXT: .byte 255 ; 0xff
538 ; CHECK-NEXT: lCPI9_1:
539 ; CHECK-NEXT: .byte 8 ; 0x8
540 ; CHECK-NEXT: .byte 255 ; 0xff
541 ; CHECK-NEXT: .byte 255 ; 0xff
542 ; CHECK-NEXT: .byte 255 ; 0xff
543 ; CHECK-NEXT: .byte 9 ; 0x9
544 ; CHECK-NEXT: .byte 255 ; 0xff
545 ; CHECK-NEXT: .byte 255 ; 0xff
546 ; CHECK-NEXT: .byte 255 ; 0xff
547 ; CHECK-NEXT: .byte 10 ; 0xa
548 ; CHECK-NEXT: .byte 255 ; 0xff
549 ; CHECK-NEXT: .byte 255 ; 0xff
550 ; CHECK-NEXT: .byte 255 ; 0xff
551 ; CHECK-NEXT: .byte 11 ; 0xb
552 ; CHECK-NEXT: .byte 255 ; 0xff
553 ; CHECK-NEXT: .byte 255 ; 0xff
554 ; CHECK-NEXT: .byte 255 ; 0xff
555 ; CHECK-NEXT: lCPI9_2:
556 ; CHECK-NEXT: .byte 4 ; 0x4
557 ; CHECK-NEXT: .byte 255 ; 0xff
558 ; CHECK-NEXT: .byte 255 ; 0xff
559 ; CHECK-NEXT: .byte 255 ; 0xff
560 ; CHECK-NEXT: .byte 5 ; 0x5
561 ; CHECK-NEXT: .byte 255 ; 0xff
562 ; CHECK-NEXT: .byte 255 ; 0xff
563 ; CHECK-NEXT: .byte 255 ; 0xff
564 ; CHECK-NEXT: .byte 6 ; 0x6
565 ; CHECK-NEXT: .byte 255 ; 0xff
566 ; CHECK-NEXT: .byte 255 ; 0xff
567 ; CHECK-NEXT: .byte 255 ; 0xff
568 ; CHECK-NEXT: .byte 7 ; 0x7
569 ; CHECK-NEXT: .byte 255 ; 0xff
570 ; CHECK-NEXT: .byte 255 ; 0xff
571 ; CHECK-NEXT: .byte 255 ; 0xff
572 ; CHECK-NEXT: lCPI9_3:
573 ; CHECK-NEXT: .byte 0 ; 0x0
574 ; CHECK-NEXT: .byte 255 ; 0xff
575 ; CHECK-NEXT: .byte 255 ; 0xff
576 ; CHECK-NEXT: .byte 255 ; 0xff
577 ; CHECK-NEXT: .byte 1 ; 0x1
578 ; CHECK-NEXT: .byte 255 ; 0xff
579 ; CHECK-NEXT: .byte 255 ; 0xff
580 ; CHECK-NEXT: .byte 255 ; 0xff
581 ; CHECK-NEXT: .byte 2 ; 0x2
582 ; CHECK-NEXT: .byte 255 ; 0xff
583 ; CHECK-NEXT: .byte 255 ; 0xff
584 ; CHECK-NEXT: .byte 255 ; 0xff
585 ; CHECK-NEXT: .byte 3 ; 0x3
586 ; CHECK-NEXT: .byte 255 ; 0xff
587 ; CHECK-NEXT: .byte 255 ; 0xff
588 ; CHECK-NEXT: .byte 255 ; 0xff
; Loop converting <16 x i8> to <16 x float> with uitofp. The expected output
; loads four masks (lCPI9_0 .. lCPI9_3) before the loop, applies one
; single-register tbl per mask to the loaded q register, converts each result
; with ucvtf.4s, and writes the 64-byte result with two stp instructions.
590 define void @uitofp_v16i8_to_v16f32(ptr %src, ptr %dst) {
591 ; CHECK-LABEL: uitofp_v16i8_to_v16f32:
592 ; CHECK: ; %bb.0: ; %entry
593 ; CHECK-NEXT: Lloh14:
594 ; CHECK-NEXT: adrp x8, lCPI9_0@PAGE
595 ; CHECK-NEXT: Lloh15:
596 ; CHECK-NEXT: adrp x9, lCPI9_1@PAGE
597 ; CHECK-NEXT: Lloh16:
598 ; CHECK-NEXT: adrp x10, lCPI9_2@PAGE
599 ; CHECK-NEXT: Lloh17:
600 ; CHECK-NEXT: ldr q0, [x8, lCPI9_0@PAGEOFF]
601 ; CHECK-NEXT: Lloh18:
602 ; CHECK-NEXT: adrp x8, lCPI9_3@PAGE
603 ; CHECK-NEXT: Lloh19:
604 ; CHECK-NEXT: ldr q1, [x9, lCPI9_1@PAGEOFF]
605 ; CHECK-NEXT: Lloh20:
606 ; CHECK-NEXT: ldr q2, [x10, lCPI9_2@PAGEOFF]
607 ; CHECK-NEXT: Lloh21:
608 ; CHECK-NEXT: ldr q3, [x8, lCPI9_3@PAGEOFF]
609 ; CHECK-NEXT: mov x8, xzr
610 ; CHECK-NEXT: LBB9_1: ; %loop
611 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
612 ; CHECK-NEXT: ldr q4, [x0, x8, lsl #4]
613 ; CHECK-NEXT: add x9, x1, x8, lsl #6
614 ; CHECK-NEXT: add x8, x8, #1
615 ; CHECK-NEXT: cmp x8, #1000
616 ; CHECK-NEXT: tbl.16b v5, { v4 }, v0
617 ; CHECK-NEXT: tbl.16b v6, { v4 }, v1
618 ; CHECK-NEXT: tbl.16b v7, { v4 }, v2
619 ; CHECK-NEXT: tbl.16b v4, { v4 }, v3
620 ; CHECK-NEXT: ucvtf.4s v5, v5
621 ; CHECK-NEXT: ucvtf.4s v6, v6
622 ; CHECK-NEXT: ucvtf.4s v7, v7
623 ; CHECK-NEXT: ucvtf.4s v4, v4
624 ; CHECK-NEXT: stp q6, q5, [x9, #32]
625 ; CHECK-NEXT: stp q4, q7, [x9]
626 ; CHECK-NEXT: b.eq LBB9_1
627 ; CHECK-NEXT: ; %bb.2: ; %exit
629 ; CHECK-NEXT: .loh AdrpLdr Lloh18, Lloh21
630 ; CHECK-NEXT: .loh AdrpLdr Lloh16, Lloh20
631 ; CHECK-NEXT: .loh AdrpLdr Lloh15, Lloh19
632 ; CHECK-NEXT: .loh AdrpAdrp Lloh14, Lloh18
633 ; CHECK-NEXT: .loh AdrpLdr Lloh14, Lloh17
638 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
639 %gep.src = getelementptr inbounds <16 x i8>, ptr %src, i64 %iv
640 %l = load <16 x i8>, ptr %gep.src
641 %conv = uitofp <16 x i8> %l to <16 x float>
642 %gep.dst = getelementptr inbounds <16 x float>, ptr %dst, i64 %iv
643 store <16 x float> %conv, ptr %gep.dst
644 %iv.next = add i64 %iv, 1
645 %ec = icmp eq i64 %iv.next, 1000
; The back edge is taken when %iv.next == 1000 (true successor is %loop).
646 br i1 %ec, label %loop, label %exit
; Loop converting <8 x i16> to <8 x double> with uitofp. The expected output
; loads four masks (lCPI10_0 .. lCPI10_3) before the loop, applies one
; single-register tbl per mask to the loaded q register, and converts each
; result with ucvtf.2d. The %n parameter is not referenced in the visible body.
652 define void @uitofp_v8i16_to_v8f64(ptr nocapture noundef readonly %x, ptr nocapture noundef writeonly %y, i32 noundef %n) {
653 ; CHECK-LABEL: uitofp_v8i16_to_v8f64:
654 ; CHECK: ; %bb.0: ; %entry
655 ; CHECK-NEXT: Lloh22:
656 ; CHECK-NEXT: adrp x8, lCPI10_0@PAGE
657 ; CHECK-NEXT: Lloh23:
658 ; CHECK-NEXT: adrp x9, lCPI10_1@PAGE
659 ; CHECK-NEXT: Lloh24:
660 ; CHECK-NEXT: adrp x10, lCPI10_2@PAGE
661 ; CHECK-NEXT: Lloh25:
662 ; CHECK-NEXT: ldr q0, [x8, lCPI10_0@PAGEOFF]
663 ; CHECK-NEXT: Lloh26:
664 ; CHECK-NEXT: adrp x8, lCPI10_3@PAGE
665 ; CHECK-NEXT: Lloh27:
666 ; CHECK-NEXT: ldr q1, [x9, lCPI10_1@PAGEOFF]
667 ; CHECK-NEXT: Lloh28:
668 ; CHECK-NEXT: ldr q2, [x10, lCPI10_2@PAGEOFF]
669 ; CHECK-NEXT: Lloh29:
670 ; CHECK-NEXT: ldr q3, [x8, lCPI10_3@PAGEOFF]
671 ; CHECK-NEXT: mov x8, xzr
672 ; CHECK-NEXT: LBB10_1: ; %vector.body
673 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
674 ; CHECK-NEXT: ldr q4, [x0, x8]
675 ; CHECK-NEXT: add x9, x1, x8
676 ; CHECK-NEXT: add x8, x8, #64
677 ; CHECK-NEXT: cmp x8, #2, lsl #12 ; =8192
678 ; CHECK-NEXT: tbl.16b v5, { v4 }, v0
679 ; CHECK-NEXT: tbl.16b v6, { v4 }, v1
680 ; CHECK-NEXT: tbl.16b v7, { v4 }, v2
681 ; CHECK-NEXT: tbl.16b v4, { v4 }, v3
682 ; CHECK-NEXT: ucvtf.2d v5, v5
683 ; CHECK-NEXT: ucvtf.2d v6, v6
684 ; CHECK-NEXT: ucvtf.2d v7, v7
685 ; CHECK-NEXT: ucvtf.2d v4, v4
686 ; CHECK-NEXT: stp q6, q5, [x9, #32]
687 ; CHECK-NEXT: stp q4, q7, [x9]
688 ; CHECK-NEXT: b.ne LBB10_1
689 ; CHECK-NEXT: ; %bb.2: ; %for.cond.cleanup
691 ; CHECK-NEXT: .loh AdrpLdr Lloh26, Lloh29
692 ; CHECK-NEXT: .loh AdrpLdr Lloh24, Lloh28
693 ; CHECK-NEXT: .loh AdrpLdr Lloh23, Lloh27
694 ; CHECK-NEXT: .loh AdrpAdrp Lloh22, Lloh26
695 ; CHECK-NEXT: .loh AdrpLdr Lloh22, Lloh25
697 br label %vector.body
; %index advances by 8 per iteration. The source address is a byte GEP at
; %index << 3, and the destination GEP is in units of double, so both move by
; 64 bytes per iteration (the `add x8, x8, #64` expected above).
700 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
701 %.idx = shl nsw i64 %index, 3
702 %g = getelementptr inbounds i8, ptr %x, i64 %.idx
703 %wide.vec = load <8 x i16>, ptr %g, align 2
704 %u = uitofp <8 x i16> %wide.vec to <8 x double>
705 %h = getelementptr inbounds double, ptr %y, i64 %index
706 store <8 x double> %u, ptr %h, align 8
707 %index.next = add nuw i64 %index, 8
708 %c = icmp eq i64 %index.next, 1024
; The loop exits once %index.next reaches 1024 (true successor is the cleanup
; block), which corresponds to the expected compare against 8192 and b.ne.
709 br i1 %c, label %for.cond.cleanup, label %vector.body
; Loop that loads <32 x i16>, de-interleaves it into four stride-4 <8 x i16>
; vectors with shufflevector, converts each to <8 x double> with uitofp, sums
; the four results with `fadd fast`, and stores the <8 x double> sum. The
; expected output loads four masks (lCPI11_0 .. lCPI11_3) before the loop and
; uses 16 single-register tbl, 16 ucvtf.2d and 12 fadd.2d instructions per
; iteration. The %n parameter is not referenced in the visible body.
715 define void @uitofp_ld4_v32i16_to_v8f64(ptr nocapture noundef readonly %x, ptr nocapture noundef writeonly %y, i32 noundef %n) {
716 ; CHECK-LABEL: uitofp_ld4_v32i16_to_v8f64:
717 ; CHECK: ; %bb.0: ; %entry
718 ; CHECK-NEXT: Lloh30:
719 ; CHECK-NEXT: adrp x8, lCPI11_0@PAGE
720 ; CHECK-NEXT: Lloh31:
721 ; CHECK-NEXT: adrp x9, lCPI11_1@PAGE
722 ; CHECK-NEXT: Lloh32:
723 ; CHECK-NEXT: adrp x10, lCPI11_2@PAGE
724 ; CHECK-NEXT: Lloh33:
725 ; CHECK-NEXT: ldr q0, [x8, lCPI11_0@PAGEOFF]
726 ; CHECK-NEXT: Lloh34:
727 ; CHECK-NEXT: adrp x8, lCPI11_3@PAGE
728 ; CHECK-NEXT: Lloh35:
729 ; CHECK-NEXT: ldr q1, [x9, lCPI11_1@PAGEOFF]
730 ; CHECK-NEXT: Lloh36:
731 ; CHECK-NEXT: ldr q2, [x10, lCPI11_2@PAGEOFF]
732 ; CHECK-NEXT: Lloh37:
733 ; CHECK-NEXT: ldr q3, [x8, lCPI11_3@PAGEOFF]
734 ; CHECK-NEXT: mov x8, xzr
735 ; CHECK-NEXT: LBB11_1: ; %vector.body
736 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
737 ; CHECK-NEXT: add x9, x0, x8
738 ; CHECK-NEXT: ldp q5, q4, [x9, #32]
739 ; CHECK-NEXT: ldp q7, q6, [x9]
740 ; CHECK-NEXT: add x9, x1, x8
741 ; CHECK-NEXT: add x8, x8, #64
742 ; CHECK-NEXT: tbl.16b v16, { v4 }, v0
743 ; CHECK-NEXT: tbl.16b v17, { v5 }, v0
744 ; CHECK-NEXT: tbl.16b v21, { v4 }, v1
745 ; CHECK-NEXT: tbl.16b v18, { v6 }, v0
746 ; CHECK-NEXT: tbl.16b v19, { v7 }, v0
747 ; CHECK-NEXT: tbl.16b v20, { v7 }, v1
748 ; CHECK-NEXT: tbl.16b v22, { v5 }, v1
749 ; CHECK-NEXT: tbl.16b v23, { v5 }, v2
750 ; CHECK-NEXT: tbl.16b v24, { v4 }, v2
751 ; CHECK-NEXT: tbl.16b v25, { v7 }, v2
752 ; CHECK-NEXT: tbl.16b v5, { v5 }, v3
753 ; CHECK-NEXT: tbl.16b v4, { v4 }, v3
754 ; CHECK-NEXT: tbl.16b v7, { v7 }, v3
755 ; CHECK-NEXT: tbl.16b v26, { v6 }, v1
756 ; CHECK-NEXT: tbl.16b v27, { v6 }, v2
757 ; CHECK-NEXT: tbl.16b v6, { v6 }, v3
758 ; CHECK-NEXT: ucvtf.2d v17, v17
759 ; CHECK-NEXT: ucvtf.2d v16, v16
760 ; CHECK-NEXT: ucvtf.2d v19, v19
761 ; CHECK-NEXT: ucvtf.2d v18, v18
762 ; CHECK-NEXT: ucvtf.2d v22, v22
763 ; CHECK-NEXT: ucvtf.2d v23, v23
764 ; CHECK-NEXT: ucvtf.2d v5, v5
765 ; CHECK-NEXT: ucvtf.2d v21, v21
766 ; CHECK-NEXT: ucvtf.2d v24, v24
767 ; CHECK-NEXT: ucvtf.2d v4, v4
768 ; CHECK-NEXT: cmp x8, #2, lsl #12 ; =8192
769 ; CHECK-NEXT: ucvtf.2d v20, v20
770 ; CHECK-NEXT: ucvtf.2d v25, v25
771 ; CHECK-NEXT: ucvtf.2d v7, v7
772 ; CHECK-NEXT: ucvtf.2d v26, v26
773 ; CHECK-NEXT: ucvtf.2d v27, v27
774 ; CHECK-NEXT: ucvtf.2d v6, v6
775 ; CHECK-NEXT: fadd.2d v17, v22, v17
776 ; CHECK-NEXT: fadd.2d v5, v23, v5
777 ; CHECK-NEXT: fadd.2d v16, v21, v16
778 ; CHECK-NEXT: fadd.2d v4, v24, v4
779 ; CHECK-NEXT: fadd.2d v19, v20, v19
780 ; CHECK-NEXT: fadd.2d v7, v25, v7
781 ; CHECK-NEXT: fadd.2d v18, v26, v18
782 ; CHECK-NEXT: fadd.2d v6, v27, v6
783 ; CHECK-NEXT: fadd.2d v5, v17, v5
784 ; CHECK-NEXT: fadd.2d v4, v16, v4
785 ; CHECK-NEXT: fadd.2d v7, v19, v7
786 ; CHECK-NEXT: fadd.2d v6, v18, v6
787 ; CHECK-NEXT: stp q5, q4, [x9, #32]
788 ; CHECK-NEXT: stp q7, q6, [x9]
789 ; CHECK-NEXT: b.ne LBB11_1
790 ; CHECK-NEXT: ; %bb.2: ; %for.cond.cleanup
792 ; CHECK-NEXT: .loh AdrpLdr Lloh34, Lloh37
793 ; CHECK-NEXT: .loh AdrpLdr Lloh32, Lloh36
794 ; CHECK-NEXT: .loh AdrpLdr Lloh31, Lloh35
795 ; CHECK-NEXT: .loh AdrpAdrp Lloh30, Lloh34
796 ; CHECK-NEXT: .loh AdrpLdr Lloh30, Lloh33
798 br label %vector.body
; %index advances by 8 per iteration; the source is addressed with a byte GEP
; at %index << 3 and the destination with a GEP in units of double.
801 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
802 %.idx = shl nsw i64 %index, 3
803 %0 = getelementptr inbounds i8, ptr %x, i64 %.idx
804 %wide.vec = load <32 x i16>, ptr %0, align 2
; Four stride-4 extractions: lanes 0,4,8,..., then 1,5,9,..., 2,6,10,..., and
; 3,7,11,... of the wide vector.
805 %strided.vec = shufflevector <32 x i16> %wide.vec, <32 x i16> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
806 %strided.vec36 = shufflevector <32 x i16> %wide.vec, <32 x i16> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
807 %strided.vec37 = shufflevector <32 x i16> %wide.vec, <32 x i16> poison, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
808 %strided.vec38 = shufflevector <32 x i16> %wide.vec, <32 x i16> poison, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
; Convert each strided vector and accumulate the four results.
809 %1 = uitofp <8 x i16> %strided.vec to <8 x double>
810 %2 = uitofp <8 x i16> %strided.vec36 to <8 x double>
811 %3 = fadd fast <8 x double> %2, %1
812 %4 = uitofp <8 x i16> %strided.vec37 to <8 x double>
813 %5 = fadd fast <8 x double> %3, %4
814 %6 = uitofp <8 x i16> %strided.vec38 to <8 x double>
815 %7 = fadd fast <8 x double> %5, %6
816 %8 = getelementptr inbounds double, ptr %y, i64 %index
817 store <8 x double> %7, ptr %8, align 8
818 %index.next = add nuw i64 %index, 8
819 %9 = icmp eq i64 %index.next, 1024
; The loop exits once %index.next reaches 1024 (true successor is the cleanup
; block), which corresponds to the expected compare against 8192 and b.ne.
820 br i1 %9, label %for.cond.cleanup, label %vector.body