1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -o - %s | FileCheck %s
; Data layout: little-endian (e), Mach-O symbol mangling (m:o), i64 aligned to
; 64 bits, i128 aligned to 128 bits, native integer widths of 32 and 64 bits
; (n32:64) and a 128-bit natural stack alignment (S128).
4 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
; The expected assembly in the CHECK lines of this file uses Apple-style
; syntax (e.g. `fcvtzu.4s`, `@PAGE`/`@PAGEOFF`, `.loh` directives).
5 target triple = "arm64-apple-ios"
7 ; CHECK-LABEL: lCPI0_0:
8 ; CHECK-NEXT: .byte 0 ; 0x0
9 ; CHECK-NEXT: .byte 4 ; 0x4
10 ; CHECK-NEXT: .byte 8 ; 0x8
11 ; CHECK-NEXT: .byte 12 ; 0xc
12 ; CHECK-NEXT: .byte 16 ; 0x10
13 ; CHECK-NEXT: .byte 20 ; 0x14
14 ; CHECK-NEXT: .byte 24 ; 0x18
15 ; CHECK-NEXT: .byte 28 ; 0x1c
16 ; CHECK-NEXT: .byte 255 ; 0xff
17 ; CHECK-NEXT: .byte 255 ; 0xff
18 ; CHECK-NEXT: .byte 255 ; 0xff
19 ; CHECK-NEXT: .byte 255 ; 0xff
20 ; CHECK-NEXT: .byte 255 ; 0xff
21 ; CHECK-NEXT: .byte 255 ; 0xff
22 ; CHECK-NEXT: .byte 255 ; 0xff
23 ; CHECK-NEXT: .byte 255 ; 0xff
25 ; When the fptoui float -> i8 is in a loop, it is profitable to first convert
26 ; float -> i32 and then use tbl for the truncate, because the tbl mask can be
27 ; materialized once outside the loop.
28 define void @fptoui_v8f32_to_v8i8_in_loop(ptr %A, ptr %dst) {
29 ; CHECK-LABEL: fptoui_v8f32_to_v8i8_in_loop:
30 ; CHECK: ; %bb.0: ; %entry
32 ; CHECK-NEXT: adrp x8, lCPI0_0@PAGE
34 ; CHECK-NEXT: ldr q0, [x8, lCPI0_0@PAGEOFF]
35 ; CHECK-NEXT: mov x8, xzr
36 ; CHECK-NEXT: LBB0_1: ; %loop
37 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
38 ; CHECK-NEXT: add x9, x0, x8, lsl #5
39 ; CHECK-NEXT: add x8, x8, #1
40 ; CHECK-NEXT: cmp x8, #1000
41 ; CHECK-NEXT: ldp q2, q1, [x9]
42 ; CHECK-NEXT: fcvtzu.4s v4, v1
43 ; CHECK-NEXT: fcvtzu.4s v3, v2
44 ; CHECK-NEXT: tbl.16b v1, { v3, v4 }, v0
45 ; CHECK-NEXT: str d1, [x1], #16
46 ; CHECK-NEXT: b.eq LBB0_1
47 ; CHECK-NEXT: ; %bb.2: ; %exit
49 ; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1
; Loop body: load the %iv-th <8 x float> from %A and convert it to <8 x i8>.
54 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
55 %gep.A = getelementptr inbounds <8 x float>, ptr %A, i64 %iv
56 %l.A = load <8 x float>, ptr %gep.A
57 %c = fptoui <8 x float> %l.A to <8 x i8>
; The destination is indexed in <16 x i8> strides although only <8 x i8> is
; stored; this matches the `#16` post-increment on the expected store above.
58 %gep.dst = getelementptr inbounds <16 x i8>, ptr %dst, i64 %iv
59 store <8 x i8> %c, ptr %gep.dst
60 %iv.next = add i64 %iv, 1
61 %ec = icmp eq i64 %iv.next, 1000
; NOTE(review): the back-edge to %loop is taken when %ec is true
; (%iv.next == 1000), which is what the expected `b.eq` encodes; confirm this
; polarity is intended rather than an inverted exit condition.
62 br i1 %ec, label %loop, label %exit
68 ; Not profitable to use tbl, as materializing the masks requires more instructions.
; The expected code below therefore loads no constant-pool mask and narrows
; with xtn/uzp1 instead.
70 define void @fptoui_v8f32_to_v8i8_no_loop(ptr %A, ptr %dst) {
71 ; CHECK-LABEL: fptoui_v8f32_to_v8i8_no_loop:
72 ; CHECK: ; %bb.0: ; %entry
73 ; CHECK-NEXT: ldp q0, q1, [x0]
74 ; CHECK-NEXT: fcvtzs.4s v1, v1
75 ; CHECK-NEXT: fcvtzs.4s v0, v0
76 ; CHECK-NEXT: xtn.4h v1, v1
77 ; CHECK-NEXT: xtn.4h v0, v0
78 ; CHECK-NEXT: uzp1.8b v0, v0, v1
79 ; CHECK-NEXT: str d0, [x1]
; Straight-line body: one <8 x float> load, one fptoui to <8 x i8>, one store.
82 %l.A = load <8 x float>, ptr %A
83 %c = fptoui <8 x float> %l.A to <8 x i8>
84 store <8 x i8> %c, ptr %dst
88 ; CHECK-LABEL: lCPI2_0:
89 ; CHECK-NEXT: .byte 0 ; 0x0
90 ; CHECK-NEXT: .byte 4 ; 0x4
91 ; CHECK-NEXT: .byte 8 ; 0x8
92 ; CHECK-NEXT: .byte 12 ; 0xc
93 ; CHECK-NEXT: .byte 16 ; 0x10
94 ; CHECK-NEXT: .byte 20 ; 0x14
95 ; CHECK-NEXT: .byte 24 ; 0x18
96 ; CHECK-NEXT: .byte 28 ; 0x1c
97 ; CHECK-NEXT: .byte 32 ; 0x20
98 ; CHECK-NEXT: .byte 36 ; 0x24
99 ; CHECK-NEXT: .byte 40 ; 0x28
100 ; CHECK-NEXT: .byte 44 ; 0x2c
101 ; CHECK-NEXT: .byte 48 ; 0x30
102 ; CHECK-NEXT: .byte 52 ; 0x34
103 ; CHECK-NEXT: .byte 56 ; 0x38
104 ; CHECK-NEXT: .byte 60 ; 0x3c
106 ; Tbl can also be used when combining multiple fptoui using a shuffle. The loop
107 ; vectorizer may create such patterns.
; Here two <8 x float> -> <8 x i8> conversions are concatenated into one
; <16 x i8>; the expected code uses a single four-register tbl.
108 define void @fptoui_2x_v8f32_to_v8i8_in_loop(ptr %A, ptr %B, ptr %dst) {
109 ; CHECK-LABEL: fptoui_2x_v8f32_to_v8i8_in_loop:
110 ; CHECK: ; %bb.0: ; %entry
112 ; CHECK-NEXT: adrp x8, lCPI2_0@PAGE
114 ; CHECK-NEXT: ldr q0, [x8, lCPI2_0@PAGEOFF]
115 ; CHECK-NEXT: mov x8, xzr
116 ; CHECK-NEXT: LBB2_1: ; %loop
117 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
118 ; CHECK-NEXT: lsl x9, x8, #5
119 ; CHECK-NEXT: add x10, x0, x9
120 ; CHECK-NEXT: add x9, x1, x9
121 ; CHECK-NEXT: ldp q2, q1, [x10]
122 ; CHECK-NEXT: fcvtzu.4s v5, v1
123 ; CHECK-NEXT: ldp q1, q3, [x9]
124 ; CHECK-NEXT: fcvtzu.4s v4, v2
125 ; CHECK-NEXT: fcvtzu.4s v7, v3
126 ; CHECK-NEXT: fcvtzu.4s v6, v1
127 ; CHECK-NEXT: tbl.16b v1, { v4, v5, v6, v7 }, v0
128 ; CHECK-NEXT: str q1, [x2, x8, lsl #4]
129 ; CHECK-NEXT: add x8, x8, #1
130 ; CHECK-NEXT: cmp x8, #1000
131 ; CHECK-NEXT: b.eq LBB2_1
132 ; CHECK-NEXT: ; %bb.2: ; %exit
134 ; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh3
; Loop body: load the %iv-th <8 x float> from both %A and %B.
139 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
140 %gep.A = getelementptr inbounds <8 x float>, ptr %A, i64 %iv
; NOTE(review): `<8x float>` below lacks the space used by the other vector
; types in this file (`<8 x float>`); cosmetic inconsistency only.
141 %gep.B = getelementptr inbounds <8x float>, ptr %B, i64 %iv
142 %l.A = load <8 x float>, ptr %gep.A
143 %l.B = load <8 x float>, ptr %gep.B
144 %c1 = fptoui <8 x float> %l.A to <8 x i8>
145 %c2 = fptoui <8 x float> %l.B to <8 x i8>
; The mask lists indices 0..15 in order, i.e. a plain concatenation of %c1
; followed by %c2.
146 %s = shufflevector <8 x i8> %c1, <8 x i8> %c2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
147 %gep.dst = getelementptr inbounds <16 x i8>, ptr %dst, i64 %iv
148 store <16 x i8> %s, ptr %gep.dst
149 %iv.next = add i64 %iv, 1
150 %ec = icmp eq i64 %iv.next, 1000
; NOTE(review): branches back to %loop when %ec is true (%iv.next == 1000),
; matching the expected `b.eq`; confirm this polarity is intended.
151 br i1 %ec, label %loop, label %exit
157 ; CHECK-LABEL: lCPI3_0:
158 ; CHECK-NEXT: .byte 0 ; 0x0
159 ; CHECK-NEXT: .byte 36 ; 0x24
160 ; CHECK-NEXT: .byte 8 ; 0x8
161 ; CHECK-NEXT: .byte 12 ; 0xc
162 ; CHECK-NEXT: .byte 16 ; 0x10
163 ; CHECK-NEXT: .byte 20 ; 0x14
164 ; CHECK-NEXT: .byte 24 ; 0x18
165 ; CHECK-NEXT: .byte 44 ; 0x2c
166 ; CHECK-NEXT: .byte 32 ; 0x20
167 ; CHECK-NEXT: .byte 36 ; 0x24
168 ; CHECK-NEXT: .byte 40 ; 0x28
169 ; CHECK-NEXT: .byte 44 ; 0x2c
170 ; CHECK-NEXT: .byte 48 ; 0x30
171 ; CHECK-NEXT: .byte 12 ; 0xc
172 ; CHECK-NEXT: .byte 56 ; 0x38
173 ; CHECK-NEXT: .byte 60 ; 0x3c
; Two <8 x float> -> <8 x i8> conversions combined by a shuffle that is NOT a
; plain concatenation: result lanes 1, 7 and 13 select elements 9, 11 and 3.
; The expected code still uses a single four-register tbl; only the mask bytes
; for those lanes (36, 44 and 12 in lCPI3_0) differ from the in-order pattern.
175 define void @fptoui_2x_v8f32_to_v8i8_in_loop_no_concat_shuffle(ptr %A, ptr %B, ptr %dst) {
176 ; CHECK-LABEL: fptoui_2x_v8f32_to_v8i8_in_loop_no_concat_shuffle:
177 ; CHECK: ; %bb.0: ; %entry
179 ; CHECK-NEXT: adrp x8, lCPI3_0@PAGE
181 ; CHECK-NEXT: ldr q0, [x8, lCPI3_0@PAGEOFF]
182 ; CHECK-NEXT: mov x8, xzr
183 ; CHECK-NEXT: LBB3_1: ; %loop
184 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
185 ; CHECK-NEXT: lsl x9, x8, #5
186 ; CHECK-NEXT: add x10, x0, x9
187 ; CHECK-NEXT: add x9, x1, x9
188 ; CHECK-NEXT: ldp q2, q1, [x10]
189 ; CHECK-NEXT: fcvtzu.4s v5, v1
190 ; CHECK-NEXT: ldp q1, q3, [x9]
191 ; CHECK-NEXT: fcvtzu.4s v4, v2
192 ; CHECK-NEXT: fcvtzu.4s v7, v3
193 ; CHECK-NEXT: fcvtzu.4s v6, v1
194 ; CHECK-NEXT: tbl.16b v1, { v4, v5, v6, v7 }, v0
195 ; CHECK-NEXT: str q1, [x2, x8, lsl #4]
196 ; CHECK-NEXT: add x8, x8, #1
197 ; CHECK-NEXT: cmp x8, #1000
198 ; CHECK-NEXT: b.eq LBB3_1
199 ; CHECK-NEXT: ; %bb.2: ; %exit
201 ; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh5
; Loop body: load the %iv-th <8 x float> from both %A and %B.
206 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
207 %gep.A = getelementptr inbounds <8 x float>, ptr %A, i64 %iv
; NOTE(review): `<8x float>` below lacks the space used by the other vector
; types in this file (`<8 x float>`); cosmetic inconsistency only.
208 %gep.B = getelementptr inbounds <8x float>, ptr %B, i64 %iv
209 %l.A = load <8 x float>, ptr %gep.A
210 %l.B = load <8 x float>, ptr %gep.B
211 %c1 = fptoui <8 x float> %l.A to <8 x i8>
212 %c2 = fptoui <8 x float> %l.B to <8 x i8>
; Out-of-order mask entries: position 1 -> 9, position 7 -> 11, position 13 -> 3.
213 %s = shufflevector <8 x i8> %c1, <8 x i8> %c2, <16 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 11, i32 12, i32 3, i32 14, i32 15>
214 %gep.dst = getelementptr inbounds <16 x i8>, ptr %dst, i64 %iv
215 store <16 x i8> %s, ptr %gep.dst
216 %iv.next = add i64 %iv, 1
217 %ec = icmp eq i64 %iv.next, 1000
; NOTE(review): branches back to %loop when %ec is true (%iv.next == 1000),
; matching the expected `b.eq`; confirm this polarity is intended.
218 br i1 %ec, label %loop, label %exit
224 ; CHECK-LABEL: lCPI4_0:
225 ; CHECK-NEXT: .byte 0 ; 0x0
226 ; CHECK-NEXT: .byte 4 ; 0x4
227 ; CHECK-NEXT: .byte 8 ; 0x8
228 ; CHECK-NEXT: .byte 12 ; 0xc
229 ; CHECK-NEXT: .byte 16 ; 0x10
230 ; CHECK-NEXT: .byte 20 ; 0x14
231 ; CHECK-NEXT: .byte 24 ; 0x18
232 ; CHECK-NEXT: .byte 28 ; 0x1c
233 ; CHECK-NEXT: .byte 32 ; 0x20
234 ; CHECK-NEXT: .byte 36 ; 0x24
235 ; CHECK-NEXT: .byte 40 ; 0x28
236 ; CHECK-NEXT: .byte 44 ; 0x2c
237 ; CHECK-NEXT: .byte 48 ; 0x30
238 ; CHECK-NEXT: .byte 52 ; 0x34
239 ; CHECK-NEXT: .byte 56 ; 0x38
240 ; CHECK-NEXT: .byte 60 ; 0x3c
; A single <16 x float> -> <16 x i8> fptoui in a loop. The expected code
; converts four 4s registers and narrows them with one four-register tbl whose
; mask is loaded before the loop.
242 define void @fptoui_v16f32_to_v16i8_in_loop(ptr %A, ptr %dst) {
243 ; CHECK-LABEL: fptoui_v16f32_to_v16i8_in_loop:
244 ; CHECK: ; %bb.0: ; %entry
246 ; CHECK-NEXT: adrp x8, lCPI4_0@PAGE
248 ; CHECK-NEXT: ldr q0, [x8, lCPI4_0@PAGEOFF]
249 ; CHECK-NEXT: mov x8, xzr
250 ; CHECK-NEXT: LBB4_1: ; %loop
251 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
252 ; CHECK-NEXT: add x9, x0, x8, lsl #6
253 ; CHECK-NEXT: add x8, x8, #1
254 ; CHECK-NEXT: cmp x8, #1000
255 ; CHECK-NEXT: ldp q2, q1, [x9, #32]
256 ; CHECK-NEXT: fcvtzu.4s v7, v1
257 ; CHECK-NEXT: ldp q1, q3, [x9]
258 ; CHECK-NEXT: fcvtzu.4s v6, v2
259 ; CHECK-NEXT: fcvtzu.4s v5, v3
260 ; CHECK-NEXT: fcvtzu.4s v4, v1
261 ; CHECK-NEXT: tbl.16b v1, { v4, v5, v6, v7 }, v0
262 ; CHECK-NEXT: str q1, [x1], #32
263 ; CHECK-NEXT: b.eq LBB4_1
264 ; CHECK-NEXT: ; %bb.2: ; %exit
266 ; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh7
; Loop body: load the %iv-th <16 x float> from %A and convert it to <16 x i8>.
271 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
272 %gep.A = getelementptr inbounds <16 x float>, ptr %A, i64 %iv
273 %l.A = load <16 x float>, ptr %gep.A
274 %c = fptoui <16 x float> %l.A to <16 x i8>
; The destination is indexed in <32 x i8> strides although only <16 x i8> is
; stored; this matches the `#32` post-increment on the expected store above.
275 %gep.dst = getelementptr inbounds <32 x i8>, ptr %dst, i64 %iv
276 store <16 x i8> %c, ptr %gep.dst
277 %iv.next = add i64 %iv, 1
278 %ec = icmp eq i64 %iv.next, 1000
; NOTE(review): branches back to %loop when %ec is true (%iv.next == 1000),
; matching the expected `b.eq`; confirm this polarity is intended.
279 br i1 %ec, label %loop, label %exit
285 ; CHECK-LABEL: lCPI5_0:
286 ; CHECK-NEXT: .byte 0 ; 0x0
287 ; CHECK-NEXT: .byte 4 ; 0x4
288 ; CHECK-NEXT: .byte 8 ; 0x8
289 ; CHECK-NEXT: .byte 12 ; 0xc
290 ; CHECK-NEXT: .byte 16 ; 0x10
291 ; CHECK-NEXT: .byte 20 ; 0x14
292 ; CHECK-NEXT: .byte 24 ; 0x18
293 ; CHECK-NEXT: .byte 28 ; 0x1c
294 ; CHECK-NEXT: .byte 32 ; 0x20
295 ; CHECK-NEXT: .byte 36 ; 0x24
296 ; CHECK-NEXT: .byte 40 ; 0x28
297 ; CHECK-NEXT: .byte 44 ; 0x2c
298 ; CHECK-NEXT: .byte 48 ; 0x30
299 ; CHECK-NEXT: .byte 52 ; 0x34
300 ; CHECK-NEXT: .byte 56 ; 0x38
301 ; CHECK-NEXT: .byte 60 ; 0x3c
; Two <16 x float> -> <16 x i8> conversions concatenated into one <32 x i8>.
; The expected code uses two four-register tbl instructions sharing the same
; mask register (v0) and writes both halves with a single stp.
303 define void @fptoui_2x_v16f32_to_v16i8_in_loop(ptr %A, ptr %B, ptr %dst) {
304 ; CHECK-LABEL: fptoui_2x_v16f32_to_v16i8_in_loop:
305 ; CHECK: ; %bb.0: ; %entry
307 ; CHECK-NEXT: adrp x8, lCPI5_0@PAGE
309 ; CHECK-NEXT: ldr q0, [x8, lCPI5_0@PAGEOFF]
310 ; CHECK-NEXT: mov x8, xzr
311 ; CHECK-NEXT: LBB5_1: ; %loop
312 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
313 ; CHECK-NEXT: lsl x9, x8, #6
314 ; CHECK-NEXT: add x10, x1, x9
315 ; CHECK-NEXT: add x9, x0, x9
316 ; CHECK-NEXT: ldp q2, q1, [x10, #32]
317 ; CHECK-NEXT: ldp q3, q4, [x9, #32]
318 ; CHECK-NEXT: ldp q5, q6, [x10]
319 ; CHECK-NEXT: fcvtzu.4s v19, v1
320 ; CHECK-NEXT: fcvtzu.4s v18, v2
321 ; CHECK-NEXT: ldp q2, q1, [x9]
322 ; CHECK-NEXT: fcvtzu.4s v23, v4
323 ; CHECK-NEXT: fcvtzu.4s v17, v6
324 ; CHECK-NEXT: add x9, x2, x8, lsl #5
325 ; CHECK-NEXT: fcvtzu.4s v22, v3
326 ; CHECK-NEXT: fcvtzu.4s v16, v5
327 ; CHECK-NEXT: add x8, x8, #1
328 ; CHECK-NEXT: fcvtzu.4s v21, v1
329 ; CHECK-NEXT: cmp x8, #1000
330 ; CHECK-NEXT: fcvtzu.4s v20, v2
331 ; CHECK-NEXT: tbl.16b v1, { v16, v17, v18, v19 }, v0
332 ; CHECK-NEXT: tbl.16b v2, { v20, v21, v22, v23 }, v0
333 ; CHECK-NEXT: stp q2, q1, [x9]
334 ; CHECK-NEXT: b.eq LBB5_1
335 ; CHECK-NEXT: ; %bb.2: ; %exit
337 ; CHECK-NEXT: .loh AdrpLdr Lloh8, Lloh9
; Loop body: load the %iv-th <16 x float> from both %A and %B.
342 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
343 %gep.A = getelementptr inbounds <16 x float>, ptr %A, i64 %iv
344 %gep.B = getelementptr inbounds <16 x float>, ptr %B, i64 %iv
345 %l.A = load <16 x float>, ptr %gep.A
346 %l.B = load <16 x float>, ptr %gep.B
347 %c1 = fptoui <16 x float> %l.A to <16 x i8>
348 %c2 = fptoui <16 x float> %l.B to <16 x i8>
; The mask lists indices 0..31 in order, i.e. a plain concatenation of %c1
; followed by %c2.
349 %s = shufflevector <16 x i8> %c1, <16 x i8> %c2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
350 %gep.dst = getelementptr inbounds <32 x i8>, ptr %dst, i64 %iv
351 store <32 x i8> %s, ptr %gep.dst
352 %iv.next = add i64 %iv, 1
353 %ec = icmp eq i64 %iv.next, 1000
; NOTE(review): branches back to %loop when %ec is true (%iv.next == 1000),
; matching the expected `b.eq`; confirm this polarity is intended.
354 br i1 %ec, label %loop, label %exit
; <8 x float> -> <8 x i16> fptoui in a loop. The expected code narrows with
; uzp1.8h and loads no constant-pool mask, i.e. tbl is not used for the i16
; destination type.
360 define void @fptoui_v8f32_to_v8i16_in_loop(ptr %A, ptr %dst) {
361 ; CHECK-LABEL: fptoui_v8f32_to_v8i16_in_loop:
362 ; CHECK: ; %bb.0: ; %entry
363 ; CHECK-NEXT: mov x8, xzr
364 ; CHECK-NEXT: LBB6_1: ; %loop
365 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
366 ; CHECK-NEXT: add x9, x0, x8, lsl #5
367 ; CHECK-NEXT: ldp q0, q1, [x9]
368 ; CHECK-NEXT: fcvtzu.4s v1, v1
369 ; CHECK-NEXT: fcvtzu.4s v0, v0
370 ; CHECK-NEXT: uzp1.8h v0, v0, v1
371 ; CHECK-NEXT: str q0, [x1, x8, lsl #4]
372 ; CHECK-NEXT: add x8, x8, #1
373 ; CHECK-NEXT: cmp x8, #1000
374 ; CHECK-NEXT: b.eq LBB6_1
375 ; CHECK-NEXT: ; %bb.2: ; %exit
; Loop body: load the %iv-th <8 x float> from %A, convert it to <8 x i16> and
; store it to the %iv-th <8 x i16> slot of %dst.
381 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
382 %gep.A = getelementptr inbounds <8 x float>, ptr %A, i64 %iv
383 %l.A = load <8 x float>, ptr %gep.A
384 %c = fptoui <8 x float> %l.A to <8 x i16>
385 %gep.dst = getelementptr inbounds <8 x i16>, ptr %dst, i64 %iv
386 store <8 x i16> %c, ptr %gep.dst
387 %iv.next = add i64 %iv, 1
388 %ec = icmp eq i64 %iv.next, 1000
; NOTE(review): branches back to %loop when %ec is true (%iv.next == 1000),
; matching the expected `b.eq`; confirm this polarity is intended.
389 br i1 %ec, label %loop, label %exit
; Two <8 x float> -> <8 x i16> conversions concatenated into one <16 x i16>.
; The expected code narrows each half with its own uzp1.8h and stores both
; with a single stp; no tbl mask is loaded.
395 define void @fptoui_2x_v8f32_to_v8i16_in_loop(ptr %A, ptr %B, ptr %dst) {
396 ; CHECK-LABEL: fptoui_2x_v8f32_to_v8i16_in_loop:
397 ; CHECK: ; %bb.0: ; %entry
398 ; CHECK-NEXT: mov x8, xzr
399 ; CHECK-NEXT: LBB7_1: ; %loop
400 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
401 ; CHECK-NEXT: lsl x9, x8, #5
402 ; CHECK-NEXT: add x8, x8, #1
403 ; CHECK-NEXT: cmp x8, #1000
404 ; CHECK-NEXT: add x10, x0, x9
405 ; CHECK-NEXT: add x11, x1, x9
406 ; CHECK-NEXT: add x9, x2, x9
407 ; CHECK-NEXT: ldp q0, q1, [x10]
408 ; CHECK-NEXT: ldp q2, q3, [x11]
409 ; CHECK-NEXT: fcvtzu.4s v1, v1
410 ; CHECK-NEXT: fcvtzu.4s v0, v0
411 ; CHECK-NEXT: fcvtzu.4s v3, v3
412 ; CHECK-NEXT: fcvtzu.4s v2, v2
413 ; CHECK-NEXT: uzp1.8h v0, v0, v1
414 ; CHECK-NEXT: uzp1.8h v1, v2, v3
415 ; CHECK-NEXT: stp q0, q1, [x9]
416 ; CHECK-NEXT: b.eq LBB7_1
417 ; CHECK-NEXT: ; %bb.2: ; %exit
; Loop body: load the %iv-th <8 x float> from both %A and %B.
423 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
424 %gep.A = getelementptr inbounds <8 x float>, ptr %A, i64 %iv
425 %gep.B = getelementptr inbounds <8 x float>, ptr %B, i64 %iv
426 %l.A = load <8 x float>, ptr %gep.A
427 %l.B = load <8 x float>, ptr %gep.B
428 %c1 = fptoui <8 x float> %l.A to <8 x i16>
429 %c2 = fptoui <8 x float> %l.B to <8 x i16>
; The mask lists indices 0..15 in order, i.e. a plain concatenation of %c1
; followed by %c2.
430 %s = shufflevector <8 x i16> %c1, <8 x i16> %c2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
431 %gep.dst = getelementptr inbounds <16 x i16>, ptr %dst, i64 %iv
432 store <16 x i16> %s, ptr %gep.dst
433 %iv.next = add i64 %iv, 1
434 %ec = icmp eq i64 %iv.next, 1000
; NOTE(review): branches back to %loop when %ec is true (%iv.next == 1000),
; matching the expected `b.eq`; confirm this polarity is intended.
435 br i1 %ec, label %loop, label %exit
441 ; CHECK-LABEL: lCPI8_0:
442 ; CHECK-NEXT: .byte 4 ; 0x4
443 ; CHECK-NEXT: .byte 255 ; 0xff
444 ; CHECK-NEXT: .byte 255 ; 0xff
445 ; CHECK-NEXT: .byte 255 ; 0xff
446 ; CHECK-NEXT: .byte 5 ; 0x5
447 ; CHECK-NEXT: .byte 255 ; 0xff
448 ; CHECK-NEXT: .byte 255 ; 0xff
449 ; CHECK-NEXT: .byte 255 ; 0xff
450 ; CHECK-NEXT: .byte 6 ; 0x6
451 ; CHECK-NEXT: .byte 255 ; 0xff
452 ; CHECK-NEXT: .byte 255 ; 0xff
453 ; CHECK-NEXT: .byte 255 ; 0xff
454 ; CHECK-NEXT: .byte 7 ; 0x7
455 ; CHECK-NEXT: .byte 255 ; 0xff
456 ; CHECK-NEXT: .byte 255 ; 0xff
457 ; CHECK-NEXT: .byte 255 ; 0xff
458 ; CHECK-NEXT: lCPI8_1:
459 ; CHECK-NEXT: .byte 0 ; 0x0
460 ; CHECK-NEXT: .byte 255 ; 0xff
461 ; CHECK-NEXT: .byte 255 ; 0xff
462 ; CHECK-NEXT: .byte 255 ; 0xff
463 ; CHECK-NEXT: .byte 1 ; 0x1
464 ; CHECK-NEXT: .byte 255 ; 0xff
465 ; CHECK-NEXT: .byte 255 ; 0xff
466 ; CHECK-NEXT: .byte 255 ; 0xff
467 ; CHECK-NEXT: .byte 2 ; 0x2
468 ; CHECK-NEXT: .byte 255 ; 0xff
469 ; CHECK-NEXT: .byte 255 ; 0xff
470 ; CHECK-NEXT: .byte 255 ; 0xff
471 ; CHECK-NEXT: .byte 3 ; 0x3
472 ; CHECK-NEXT: .byte 255 ; 0xff
473 ; CHECK-NEXT: .byte 255 ; 0xff
474 ; CHECK-NEXT: .byte 255 ; 0xff
; <8 x i8> -> <8 x float> uitofp in a loop. The expected code loads two tbl
; masks (lCPI8_0 / lCPI8_1) before the loop; each mask puts one source byte in
; the low byte of every 32-bit lane and uses index 255 for the other three
; bytes (tbl yields 0 for out-of-range indices), so tbl performs the
; zero-extension to i32 before ucvtf.
476 define void @uitofp_v8i8_to_v8f32(ptr %src, ptr %dst) {
477 ; CHECK-LABEL: uitofp_v8i8_to_v8f32:
478 ; CHECK: ; %bb.0: ; %entry
479 ; CHECK-NEXT: Lloh10:
480 ; CHECK-NEXT: adrp x8, lCPI8_0@PAGE
481 ; CHECK-NEXT: Lloh11:
482 ; CHECK-NEXT: adrp x9, lCPI8_1@PAGE
483 ; CHECK-NEXT: Lloh12:
484 ; CHECK-NEXT: ldr q0, [x8, lCPI8_0@PAGEOFF]
485 ; CHECK-NEXT: Lloh13:
486 ; CHECK-NEXT: ldr q1, [x9, lCPI8_1@PAGEOFF]
487 ; CHECK-NEXT: mov x8, xzr
488 ; CHECK-NEXT: LBB8_1: ; %loop
489 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
490 ; CHECK-NEXT: ldr d2, [x0, x8, lsl #3]
491 ; CHECK-NEXT: add x9, x1, x8, lsl #5
492 ; CHECK-NEXT: add x8, x8, #1
493 ; CHECK-NEXT: cmp x8, #1000
494 ; CHECK-NEXT: tbl.16b v3, { v2 }, v0
495 ; CHECK-NEXT: tbl.16b v2, { v2 }, v1
496 ; CHECK-NEXT: ucvtf.4s v3, v3
497 ; CHECK-NEXT: ucvtf.4s v2, v2
498 ; CHECK-NEXT: stp q2, q3, [x9]
499 ; CHECK-NEXT: b.eq LBB8_1
500 ; CHECK-NEXT: ; %bb.2: ; %exit
502 ; CHECK-NEXT: .loh AdrpLdr Lloh11, Lloh13
503 ; CHECK-NEXT: .loh AdrpLdr Lloh10, Lloh12
; Loop body: load the %iv-th <8 x i8> from %src, convert it to <8 x float> and
; store it to the %iv-th <8 x float> slot of %dst.
508 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
509 %gep.src = getelementptr inbounds <8 x i8>, ptr %src, i64 %iv
510 %l = load <8 x i8>, ptr %gep.src
511 %conv = uitofp <8 x i8> %l to <8 x float>
512 %gep.dst = getelementptr inbounds <8 x float>, ptr %dst, i64 %iv
513 store <8 x float> %conv, ptr %gep.dst
514 %iv.next = add i64 %iv, 1
515 %ec = icmp eq i64 %iv.next, 1000
; NOTE(review): branches back to %loop when %ec is true (%iv.next == 1000),
; matching the expected `b.eq`; confirm this polarity is intended.
516 br i1 %ec, label %loop, label %exit
522 ; CHECK-LABEL: lCPI9_0:
523 ; CHECK-NEXT: .byte 12 ; 0xc
524 ; CHECK-NEXT: .byte 255 ; 0xff
525 ; CHECK-NEXT: .byte 255 ; 0xff
526 ; CHECK-NEXT: .byte 255 ; 0xff
527 ; CHECK-NEXT: .byte 13 ; 0xd
528 ; CHECK-NEXT: .byte 255 ; 0xff
529 ; CHECK-NEXT: .byte 255 ; 0xff
530 ; CHECK-NEXT: .byte 255 ; 0xff
531 ; CHECK-NEXT: .byte 14 ; 0xe
532 ; CHECK-NEXT: .byte 255 ; 0xff
533 ; CHECK-NEXT: .byte 255 ; 0xff
534 ; CHECK-NEXT: .byte 255 ; 0xff
535 ; CHECK-NEXT: .byte 15 ; 0xf
536 ; CHECK-NEXT: .byte 255 ; 0xff
537 ; CHECK-NEXT: .byte 255 ; 0xff
538 ; CHECK-NEXT: .byte 255 ; 0xff
539 ; CHECK-NEXT: lCPI9_1:
540 ; CHECK-NEXT: .byte 8 ; 0x8
541 ; CHECK-NEXT: .byte 255 ; 0xff
542 ; CHECK-NEXT: .byte 255 ; 0xff
543 ; CHECK-NEXT: .byte 255 ; 0xff
544 ; CHECK-NEXT: .byte 9 ; 0x9
545 ; CHECK-NEXT: .byte 255 ; 0xff
546 ; CHECK-NEXT: .byte 255 ; 0xff
547 ; CHECK-NEXT: .byte 255 ; 0xff
548 ; CHECK-NEXT: .byte 10 ; 0xa
549 ; CHECK-NEXT: .byte 255 ; 0xff
550 ; CHECK-NEXT: .byte 255 ; 0xff
551 ; CHECK-NEXT: .byte 255 ; 0xff
552 ; CHECK-NEXT: .byte 11 ; 0xb
553 ; CHECK-NEXT: .byte 255 ; 0xff
554 ; CHECK-NEXT: .byte 255 ; 0xff
555 ; CHECK-NEXT: .byte 255 ; 0xff
556 ; CHECK-NEXT: lCPI9_2:
557 ; CHECK-NEXT: .byte 4 ; 0x4
558 ; CHECK-NEXT: .byte 255 ; 0xff
559 ; CHECK-NEXT: .byte 255 ; 0xff
560 ; CHECK-NEXT: .byte 255 ; 0xff
561 ; CHECK-NEXT: .byte 5 ; 0x5
562 ; CHECK-NEXT: .byte 255 ; 0xff
563 ; CHECK-NEXT: .byte 255 ; 0xff
564 ; CHECK-NEXT: .byte 255 ; 0xff
565 ; CHECK-NEXT: .byte 6 ; 0x6
566 ; CHECK-NEXT: .byte 255 ; 0xff
567 ; CHECK-NEXT: .byte 255 ; 0xff
568 ; CHECK-NEXT: .byte 255 ; 0xff
569 ; CHECK-NEXT: .byte 7 ; 0x7
570 ; CHECK-NEXT: .byte 255 ; 0xff
571 ; CHECK-NEXT: .byte 255 ; 0xff
572 ; CHECK-NEXT: .byte 255 ; 0xff
573 ; CHECK-NEXT: lCPI9_3:
574 ; CHECK-NEXT: .byte 0 ; 0x0
575 ; CHECK-NEXT: .byte 255 ; 0xff
576 ; CHECK-NEXT: .byte 255 ; 0xff
577 ; CHECK-NEXT: .byte 255 ; 0xff
578 ; CHECK-NEXT: .byte 1 ; 0x1
579 ; CHECK-NEXT: .byte 255 ; 0xff
580 ; CHECK-NEXT: .byte 255 ; 0xff
581 ; CHECK-NEXT: .byte 255 ; 0xff
582 ; CHECK-NEXT: .byte 2 ; 0x2
583 ; CHECK-NEXT: .byte 255 ; 0xff
584 ; CHECK-NEXT: .byte 255 ; 0xff
585 ; CHECK-NEXT: .byte 255 ; 0xff
586 ; CHECK-NEXT: .byte 3 ; 0x3
587 ; CHECK-NEXT: .byte 255 ; 0xff
588 ; CHECK-NEXT: .byte 255 ; 0xff
589 ; CHECK-NEXT: .byte 255 ; 0xff
; <16 x i8> -> <16 x float> uitofp in a loop. The expected code loads four tbl
; masks (lCPI9_0 .. lCPI9_3) before the loop, one per group of four source
; bytes, then performs four tbl + four ucvtf per iteration and stores the
; result with two stp instructions.
591 define void @uitofp_v16i8_to_v16f32(ptr %src, ptr %dst) {
592 ; CHECK-LABEL: uitofp_v16i8_to_v16f32:
593 ; CHECK: ; %bb.0: ; %entry
594 ; CHECK-NEXT: Lloh14:
595 ; CHECK-NEXT: adrp x8, lCPI9_0@PAGE
596 ; CHECK-NEXT: Lloh15:
597 ; CHECK-NEXT: adrp x9, lCPI9_1@PAGE
598 ; CHECK-NEXT: Lloh16:
599 ; CHECK-NEXT: adrp x10, lCPI9_2@PAGE
600 ; CHECK-NEXT: Lloh17:
601 ; CHECK-NEXT: ldr q0, [x8, lCPI9_0@PAGEOFF]
602 ; CHECK-NEXT: Lloh18:
603 ; CHECK-NEXT: adrp x8, lCPI9_3@PAGE
604 ; CHECK-NEXT: Lloh19:
605 ; CHECK-NEXT: ldr q1, [x9, lCPI9_1@PAGEOFF]
606 ; CHECK-NEXT: Lloh20:
607 ; CHECK-NEXT: ldr q2, [x10, lCPI9_2@PAGEOFF]
608 ; CHECK-NEXT: Lloh21:
609 ; CHECK-NEXT: ldr q3, [x8, lCPI9_3@PAGEOFF]
610 ; CHECK-NEXT: mov x8, xzr
611 ; CHECK-NEXT: LBB9_1: ; %loop
612 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
613 ; CHECK-NEXT: ldr q4, [x0, x8, lsl #4]
614 ; CHECK-NEXT: add x9, x1, x8, lsl #6
615 ; CHECK-NEXT: add x8, x8, #1
616 ; CHECK-NEXT: cmp x8, #1000
617 ; CHECK-NEXT: tbl.16b v5, { v4 }, v0
618 ; CHECK-NEXT: tbl.16b v6, { v4 }, v1
619 ; CHECK-NEXT: tbl.16b v7, { v4 }, v2
620 ; CHECK-NEXT: tbl.16b v4, { v4 }, v3
621 ; CHECK-NEXT: ucvtf.4s v5, v5
622 ; CHECK-NEXT: ucvtf.4s v6, v6
623 ; CHECK-NEXT: ucvtf.4s v7, v7
624 ; CHECK-NEXT: ucvtf.4s v4, v4
625 ; CHECK-NEXT: stp q6, q5, [x9, #32]
626 ; CHECK-NEXT: stp q4, q7, [x9]
627 ; CHECK-NEXT: b.eq LBB9_1
628 ; CHECK-NEXT: ; %bb.2: ; %exit
630 ; CHECK-NEXT: .loh AdrpLdr Lloh18, Lloh21
631 ; CHECK-NEXT: .loh AdrpLdr Lloh16, Lloh20
632 ; CHECK-NEXT: .loh AdrpLdr Lloh15, Lloh19
633 ; CHECK-NEXT: .loh AdrpAdrp Lloh14, Lloh18
634 ; CHECK-NEXT: .loh AdrpLdr Lloh14, Lloh17
; Loop body: load the %iv-th <16 x i8> from %src, convert it to <16 x float>
; and store it to the %iv-th <16 x float> slot of %dst.
639 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
640 %gep.src = getelementptr inbounds <16 x i8>, ptr %src, i64 %iv
641 %l = load <16 x i8>, ptr %gep.src
642 %conv = uitofp <16 x i8> %l to <16 x float>
643 %gep.dst = getelementptr inbounds <16 x float>, ptr %dst, i64 %iv
644 store <16 x float> %conv, ptr %gep.dst
645 %iv.next = add i64 %iv, 1
646 %ec = icmp eq i64 %iv.next, 1000
; NOTE(review): branches back to %loop when %ec is true (%iv.next == 1000),
; matching the expected `b.eq`; confirm this polarity is intended.
647 br i1 %ec, label %loop, label %exit