1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s
3 ; RUN: llc -mtriple=aarch64_be-unknown-linux -o - %s | FileCheck --check-prefix=CHECK-BE %s
4 ; RUN: llc -mtriple=aarch64_be-unknown-linux -aarch64-enable-ext-to-tbl=false -o - %s | FileCheck --check-prefix=CHECK-DISABLE %s
6 ; CHECK-LABEL: lCPI0_0:
7 ; CHECK-NEXT: .byte 0 ; 0x0
8 ; CHECK-NEXT: .byte 4 ; 0x4
9 ; CHECK-NEXT: .byte 8 ; 0x8
10 ; CHECK-NEXT: .byte 12 ; 0xc
11 ; CHECK-NEXT: .byte 16 ; 0x10
12 ; CHECK-NEXT: .byte 20 ; 0x14
13 ; CHECK-NEXT: .byte 24 ; 0x18
14 ; CHECK-NEXT: .byte 28 ; 0x1c
15 ; CHECK-NEXT: .byte 32 ; 0x20
16 ; CHECK-NEXT: .byte 36 ; 0x24
17 ; CHECK-NEXT: .byte 40 ; 0x28
18 ; CHECK-NEXT: .byte 44 ; 0x2c
19 ; CHECK-NEXT: .byte 48 ; 0x30
20 ; CHECK-NEXT: .byte 52 ; 0x34
21 ; CHECK-NEXT: .byte 56 ; 0x38
22 ; CHECK-NEXT: .byte 60 ; 0x3c
24 ; CHECK-BE-LABEL: .LCPI0_0:
25 ; CHECK-BE-NEXT: .byte 3 // 0x3
26 ; CHECK-BE-NEXT: .byte 7 // 0x7
27 ; CHECK-BE-NEXT: .byte 11 // 0xb
28 ; CHECK-BE-NEXT: .byte 15 // 0xf
29 ; CHECK-BE-NEXT: .byte 19 // 0x13
30 ; CHECK-BE-NEXT: .byte 23 // 0x17
31 ; CHECK-BE-NEXT: .byte 27 // 0x1b
32 ; CHECK-BE-NEXT: .byte 31 // 0x1f
33 ; CHECK-BE-NEXT: .byte 35 // 0x23
34 ; CHECK-BE-NEXT: .byte 39 // 0x27
35 ; CHECK-BE-NEXT: .byte 43 // 0x2b
36 ; CHECK-BE-NEXT: .byte 47 // 0x2f
37 ; CHECK-BE-NEXT: .byte 51 // 0x33
38 ; CHECK-BE-NEXT: .byte 55 // 0x37
39 ; CHECK-BE-NEXT: .byte 59 // 0x3b
40 ; CHECK-BE-NEXT: .byte 63 // 0x3f
42 ; It's profitable to use a single tbl.4 instruction to lower the truncate.
; Test: trunc <16 x i32> to <16 x i8> on a value loaded inside a loop.
; Expected lowering per check prefix:
;   CHECK / CHECK-BE: the 16-byte index mask (lCPI0_0 / .LCPI0_0) is loaded
;     once before the loop label, and a single four-register tbl in the loop
;     produces the truncated vector.
;   CHECK-DISABLE (-aarch64-enable-ext-to-tbl=false): no mask load; the
;     truncate is a chain of three uzp1 instructions.
; NOTE(review): the back-edge is taken when %iv.next == 1000 (b.eq in the
; checks), so the loop body executes once as written. Confirm this is intended
; before touching it; the autogenerated assertions encode the current form.
43 define void @trunc_v16i32_to_v16i8_in_loop(ptr %A, ptr %dst) {
44 ; CHECK-LABEL: trunc_v16i32_to_v16i8_in_loop:
45 ; CHECK: ; %bb.0: ; %entry
47 ; CHECK-NEXT: adrp x8, lCPI0_0@PAGE
49 ; CHECK-NEXT: ldr q0, [x8, lCPI0_0@PAGEOFF]
50 ; CHECK-NEXT: mov x8, xzr
51 ; CHECK-NEXT: LBB0_1: ; %loop
52 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
53 ; CHECK-NEXT: add x9, x0, x8, lsl #6
54 ; CHECK-NEXT: ldp q1, q2, [x9]
55 ; CHECK-NEXT: ldp q3, q4, [x9, #32]
56 ; CHECK-NEXT: tbl.16b v1, { v1, v2, v3, v4 }, v0
57 ; CHECK-NEXT: str q1, [x1, x8, lsl #4]
58 ; CHECK-NEXT: add x8, x8, #1
59 ; CHECK-NEXT: cmp x8, #1000
60 ; CHECK-NEXT: b.eq LBB0_1
61 ; CHECK-NEXT: ; %bb.2: ; %exit
63 ; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1
65 ; CHECK-BE-LABEL: trunc_v16i32_to_v16i8_in_loop:
66 ; CHECK-BE: // %bb.0: // %entry
67 ; CHECK-BE-NEXT: adrp x8, .LCPI0_0
68 ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI0_0
69 ; CHECK-BE-NEXT: ld1 { v0.16b }, [x8]
70 ; CHECK-BE-NEXT: mov x8, xzr
71 ; CHECK-BE-NEXT: .LBB0_1: // %loop
72 ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
73 ; CHECK-BE-NEXT: add x9, x0, x8, lsl #6
74 ; CHECK-BE-NEXT: add x10, x9, #16
75 ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
76 ; CHECK-BE-NEXT: add x11, x9, #32
77 ; CHECK-BE-NEXT: ld1 { v2.16b }, [x10]
78 ; CHECK-BE-NEXT: add x9, x9, #48
79 ; CHECK-BE-NEXT: ld1 { v3.16b }, [x11]
80 ; CHECK-BE-NEXT: ld1 { v4.16b }, [x9]
81 ; CHECK-BE-NEXT: add x9, x1, x8, lsl #4
82 ; CHECK-BE-NEXT: add x8, x8, #1
83 ; CHECK-BE-NEXT: cmp x8, #1000
84 ; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
85 ; CHECK-BE-NEXT: st1 { v1.16b }, [x9]
86 ; CHECK-BE-NEXT: b.eq .LBB0_1
87 ; CHECK-BE-NEXT: // %bb.2: // %exit
90 ; CHECK-DISABLE-LABEL: trunc_v16i32_to_v16i8_in_loop:
91 ; CHECK-DISABLE: // %bb.0: // %entry
92 ; CHECK-DISABLE-NEXT: mov x8, xzr
93 ; CHECK-DISABLE-NEXT: .LBB0_1: // %loop
94 ; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
95 ; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #6
96 ; CHECK-DISABLE-NEXT: ld1 { v0.4s }, [x9]
97 ; CHECK-DISABLE-NEXT: add x10, x9, #16
98 ; CHECK-DISABLE-NEXT: add x11, x9, #48
99 ; CHECK-DISABLE-NEXT: add x9, x9, #32
100 ; CHECK-DISABLE-NEXT: ld1 { v1.4s }, [x10]
101 ; CHECK-DISABLE-NEXT: ld1 { v2.4s }, [x11]
102 ; CHECK-DISABLE-NEXT: ld1 { v3.4s }, [x9]
103 ; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #4
104 ; CHECK-DISABLE-NEXT: add x8, x8, #1
105 ; CHECK-DISABLE-NEXT: cmp x8, #1000
106 ; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
107 ; CHECK-DISABLE-NEXT: uzp1 v2.8h, v3.8h, v2.8h
108 ; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v2.16b
109 ; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x9]
110 ; CHECK-DISABLE-NEXT: b.eq .LBB0_1
111 ; CHECK-DISABLE-NEXT: // %bb.2: // %exit
112 ; CHECK-DISABLE-NEXT: ret
117 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
118 %gep.A = getelementptr inbounds <16 x i32>, ptr %A, i64 %iv
119 %l.A = load <16 x i32>, ptr %gep.A
120 %trunc = trunc <16 x i32> %l.A to <16 x i8>
121 %gep.dst = getelementptr inbounds <16 x i8>, ptr %dst, i64 %iv
122 store <16 x i8> %trunc, ptr %gep.dst
123 %iv.next = add i64 %iv, 1
124 %ec = icmp eq i64 %iv.next, 1000
125 br i1 %ec, label %loop, label %exit
131 ; Not profitable to use tbl, as materializing the masks requires more
; Test: the same <16 x i32> to <16 x i8> truncate as the in-loop variant, but
; as a straight-line load / trunc / store with no loop.
; All three check prefixes expect the uzp1-based lowering (two uzp1 on halfword
; lanes followed by one on byte lanes); none of them expects a tbl or a
; constant-pool mask load.
133 define void @trunc_v16i32_to_v16i8_no_loop(ptr %A, ptr %dst) {
134 ; CHECK-LABEL: trunc_v16i32_to_v16i8_no_loop:
135 ; CHECK: ; %bb.0: ; %entry
136 ; CHECK-NEXT: ldp q1, q0, [x0]
137 ; CHECK-NEXT: ldp q3, q2, [x0, #32]
138 ; CHECK-NEXT: uzp1.8h v0, v1, v0
139 ; CHECK-NEXT: uzp1.8h v2, v3, v2
140 ; CHECK-NEXT: uzp1.16b v0, v0, v2
141 ; CHECK-NEXT: str q0, [x1]
144 ; CHECK-BE-LABEL: trunc_v16i32_to_v16i8_no_loop:
145 ; CHECK-BE: // %bb.0: // %entry
146 ; CHECK-BE-NEXT: add x8, x0, #16
147 ; CHECK-BE-NEXT: add x9, x0, #48
148 ; CHECK-BE-NEXT: add x10, x0, #32
149 ; CHECK-BE-NEXT: ld1 { v0.4s }, [x0]
150 ; CHECK-BE-NEXT: ld1 { v1.4s }, [x8]
151 ; CHECK-BE-NEXT: ld1 { v2.4s }, [x9]
152 ; CHECK-BE-NEXT: ld1 { v3.4s }, [x10]
153 ; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
154 ; CHECK-BE-NEXT: uzp1 v2.8h, v3.8h, v2.8h
155 ; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v2.16b
156 ; CHECK-BE-NEXT: st1 { v0.16b }, [x1]
159 ; CHECK-DISABLE-LABEL: trunc_v16i32_to_v16i8_no_loop:
160 ; CHECK-DISABLE: // %bb.0: // %entry
161 ; CHECK-DISABLE-NEXT: add x8, x0, #16
162 ; CHECK-DISABLE-NEXT: add x9, x0, #48
163 ; CHECK-DISABLE-NEXT: add x10, x0, #32
164 ; CHECK-DISABLE-NEXT: ld1 { v0.4s }, [x0]
165 ; CHECK-DISABLE-NEXT: ld1 { v1.4s }, [x8]
166 ; CHECK-DISABLE-NEXT: ld1 { v2.4s }, [x9]
167 ; CHECK-DISABLE-NEXT: ld1 { v3.4s }, [x10]
168 ; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
169 ; CHECK-DISABLE-NEXT: uzp1 v2.8h, v3.8h, v2.8h
170 ; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v2.16b
171 ; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x1]
172 ; CHECK-DISABLE-NEXT: ret
174 %l.A = load <16 x i32>, ptr %A
175 %trunc = trunc <16 x i32> %l.A to <16 x i8>
176 store <16 x i8> %trunc, ptr %dst
181 ; CHECK-LABEL: lCPI2_0:
182 ; CHECK-NEXT: .byte 0 ; 0x0
183 ; CHECK-NEXT: .byte 4 ; 0x4
184 ; CHECK-NEXT: .byte 8 ; 0x8
185 ; CHECK-NEXT: .byte 12 ; 0xc
186 ; CHECK-NEXT: .byte 16 ; 0x10
187 ; CHECK-NEXT: .byte 20 ; 0x14
188 ; CHECK-NEXT: .byte 24 ; 0x18
189 ; CHECK-NEXT: .byte 28 ; 0x1c
190 ; CHECK-NEXT: .byte 255 ; 0xff
191 ; CHECK-NEXT: .byte 255 ; 0xff
192 ; CHECK-NEXT: .byte 255 ; 0xff
193 ; CHECK-NEXT: .byte 255 ; 0xff
194 ; CHECK-NEXT: .byte 255 ; 0xff
195 ; CHECK-NEXT: .byte 255 ; 0xff
196 ; CHECK-NEXT: .byte 255 ; 0xff
197 ; CHECK-NEXT: .byte 255 ; 0xff
199 ; CHECK-BE-LABEL: .LCPI2_0:
200 ; CHECK-BE-NEXT: .byte 3 // 0x3
201 ; CHECK-BE-NEXT: .byte 7 // 0x7
202 ; CHECK-BE-NEXT: .byte 11 // 0xb
203 ; CHECK-BE-NEXT: .byte 15 // 0xf
204 ; CHECK-BE-NEXT: .byte 19 // 0x13
205 ; CHECK-BE-NEXT: .byte 23 // 0x17
206 ; CHECK-BE-NEXT: .byte 27 // 0x1b
207 ; CHECK-BE-NEXT: .byte 31 // 0x1f
208 ; CHECK-BE-NEXT: .byte 255 // 0xff
209 ; CHECK-BE-NEXT: .byte 255 // 0xff
210 ; CHECK-BE-NEXT: .byte 255 // 0xff
211 ; CHECK-BE-NEXT: .byte 255 // 0xff
212 ; CHECK-BE-NEXT: .byte 255 // 0xff
213 ; CHECK-BE-NEXT: .byte 255 // 0xff
214 ; CHECK-BE-NEXT: .byte 255 // 0xff
215 ; CHECK-BE-NEXT: .byte 255 // 0xff
216 ; It's profitable to use a single tbl.2 instruction to lower the truncate.
; Test: trunc <8 x i32> to <8 x i8> on a value loaded inside a loop.
; Expected lowering per check prefix:
;   CHECK / CHECK-BE: the index mask (lCPI2_0 / .LCPI2_0) is loaded once
;     before the loop label; its upper eight bytes are 255. A two-register
;     tbl runs in the loop and only the low 64 bits of its result are stored
;     (str d1 / st1 { v1.8b }).
;   CHECK-DISABLE (-aarch64-enable-ext-to-tbl=false): no mask load; the
;     truncate is uzp1 followed by xtn.
; NOTE(review): the back-edge is taken when %iv.next == 1000 (b.eq in the
; checks), so the loop body executes once as written. Confirm this is intended
; before touching it; the autogenerated assertions encode the current form.
217 define void @trunc_v8i32_to_v8i8_in_loop(ptr %A, ptr %dst) {
218 ; CHECK-LABEL: trunc_v8i32_to_v8i8_in_loop:
219 ; CHECK: ; %bb.0: ; %entry
221 ; CHECK-NEXT: adrp x8, lCPI2_0@PAGE
223 ; CHECK-NEXT: ldr q0, [x8, lCPI2_0@PAGEOFF]
224 ; CHECK-NEXT: mov x8, xzr
225 ; CHECK-NEXT: LBB2_1: ; %loop
226 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
227 ; CHECK-NEXT: add x9, x0, x8, lsl #5
228 ; CHECK-NEXT: ldp q1, q2, [x9]
229 ; CHECK-NEXT: tbl.16b v1, { v1, v2 }, v0
230 ; CHECK-NEXT: str d1, [x1, x8, lsl #3]
231 ; CHECK-NEXT: add x8, x8, #1
232 ; CHECK-NEXT: cmp x8, #1000
233 ; CHECK-NEXT: b.eq LBB2_1
234 ; CHECK-NEXT: ; %bb.2: ; %exit
236 ; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh3
238 ; CHECK-BE-LABEL: trunc_v8i32_to_v8i8_in_loop:
239 ; CHECK-BE: // %bb.0: // %entry
240 ; CHECK-BE-NEXT: adrp x8, .LCPI2_0
241 ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI2_0
242 ; CHECK-BE-NEXT: ld1 { v0.16b }, [x8]
243 ; CHECK-BE-NEXT: mov x8, xzr
244 ; CHECK-BE-NEXT: .LBB2_1: // %loop
245 ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
246 ; CHECK-BE-NEXT: add x9, x0, x8, lsl #5
247 ; CHECK-BE-NEXT: add x10, x9, #16
248 ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
249 ; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
250 ; CHECK-BE-NEXT: ld1 { v2.16b }, [x10]
251 ; CHECK-BE-NEXT: add x8, x8, #1
252 ; CHECK-BE-NEXT: cmp x8, #1000
253 ; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v0.16b
254 ; CHECK-BE-NEXT: st1 { v1.8b }, [x9]
255 ; CHECK-BE-NEXT: b.eq .LBB2_1
256 ; CHECK-BE-NEXT: // %bb.2: // %exit
259 ; CHECK-DISABLE-LABEL: trunc_v8i32_to_v8i8_in_loop:
260 ; CHECK-DISABLE: // %bb.0: // %entry
261 ; CHECK-DISABLE-NEXT: mov x8, xzr
262 ; CHECK-DISABLE-NEXT: .LBB2_1: // %loop
263 ; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
264 ; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #5
265 ; CHECK-DISABLE-NEXT: add x10, x9, #16
266 ; CHECK-DISABLE-NEXT: ld1 { v0.4s }, [x9]
267 ; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
268 ; CHECK-DISABLE-NEXT: ld1 { v1.4s }, [x10]
269 ; CHECK-DISABLE-NEXT: add x8, x8, #1
270 ; CHECK-DISABLE-NEXT: cmp x8, #1000
271 ; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
272 ; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
273 ; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
274 ; CHECK-DISABLE-NEXT: b.eq .LBB2_1
275 ; CHECK-DISABLE-NEXT: // %bb.2: // %exit
276 ; CHECK-DISABLE-NEXT: ret
281 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
282 %gep.A = getelementptr inbounds <8 x i32>, ptr %A, i64 %iv
283 %l.A = load <8 x i32>, ptr %gep.A
284 %trunc = trunc <8 x i32> %l.A to <8 x i8>
285 %gep.dst = getelementptr inbounds <8 x i8>, ptr %dst, i64 %iv
286 store <8 x i8> %trunc, ptr %gep.dst
287 %iv.next = add i64 %iv, 1
288 %ec = icmp eq i64 %iv.next, 1000
289 br i1 %ec, label %loop, label %exit
295 ; CHECK-LABEL: lCPI3_0:
296 ; CHECK-NEXT: .byte 0 ; 0x0
297 ; CHECK-NEXT: .byte 8 ; 0x8
298 ; CHECK-NEXT: .byte 16 ; 0x10
299 ; CHECK-NEXT: .byte 24 ; 0x18
300 ; CHECK-NEXT: .byte 32 ; 0x20
301 ; CHECK-NEXT: .byte 40 ; 0x28
302 ; CHECK-NEXT: .byte 48 ; 0x30
303 ; CHECK-NEXT: .byte 56 ; 0x38
304 ; CHECK-NEXT: .byte 64 ; 0x40
305 ; CHECK-NEXT: .byte 72 ; 0x48
306 ; CHECK-NEXT: .byte 80 ; 0x50
307 ; CHECK-NEXT: .byte 88 ; 0x58
308 ; CHECK-NEXT: .byte 96 ; 0x60
309 ; CHECK-NEXT: .byte 104 ; 0x68
310 ; CHECK-NEXT: .byte 112 ; 0x70
311 ; CHECK-NEXT: .byte 120 ; 0x78
313 ; CHECK-BE-LABEL: .LCPI3_0:
314 ; CHECK-BE-NEXT: .byte 7 // 0x7
315 ; CHECK-BE-NEXT: .byte 15 // 0xf
316 ; CHECK-BE-NEXT: .byte 23 // 0x17
317 ; CHECK-BE-NEXT: .byte 31 // 0x1f
318 ; CHECK-BE-NEXT: .byte 39 // 0x27
319 ; CHECK-BE-NEXT: .byte 47 // 0x2f
320 ; CHECK-BE-NEXT: .byte 55 // 0x37
321 ; CHECK-BE-NEXT: .byte 63 // 0x3f
322 ; CHECK-BE-NEXT: .byte 71 // 0x47
323 ; CHECK-BE-NEXT: .byte 79 // 0x4f
324 ; CHECK-BE-NEXT: .byte 87 // 0x57
325 ; CHECK-BE-NEXT: .byte 95 // 0x5f
326 ; CHECK-BE-NEXT: .byte 103 // 0x67
327 ; CHECK-BE-NEXT: .byte 111 // 0x6f
328 ; CHECK-BE-NEXT: .byte 119 // 0x77
329 ; CHECK-BE-NEXT: .byte 127 // 0x7f
; Test: trunc <16 x i64> to <16 x i8> on a value loaded inside a loop.
; Expected lowering per check prefix:
;   CHECK / CHECK-BE: the index mask (lCPI3_0 / .LCPI3_0) is loaded once
;     before the loop label. Two four-register tbl instructions, both indexed
;     by that same mask register, each cover half of the input; a 64-bit lane
;     move (v1.d[1] <- v2.d[0]) merges the halves before the 16-byte store.
;   CHECK-DISABLE (-aarch64-enable-ext-to-tbl=false): no mask load; the
;     truncate is a chain of seven uzp1 instructions.
; NOTE(review): the back-edge is taken when %iv.next == 1000 (b.eq in the
; checks), so the loop body executes once as written. Confirm this is intended
; before touching it; the autogenerated assertions encode the current form.
330 define void @trunc_v16i64_to_v16i8_in_loop(ptr %A, ptr %dst) {
331 ; CHECK-LABEL: trunc_v16i64_to_v16i8_in_loop:
332 ; CHECK: ; %bb.0: ; %entry
334 ; CHECK-NEXT: adrp x8, lCPI3_0@PAGE
336 ; CHECK-NEXT: ldr q0, [x8, lCPI3_0@PAGEOFF]
337 ; CHECK-NEXT: mov x8, xzr
338 ; CHECK-NEXT: LBB3_1: ; %loop
339 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
340 ; CHECK-NEXT: add x9, x0, x8, lsl #7
341 ; CHECK-NEXT: ldp q1, q2, [x9]
342 ; CHECK-NEXT: ldp q16, q17, [x9, #64]
343 ; CHECK-NEXT: ldp q3, q4, [x9, #32]
344 ; CHECK-NEXT: ldp q18, q19, [x9, #96]
345 ; CHECK-NEXT: tbl.16b v1, { v1, v2, v3, v4 }, v0
346 ; CHECK-NEXT: tbl.16b v2, { v16, v17, v18, v19 }, v0
347 ; CHECK-NEXT: mov.d v1[1], v2[0]
348 ; CHECK-NEXT: str q1, [x1, x8, lsl #4]
349 ; CHECK-NEXT: add x8, x8, #1
350 ; CHECK-NEXT: cmp x8, #1000
351 ; CHECK-NEXT: b.eq LBB3_1
352 ; CHECK-NEXT: ; %bb.2: ; %exit
354 ; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh5
356 ; CHECK-BE-LABEL: trunc_v16i64_to_v16i8_in_loop:
357 ; CHECK-BE: // %bb.0: // %entry
358 ; CHECK-BE-NEXT: adrp x8, .LCPI3_0
359 ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI3_0
360 ; CHECK-BE-NEXT: ld1 { v0.16b }, [x8]
361 ; CHECK-BE-NEXT: mov x8, xzr
362 ; CHECK-BE-NEXT: .LBB3_1: // %loop
363 ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
364 ; CHECK-BE-NEXT: add x9, x0, x8, lsl #7
365 ; CHECK-BE-NEXT: add x13, x9, #64
366 ; CHECK-BE-NEXT: add x12, x9, #80
367 ; CHECK-BE-NEXT: add x14, x9, #16
368 ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
369 ; CHECK-BE-NEXT: ld1 { v16.16b }, [x13]
370 ; CHECK-BE-NEXT: add x11, x9, #96
371 ; CHECK-BE-NEXT: add x13, x9, #32
372 ; CHECK-BE-NEXT: ld1 { v2.16b }, [x14]
373 ; CHECK-BE-NEXT: ld1 { v17.16b }, [x12]
374 ; CHECK-BE-NEXT: add x10, x9, #112
375 ; CHECK-BE-NEXT: add x9, x9, #48
376 ; CHECK-BE-NEXT: ld1 { v3.16b }, [x13]
377 ; CHECK-BE-NEXT: ld1 { v18.16b }, [x11]
378 ; CHECK-BE-NEXT: ld1 { v4.16b }, [x9]
379 ; CHECK-BE-NEXT: add x9, x1, x8, lsl #4
380 ; CHECK-BE-NEXT: ld1 { v19.16b }, [x10]
381 ; CHECK-BE-NEXT: add x8, x8, #1
382 ; CHECK-BE-NEXT: cmp x8, #1000
383 ; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
384 ; CHECK-BE-NEXT: tbl v2.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b
385 ; CHECK-BE-NEXT: mov v1.d[1], v2.d[0]
386 ; CHECK-BE-NEXT: st1 { v1.16b }, [x9]
387 ; CHECK-BE-NEXT: b.eq .LBB3_1
388 ; CHECK-BE-NEXT: // %bb.2: // %exit
391 ; CHECK-DISABLE-LABEL: trunc_v16i64_to_v16i8_in_loop:
392 ; CHECK-DISABLE: // %bb.0: // %entry
393 ; CHECK-DISABLE-NEXT: mov x8, xzr
394 ; CHECK-DISABLE-NEXT: .LBB3_1: // %loop
395 ; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
396 ; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #7
397 ; CHECK-DISABLE-NEXT: add x10, x9, #16
398 ; CHECK-DISABLE-NEXT: add x11, x9, #48
399 ; CHECK-DISABLE-NEXT: ld1 { v0.2d }, [x9]
400 ; CHECK-DISABLE-NEXT: ld1 { v1.2d }, [x10]
401 ; CHECK-DISABLE-NEXT: add x10, x9, #112
402 ; CHECK-DISABLE-NEXT: ld1 { v2.2d }, [x11]
403 ; CHECK-DISABLE-NEXT: ld1 { v3.2d }, [x10]
404 ; CHECK-DISABLE-NEXT: add x10, x9, #96
405 ; CHECK-DISABLE-NEXT: add x11, x9, #32
406 ; CHECK-DISABLE-NEXT: ld1 { v4.2d }, [x10]
407 ; CHECK-DISABLE-NEXT: add x10, x9, #80
408 ; CHECK-DISABLE-NEXT: add x9, x9, #64
409 ; CHECK-DISABLE-NEXT: ld1 { v5.2d }, [x11]
410 ; CHECK-DISABLE-NEXT: ld1 { v6.2d }, [x10]
411 ; CHECK-DISABLE-NEXT: ld1 { v7.2d }, [x9]
412 ; CHECK-DISABLE-NEXT: uzp1 v0.4s, v0.4s, v1.4s
413 ; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #4
414 ; CHECK-DISABLE-NEXT: add x8, x8, #1
415 ; CHECK-DISABLE-NEXT: uzp1 v3.4s, v4.4s, v3.4s
416 ; CHECK-DISABLE-NEXT: cmp x8, #1000
417 ; CHECK-DISABLE-NEXT: uzp1 v4.4s, v7.4s, v6.4s
418 ; CHECK-DISABLE-NEXT: uzp1 v2.4s, v5.4s, v2.4s
419 ; CHECK-DISABLE-NEXT: uzp1 v1.8h, v4.8h, v3.8h
420 ; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
421 ; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
422 ; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x9]
423 ; CHECK-DISABLE-NEXT: b.eq .LBB3_1
424 ; CHECK-DISABLE-NEXT: // %bb.2: // %exit
425 ; CHECK-DISABLE-NEXT: ret
430 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
431 %gep.A = getelementptr inbounds <16 x i64>, ptr %A, i64 %iv
432 %l.A = load <16 x i64>, ptr %gep.A
433 %trunc = trunc <16 x i64> %l.A to <16 x i8>
434 %gep.dst = getelementptr inbounds <16 x i8>, ptr %dst, i64 %iv
435 store <16 x i8> %trunc, ptr %gep.dst
436 %iv.next = add i64 %iv, 1
437 %ec = icmp eq i64 %iv.next, 1000
438 br i1 %ec, label %loop, label %exit
444 ; CHECK-LABEL: lCPI4_0:
445 ; CHECK-NEXT: .byte 0 ; 0x0
446 ; CHECK-NEXT: .byte 8 ; 0x8
447 ; CHECK-NEXT: .byte 16 ; 0x10
448 ; CHECK-NEXT: .byte 24 ; 0x18
449 ; CHECK-NEXT: .byte 32 ; 0x20
450 ; CHECK-NEXT: .byte 40 ; 0x28
451 ; CHECK-NEXT: .byte 48 ; 0x30
452 ; CHECK-NEXT: .byte 56 ; 0x38
453 ; CHECK-NEXT: .byte 255 ; 0xff
454 ; CHECK-NEXT: .byte 255 ; 0xff
455 ; CHECK-NEXT: .byte 255 ; 0xff
456 ; CHECK-NEXT: .byte 255 ; 0xff
457 ; CHECK-NEXT: .byte 255 ; 0xff
458 ; CHECK-NEXT: .byte 255 ; 0xff
459 ; CHECK-NEXT: .byte 255 ; 0xff
460 ; CHECK-NEXT: .byte 255 ; 0xff
462 ; CHECK-BE-LABEL: .LCPI4_0:
463 ; CHECK-BE-NEXT: .byte 7 // 0x7
464 ; CHECK-BE-NEXT: .byte 15 // 0xf
465 ; CHECK-BE-NEXT: .byte 23 // 0x17
466 ; CHECK-BE-NEXT: .byte 31 // 0x1f
467 ; CHECK-BE-NEXT: .byte 39 // 0x27
468 ; CHECK-BE-NEXT: .byte 47 // 0x2f
469 ; CHECK-BE-NEXT: .byte 55 // 0x37
470 ; CHECK-BE-NEXT: .byte 63 // 0x3f
471 ; CHECK-BE-NEXT: .byte 255 // 0xff
472 ; CHECK-BE-NEXT: .byte 255 // 0xff
473 ; CHECK-BE-NEXT: .byte 255 // 0xff
474 ; CHECK-BE-NEXT: .byte 255 // 0xff
475 ; CHECK-BE-NEXT: .byte 255 // 0xff
476 ; CHECK-BE-NEXT: .byte 255 // 0xff
477 ; CHECK-BE-NEXT: .byte 255 // 0xff
478 ; CHECK-BE-NEXT: .byte 255 // 0xff
; Test: trunc <8 x i64> to <8 x i8> on a value loaded inside a loop.
; Expected lowering per check prefix:
;   CHECK / CHECK-BE: the index mask (lCPI4_0 / .LCPI4_0) is loaded once
;     before the loop label; its upper eight bytes are 255. A single
;     four-register tbl runs in the loop and only the low 64 bits of its
;     result are stored (str d1 / st1 { v1.8b }).
;   CHECK-DISABLE (-aarch64-enable-ext-to-tbl=false): no mask load; the
;     truncate is three uzp1 instructions followed by xtn.
; NOTE(review): the back-edge is taken when %iv.next == 1000 (b.eq in the
; checks), so the loop body executes once as written. Confirm this is intended
; before touching it; the autogenerated assertions encode the current form.
479 define void @trunc_v8i64_to_v8i8_in_loop(ptr %A, ptr %dst) {
480 ; CHECK-LABEL: trunc_v8i64_to_v8i8_in_loop:
481 ; CHECK: ; %bb.0: ; %entry
483 ; CHECK-NEXT: adrp x8, lCPI4_0@PAGE
485 ; CHECK-NEXT: ldr q0, [x8, lCPI4_0@PAGEOFF]
486 ; CHECK-NEXT: mov x8, xzr
487 ; CHECK-NEXT: LBB4_1: ; %loop
488 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
489 ; CHECK-NEXT: add x9, x0, x8, lsl #6
490 ; CHECK-NEXT: ldp q1, q2, [x9]
491 ; CHECK-NEXT: ldp q3, q4, [x9, #32]
492 ; CHECK-NEXT: tbl.16b v1, { v1, v2, v3, v4 }, v0
493 ; CHECK-NEXT: str d1, [x1, x8, lsl #3]
494 ; CHECK-NEXT: add x8, x8, #1
495 ; CHECK-NEXT: cmp x8, #1000
496 ; CHECK-NEXT: b.eq LBB4_1
497 ; CHECK-NEXT: ; %bb.2: ; %exit
499 ; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh7
501 ; CHECK-BE-LABEL: trunc_v8i64_to_v8i8_in_loop:
502 ; CHECK-BE: // %bb.0: // %entry
503 ; CHECK-BE-NEXT: adrp x8, .LCPI4_0
504 ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI4_0
505 ; CHECK-BE-NEXT: ld1 { v0.16b }, [x8]
506 ; CHECK-BE-NEXT: mov x8, xzr
507 ; CHECK-BE-NEXT: .LBB4_1: // %loop
508 ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
509 ; CHECK-BE-NEXT: add x9, x0, x8, lsl #6
510 ; CHECK-BE-NEXT: add x10, x9, #16
511 ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
512 ; CHECK-BE-NEXT: add x11, x9, #32
513 ; CHECK-BE-NEXT: ld1 { v2.16b }, [x10]
514 ; CHECK-BE-NEXT: add x9, x9, #48
515 ; CHECK-BE-NEXT: ld1 { v3.16b }, [x11]
516 ; CHECK-BE-NEXT: ld1 { v4.16b }, [x9]
517 ; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
518 ; CHECK-BE-NEXT: add x8, x8, #1
519 ; CHECK-BE-NEXT: cmp x8, #1000
520 ; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
521 ; CHECK-BE-NEXT: st1 { v1.8b }, [x9]
522 ; CHECK-BE-NEXT: b.eq .LBB4_1
523 ; CHECK-BE-NEXT: // %bb.2: // %exit
526 ; CHECK-DISABLE-LABEL: trunc_v8i64_to_v8i8_in_loop:
527 ; CHECK-DISABLE: // %bb.0: // %entry
528 ; CHECK-DISABLE-NEXT: mov x8, xzr
529 ; CHECK-DISABLE-NEXT: .LBB4_1: // %loop
530 ; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
531 ; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #6
532 ; CHECK-DISABLE-NEXT: ld1 { v0.2d }, [x9]
533 ; CHECK-DISABLE-NEXT: add x10, x9, #16
534 ; CHECK-DISABLE-NEXT: add x11, x9, #48
535 ; CHECK-DISABLE-NEXT: add x9, x9, #32
536 ; CHECK-DISABLE-NEXT: ld1 { v1.2d }, [x10]
537 ; CHECK-DISABLE-NEXT: ld1 { v2.2d }, [x11]
538 ; CHECK-DISABLE-NEXT: ld1 { v3.2d }, [x9]
539 ; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
540 ; CHECK-DISABLE-NEXT: add x8, x8, #1
541 ; CHECK-DISABLE-NEXT: cmp x8, #1000
542 ; CHECK-DISABLE-NEXT: uzp1 v0.4s, v0.4s, v1.4s
543 ; CHECK-DISABLE-NEXT: uzp1 v2.4s, v3.4s, v2.4s
544 ; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
545 ; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
546 ; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
547 ; CHECK-DISABLE-NEXT: b.eq .LBB4_1
548 ; CHECK-DISABLE-NEXT: // %bb.2: // %exit
549 ; CHECK-DISABLE-NEXT: ret
554 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
555 %gep.A = getelementptr inbounds <8 x i64>, ptr %A, i64 %iv
556 %l.A = load <8 x i64>, ptr %gep.A
557 %trunc = trunc <8 x i64> %l.A to <8 x i8>
558 %gep.dst = getelementptr inbounds <8 x i8>, ptr %dst, i64 %iv
559 store <8 x i8> %trunc, ptr %gep.dst
560 %iv.next = add i64 %iv, 1
561 %ec = icmp eq i64 %iv.next, 1000
562 br i1 %ec, label %loop, label %exit
; Test: trunc <8 x i19> to <8 x i8> inside a loop, i.e. a source element type
; that is not a whole number of bytes.
; None of the three check prefixes expects a tbl or a constant-pool mask here.
; Every expected sequence extracts the elements with scalar loads, shifts and
; bitfield operations, inserts them into vector lanes, and finishes with
; uzp1 + xtn before storing 8 bytes.
; NOTE(review): the back-edge is taken when %iv.next == 1000 (b.eq in the
; checks), so the loop body executes once as written. Confirm this is intended
; before touching it; the autogenerated assertions encode the current form.
568 define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
569 ; CHECK-LABEL: trunc_v8i19_to_v8i8_in_loop:
570 ; CHECK: ; %bb.0: ; %entry
571 ; CHECK-NEXT: mov x8, xzr
572 ; CHECK-NEXT: LBB5_1: ; %loop
573 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
574 ; CHECK-NEXT: ldp x10, x9, [x0]
575 ; CHECK-NEXT: ldrb w13, [x0, #18]
576 ; CHECK-NEXT: ldrh w14, [x0, #16]
577 ; CHECK-NEXT: add x0, x0, #32
578 ; CHECK-NEXT: ubfx x12, x9, #12, #20
579 ; CHECK-NEXT: fmov s0, w10
580 ; CHECK-NEXT: lsr x11, x10, #19
581 ; CHECK-NEXT: lsr x15, x9, #31
582 ; CHECK-NEXT: fmov s1, w12
583 ; CHECK-NEXT: lsr x12, x9, #50
584 ; CHECK-NEXT: mov.s v0[1], w11
585 ; CHECK-NEXT: orr w11, w14, w13, lsl #16
586 ; CHECK-NEXT: lsr x13, x10, #38
587 ; CHECK-NEXT: lsr x10, x10, #57
588 ; CHECK-NEXT: mov.s v1[1], w15
589 ; CHECK-NEXT: orr w12, w12, w11, lsl #14
590 ; CHECK-NEXT: orr w9, w10, w9, lsl #7
591 ; CHECK-NEXT: lsr w10, w11, #5
592 ; CHECK-NEXT: mov.s v0[2], w13
593 ; CHECK-NEXT: mov.s v1[2], w12
594 ; CHECK-NEXT: mov.s v0[3], w9
595 ; CHECK-NEXT: mov.s v1[3], w10
596 ; CHECK-NEXT: uzp1.8h v0, v0, v1
597 ; CHECK-NEXT: xtn.8b v0, v0
598 ; CHECK-NEXT: str d0, [x1, x8, lsl #3]
599 ; CHECK-NEXT: add x8, x8, #1
600 ; CHECK-NEXT: cmp x8, #1000
601 ; CHECK-NEXT: b.eq LBB5_1
602 ; CHECK-NEXT: ; %bb.2: ; %exit
605 ; CHECK-BE-LABEL: trunc_v8i19_to_v8i8_in_loop:
606 ; CHECK-BE: // %bb.0: // %entry
607 ; CHECK-BE-NEXT: mov x8, xzr
608 ; CHECK-BE-NEXT: .LBB5_1: // %loop
609 ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
610 ; CHECK-BE-NEXT: ldp x10, x9, [x0]
611 ; CHECK-BE-NEXT: ldrb w16, [x0, #18]
612 ; CHECK-BE-NEXT: lsr x11, x9, #40
613 ; CHECK-BE-NEXT: ubfx x12, x9, #33, #7
614 ; CHECK-BE-NEXT: lsr x15, x10, #45
615 ; CHECK-BE-NEXT: lsr x13, x10, #40
616 ; CHECK-BE-NEXT: ubfx x14, x10, #26, #14
617 ; CHECK-BE-NEXT: orr w11, w12, w11, lsl #7
618 ; CHECK-BE-NEXT: ldrh w12, [x0, #16]
619 ; CHECK-BE-NEXT: fmov s0, w15
620 ; CHECK-BE-NEXT: orr w13, w14, w13, lsl #14
621 ; CHECK-BE-NEXT: ubfx x14, x9, #14, #18
622 ; CHECK-BE-NEXT: add x0, x0, #32
623 ; CHECK-BE-NEXT: fmov s1, w11
624 ; CHECK-BE-NEXT: orr w11, w16, w12, lsl #8
625 ; CHECK-BE-NEXT: lsl x12, x9, #24
626 ; CHECK-BE-NEXT: mov v0.s[1], w13
627 ; CHECK-BE-NEXT: ubfx x13, x10, #7, #25
628 ; CHECK-BE-NEXT: extr x9, x10, x9, #40
629 ; CHECK-BE-NEXT: orr w12, w11, w12
630 ; CHECK-BE-NEXT: mov v1.s[1], w14
631 ; CHECK-BE-NEXT: lsr w12, w12, #19
632 ; CHECK-BE-NEXT: ubfx x9, x9, #12, #20
633 ; CHECK-BE-NEXT: mov v0.s[2], w13
634 ; CHECK-BE-NEXT: mov v1.s[2], w12
635 ; CHECK-BE-NEXT: mov v0.s[3], w9
636 ; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
637 ; CHECK-BE-NEXT: add x8, x8, #1
638 ; CHECK-BE-NEXT: cmp x8, #1000
639 ; CHECK-BE-NEXT: mov v1.s[3], w11
640 ; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
641 ; CHECK-BE-NEXT: xtn v0.8b, v0.8h
642 ; CHECK-BE-NEXT: st1 { v0.8b }, [x9]
643 ; CHECK-BE-NEXT: b.eq .LBB5_1
644 ; CHECK-BE-NEXT: // %bb.2: // %exit
647 ; CHECK-DISABLE-LABEL: trunc_v8i19_to_v8i8_in_loop:
648 ; CHECK-DISABLE: // %bb.0: // %entry
649 ; CHECK-DISABLE-NEXT: mov x8, xzr
650 ; CHECK-DISABLE-NEXT: .LBB5_1: // %loop
651 ; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
652 ; CHECK-DISABLE-NEXT: ldp x10, x9, [x0]
653 ; CHECK-DISABLE-NEXT: ldrb w16, [x0, #18]
654 ; CHECK-DISABLE-NEXT: lsr x11, x9, #40
655 ; CHECK-DISABLE-NEXT: ubfx x12, x9, #33, #7
656 ; CHECK-DISABLE-NEXT: lsr x15, x10, #45
657 ; CHECK-DISABLE-NEXT: lsr x13, x10, #40
658 ; CHECK-DISABLE-NEXT: ubfx x14, x10, #26, #14
659 ; CHECK-DISABLE-NEXT: orr w11, w12, w11, lsl #7
660 ; CHECK-DISABLE-NEXT: ldrh w12, [x0, #16]
661 ; CHECK-DISABLE-NEXT: fmov s0, w15
662 ; CHECK-DISABLE-NEXT: orr w13, w14, w13, lsl #14
663 ; CHECK-DISABLE-NEXT: ubfx x14, x9, #14, #18
664 ; CHECK-DISABLE-NEXT: add x0, x0, #32
665 ; CHECK-DISABLE-NEXT: fmov s1, w11
666 ; CHECK-DISABLE-NEXT: orr w11, w16, w12, lsl #8
667 ; CHECK-DISABLE-NEXT: lsl x12, x9, #24
668 ; CHECK-DISABLE-NEXT: mov v0.s[1], w13
669 ; CHECK-DISABLE-NEXT: ubfx x13, x10, #7, #25
670 ; CHECK-DISABLE-NEXT: extr x9, x10, x9, #40
671 ; CHECK-DISABLE-NEXT: orr w12, w11, w12
672 ; CHECK-DISABLE-NEXT: mov v1.s[1], w14
673 ; CHECK-DISABLE-NEXT: lsr w12, w12, #19
674 ; CHECK-DISABLE-NEXT: ubfx x9, x9, #12, #20
675 ; CHECK-DISABLE-NEXT: mov v0.s[2], w13
676 ; CHECK-DISABLE-NEXT: mov v1.s[2], w12
677 ; CHECK-DISABLE-NEXT: mov v0.s[3], w9
678 ; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
679 ; CHECK-DISABLE-NEXT: add x8, x8, #1
680 ; CHECK-DISABLE-NEXT: cmp x8, #1000
681 ; CHECK-DISABLE-NEXT: mov v1.s[3], w11
682 ; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
683 ; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
684 ; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
685 ; CHECK-DISABLE-NEXT: b.eq .LBB5_1
686 ; CHECK-DISABLE-NEXT: // %bb.2: // %exit
687 ; CHECK-DISABLE-NEXT: ret
692 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
693 %gep.A = getelementptr inbounds <8 x i19>, ptr %A, i64 %iv
694 %l.A = load <8 x i19>, ptr %gep.A
695 %trunc = trunc <8 x i19> %l.A to <8 x i8>
696 %gep.dst = getelementptr inbounds <8 x i8>, ptr %dst, i64 %iv
697 store <8 x i8> %trunc, ptr %gep.dst
698 %iv.next = add i64 %iv, 1
699 %ec = icmp eq i64 %iv.next, 1000
700 br i1 %ec, label %loop, label %exit
; Test: trunc <11 x i64> to <11 x i8> inside a loop, i.e. an element count
; that is not a power of two.
; None of the three check prefixes expects a tbl or a constant-pool mask here.
; The expected sequences narrow with uzp1 / xtn and write the 11 result bytes
; as three separate stores: 8 bytes at [x1], a halfword at [x1 + 8] and a byte
; at [x1 + 10]. The checks also show a down-counting register initialised to
; 1000 (subs x8, x8, #1) rather than an up-counting index compared against
; 1000.
; NOTE(review): the back-edge is taken when %iv.next == 1000 (b.eq in the
; checks). Confirm this is intended before touching it; the autogenerated
; assertions encode the current form.
706 define void @trunc_v11i64_to_v11i8_in_loop(ptr %A, ptr %dst) {
707 ; CHECK-LABEL: trunc_v11i64_to_v11i8_in_loop:
708 ; CHECK: ; %bb.0: ; %entry
709 ; CHECK-NEXT: mov w8, #1000 ; =0x3e8
710 ; CHECK-NEXT: LBB6_1: ; %loop
711 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
712 ; CHECK-NEXT: ldp q4, q1, [x0, #48]
713 ; CHECK-NEXT: add x9, x1, #8
714 ; CHECK-NEXT: ldp q3, q2, [x0]
715 ; CHECK-NEXT: subs x8, x8, #1
716 ; CHECK-NEXT: ldr d0, [x0, #80]
717 ; CHECK-NEXT: ldr q5, [x0, #32]
718 ; CHECK-NEXT: add x0, x0, #128
719 ; CHECK-NEXT: uzp1.4s v4, v5, v4
720 ; CHECK-NEXT: uzp1.4s v2, v3, v2
721 ; CHECK-NEXT: uzp1.4s v0, v1, v0
722 ; CHECK-NEXT: uzp1.8h v1, v2, v4
723 ; CHECK-NEXT: xtn.4h v0, v0
724 ; CHECK-NEXT: uzp1.16b v1, v1, v0
725 ; CHECK-NEXT: xtn.8b v0, v0
726 ; CHECK-NEXT: st1.h { v1 }[4], [x9]
727 ; CHECK-NEXT: add x9, x1, #10
728 ; CHECK-NEXT: st1.b { v0 }[2], [x9]
729 ; CHECK-NEXT: str d1, [x1], #16
730 ; CHECK-NEXT: b.eq LBB6_1
731 ; CHECK-NEXT: ; %bb.2: ; %exit
734 ; CHECK-BE-LABEL: trunc_v11i64_to_v11i8_in_loop:
735 ; CHECK-BE: // %bb.0: // %entry
736 ; CHECK-BE-NEXT: mov w8, #1000 // =0x3e8
737 ; CHECK-BE-NEXT: .LBB6_1: // %loop
738 ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
739 ; CHECK-BE-NEXT: add x9, x0, #64
740 ; CHECK-BE-NEXT: add x10, x0, #16
741 ; CHECK-BE-NEXT: ld1 { v3.2d }, [x0]
742 ; CHECK-BE-NEXT: ld1 { v0.2d }, [x9]
743 ; CHECK-BE-NEXT: add x9, x0, #48
744 ; CHECK-BE-NEXT: ld1 { v1.2d }, [x10]
745 ; CHECK-BE-NEXT: add x10, x0, #32
746 ; CHECK-BE-NEXT: ld1 { v2.2d }, [x9]
747 ; CHECK-BE-NEXT: ldr d5, [x0, #80]
748 ; CHECK-BE-NEXT: ld1 { v4.2d }, [x10]
749 ; CHECK-BE-NEXT: add x9, x1, #10
750 ; CHECK-BE-NEXT: subs x8, x8, #1
751 ; CHECK-BE-NEXT: uzp1 v1.4s, v3.4s, v1.4s
752 ; CHECK-BE-NEXT: uzp1 v0.4s, v0.4s, v5.4s
753 ; CHECK-BE-NEXT: add x0, x0, #128
754 ; CHECK-BE-NEXT: uzp1 v2.4s, v4.4s, v2.4s
755 ; CHECK-BE-NEXT: xtn v0.4h, v0.4s
756 ; CHECK-BE-NEXT: uzp1 v1.8h, v1.8h, v2.8h
757 ; CHECK-BE-NEXT: uzp1 v1.16b, v1.16b, v0.16b
758 ; CHECK-BE-NEXT: xtn v0.8b, v0.8h
759 ; CHECK-BE-NEXT: rev16 v2.16b, v1.16b
760 ; CHECK-BE-NEXT: rev64 v1.16b, v1.16b
761 ; CHECK-BE-NEXT: st1 { v0.b }[2], [x9]
762 ; CHECK-BE-NEXT: add x9, x1, #8
763 ; CHECK-BE-NEXT: st1 { v2.h }[4], [x9]
764 ; CHECK-BE-NEXT: str d1, [x1], #16
765 ; CHECK-BE-NEXT: b.eq .LBB6_1
766 ; CHECK-BE-NEXT: // %bb.2: // %exit
769 ; CHECK-DISABLE-LABEL: trunc_v11i64_to_v11i8_in_loop:
770 ; CHECK-DISABLE: // %bb.0: // %entry
771 ; CHECK-DISABLE-NEXT: mov w8, #1000 // =0x3e8
772 ; CHECK-DISABLE-NEXT: .LBB6_1: // %loop
773 ; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
774 ; CHECK-DISABLE-NEXT: add x9, x0, #64
775 ; CHECK-DISABLE-NEXT: add x10, x0, #16
776 ; CHECK-DISABLE-NEXT: ld1 { v3.2d }, [x0]
777 ; CHECK-DISABLE-NEXT: ld1 { v0.2d }, [x9]
778 ; CHECK-DISABLE-NEXT: add x9, x0, #48
779 ; CHECK-DISABLE-NEXT: ld1 { v1.2d }, [x10]
780 ; CHECK-DISABLE-NEXT: add x10, x0, #32
781 ; CHECK-DISABLE-NEXT: ld1 { v2.2d }, [x9]
782 ; CHECK-DISABLE-NEXT: ldr d5, [x0, #80]
783 ; CHECK-DISABLE-NEXT: ld1 { v4.2d }, [x10]
784 ; CHECK-DISABLE-NEXT: add x9, x1, #10
785 ; CHECK-DISABLE-NEXT: subs x8, x8, #1
786 ; CHECK-DISABLE-NEXT: uzp1 v1.4s, v3.4s, v1.4s
787 ; CHECK-DISABLE-NEXT: uzp1 v0.4s, v0.4s, v5.4s
788 ; CHECK-DISABLE-NEXT: add x0, x0, #128
789 ; CHECK-DISABLE-NEXT: uzp1 v2.4s, v4.4s, v2.4s
790 ; CHECK-DISABLE-NEXT: xtn v0.4h, v0.4s
791 ; CHECK-DISABLE-NEXT: uzp1 v1.8h, v1.8h, v2.8h
792 ; CHECK-DISABLE-NEXT: uzp1 v1.16b, v1.16b, v0.16b
793 ; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
794 ; CHECK-DISABLE-NEXT: rev16 v2.16b, v1.16b
795 ; CHECK-DISABLE-NEXT: rev64 v1.16b, v1.16b
796 ; CHECK-DISABLE-NEXT: st1 { v0.b }[2], [x9]
797 ; CHECK-DISABLE-NEXT: add x9, x1, #8
798 ; CHECK-DISABLE-NEXT: st1 { v2.h }[4], [x9]
799 ; CHECK-DISABLE-NEXT: str d1, [x1], #16
800 ; CHECK-DISABLE-NEXT: b.eq .LBB6_1
801 ; CHECK-DISABLE-NEXT: // %bb.2: // %exit
802 ; CHECK-DISABLE-NEXT: ret
807 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
808 %gep.A = getelementptr inbounds <11 x i64>, ptr %A, i64 %iv
809 %l.A = load <11 x i64>, ptr %gep.A
810 %trunc = trunc <11 x i64> %l.A to <11 x i8>
811 %gep.dst = getelementptr inbounds <11 x i8>, ptr %dst, i64 %iv
812 store <11 x i8> %trunc, ptr %gep.dst
813 %iv.next = add i64 %iv, 1
814 %ec = icmp eq i64 %iv.next, 1000
815 br i1 %ec, label %loop, label %exit
; Test: trunc <16 x i16> to <16 x i8> on a value loaded inside a loop.
; None of the three check prefixes expects a tbl or a constant-pool mask here:
; each expected sequence performs the truncate with a single uzp1 on byte
; lanes of the two loaded registers.
; NOTE(review): the back-edge is taken when %iv.next == 1000 (b.eq in the
; checks), so the loop body executes once as written. Confirm this is intended
; before touching it; the autogenerated assertions encode the current form.
821 define void @trunc_v16i16_to_v16i8_in_loop(ptr %A, ptr %dst) {
822 ; CHECK-LABEL: trunc_v16i16_to_v16i8_in_loop:
823 ; CHECK: ; %bb.0: ; %entry
824 ; CHECK-NEXT: mov x8, xzr
825 ; CHECK-NEXT: LBB7_1: ; %loop
826 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
827 ; CHECK-NEXT: add x9, x0, x8, lsl #5
828 ; CHECK-NEXT: ldp q1, q0, [x9]
829 ; CHECK-NEXT: uzp1.16b v0, v1, v0
830 ; CHECK-NEXT: str q0, [x1, x8, lsl #4]
831 ; CHECK-NEXT: add x8, x8, #1
832 ; CHECK-NEXT: cmp x8, #1000
833 ; CHECK-NEXT: b.eq LBB7_1
834 ; CHECK-NEXT: ; %bb.2: ; %exit
837 ; CHECK-BE-LABEL: trunc_v16i16_to_v16i8_in_loop:
838 ; CHECK-BE: // %bb.0: // %entry
839 ; CHECK-BE-NEXT: mov x8, xzr
840 ; CHECK-BE-NEXT: .LBB7_1: // %loop
841 ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
842 ; CHECK-BE-NEXT: add x9, x0, x8, lsl #5
843 ; CHECK-BE-NEXT: add x10, x9, #16
844 ; CHECK-BE-NEXT: ld1 { v0.8h }, [x9]
845 ; CHECK-BE-NEXT: add x9, x1, x8, lsl #4
846 ; CHECK-BE-NEXT: ld1 { v1.8h }, [x10]
847 ; CHECK-BE-NEXT: add x8, x8, #1
848 ; CHECK-BE-NEXT: cmp x8, #1000
849 ; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
850 ; CHECK-BE-NEXT: st1 { v0.16b }, [x9]
851 ; CHECK-BE-NEXT: b.eq .LBB7_1
852 ; CHECK-BE-NEXT: // %bb.2: // %exit
855 ; CHECK-DISABLE-LABEL: trunc_v16i16_to_v16i8_in_loop:
856 ; CHECK-DISABLE: // %bb.0: // %entry
857 ; CHECK-DISABLE-NEXT: mov x8, xzr
858 ; CHECK-DISABLE-NEXT: .LBB7_1: // %loop
859 ; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
860 ; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #5
861 ; CHECK-DISABLE-NEXT: add x10, x9, #16
862 ; CHECK-DISABLE-NEXT: ld1 { v0.8h }, [x9]
863 ; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #4
864 ; CHECK-DISABLE-NEXT: ld1 { v1.8h }, [x10]
865 ; CHECK-DISABLE-NEXT: add x8, x8, #1
866 ; CHECK-DISABLE-NEXT: cmp x8, #1000
867 ; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
868 ; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x9]
869 ; CHECK-DISABLE-NEXT: b.eq .LBB7_1
870 ; CHECK-DISABLE-NEXT: // %bb.2: // %exit
871 ; CHECK-DISABLE-NEXT: ret
876 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
877 %gep.A = getelementptr inbounds <16 x i16>, ptr %A, i64 %iv
878 %l.A = load <16 x i16>, ptr %gep.A
879 %trunc = trunc <16 x i16> %l.A to <16 x i8>
880 %gep.dst = getelementptr inbounds <16 x i8>, ptr %dst, i64 %iv
881 store <16 x i8> %trunc, ptr %gep.dst
882 %iv.next = add i64 %iv, 1
883 %ec = icmp eq i64 %iv.next, 1000
884 br i1 %ec, label %loop, label %exit
; Test: trunc <8 x i16> to <8 x i8> on a value loaded inside a loop.
; None of the three check prefixes expects a tbl or a constant-pool mask here:
; each expected sequence performs the truncate with a single xtn and stores
; 8 bytes.
; NOTE(review): the back-edge is taken when %iv.next == 1000 (b.eq in the
; checks), so the loop body executes once as written. Confirm this is intended
; before touching it; the autogenerated assertions encode the current form.
890 define void @trunc_v8i16_to_v8i8_in_loop(ptr %A, ptr %dst) {
891 ; CHECK-LABEL: trunc_v8i16_to_v8i8_in_loop:
892 ; CHECK: ; %bb.0: ; %entry
893 ; CHECK-NEXT: mov x8, xzr
894 ; CHECK-NEXT: LBB8_1: ; %loop
895 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
896 ; CHECK-NEXT: ldr q0, [x0, x8, lsl #4]
897 ; CHECK-NEXT: xtn.8b v0, v0
898 ; CHECK-NEXT: str d0, [x1, x8, lsl #3]
899 ; CHECK-NEXT: add x8, x8, #1
900 ; CHECK-NEXT: cmp x8, #1000
901 ; CHECK-NEXT: b.eq LBB8_1
902 ; CHECK-NEXT: ; %bb.2: ; %exit
905 ; CHECK-BE-LABEL: trunc_v8i16_to_v8i8_in_loop:
906 ; CHECK-BE: // %bb.0: // %entry
907 ; CHECK-BE-NEXT: mov x8, xzr
908 ; CHECK-BE-NEXT: .LBB8_1: // %loop
909 ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
910 ; CHECK-BE-NEXT: add x9, x0, x8, lsl #4
911 ; CHECK-BE-NEXT: ld1 { v0.8h }, [x9]
912 ; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
913 ; CHECK-BE-NEXT: add x8, x8, #1
914 ; CHECK-BE-NEXT: cmp x8, #1000
915 ; CHECK-BE-NEXT: xtn v0.8b, v0.8h
916 ; CHECK-BE-NEXT: st1 { v0.8b }, [x9]
917 ; CHECK-BE-NEXT: b.eq .LBB8_1
918 ; CHECK-BE-NEXT: // %bb.2: // %exit
921 ; CHECK-DISABLE-LABEL: trunc_v8i16_to_v8i8_in_loop:
922 ; CHECK-DISABLE: // %bb.0: // %entry
923 ; CHECK-DISABLE-NEXT: mov x8, xzr
924 ; CHECK-DISABLE-NEXT: .LBB8_1: // %loop
925 ; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
926 ; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #4
927 ; CHECK-DISABLE-NEXT: ld1 { v0.8h }, [x9]
928 ; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
929 ; CHECK-DISABLE-NEXT: add x8, x8, #1
930 ; CHECK-DISABLE-NEXT: cmp x8, #1000
931 ; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
932 ; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
933 ; CHECK-DISABLE-NEXT: b.eq .LBB8_1
934 ; CHECK-DISABLE-NEXT: // %bb.2: // %exit
935 ; CHECK-DISABLE-NEXT: ret
940 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
941 %gep.A = getelementptr inbounds <8 x i16>, ptr %A, i64 %iv
942 %l.A = load <8 x i16>, ptr %gep.A
943 %trunc = trunc <8 x i16> %l.A to <8 x i8>
944 %gep.dst = getelementptr inbounds <8 x i8>, ptr %dst, i64 %iv
945 store <8 x i8> %trunc, ptr %gep.dst
946 %iv.next = add i64 %iv, 1
947 %ec = icmp eq i64 %iv.next, 1000
948 br i1 %ec, label %loop, label %exit