1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s
3 ; RUN: llc -mtriple=aarch64_be-unknown-linux -o - %s | FileCheck --check-prefix=CHECK-BE %s
4 ; RUN: llc -mtriple=aarch64_be-unknown-linux -aarch64-enable-ext-to-tbl=false -o - %s | FileCheck --check-prefix=CHECK-DISABLE %s
6 ; CHECK-LABEL: lCPI0_0:
7 ; CHECK-NEXT: .byte 0 ; 0x0
8 ; CHECK-NEXT: .byte 4 ; 0x4
9 ; CHECK-NEXT: .byte 8 ; 0x8
10 ; CHECK-NEXT: .byte 12 ; 0xc
11 ; CHECK-NEXT: .byte 16 ; 0x10
12 ; CHECK-NEXT: .byte 20 ; 0x14
13 ; CHECK-NEXT: .byte 24 ; 0x18
14 ; CHECK-NEXT: .byte 28 ; 0x1c
15 ; CHECK-NEXT: .byte 32 ; 0x20
16 ; CHECK-NEXT: .byte 36 ; 0x24
17 ; CHECK-NEXT: .byte 40 ; 0x28
18 ; CHECK-NEXT: .byte 44 ; 0x2c
19 ; CHECK-NEXT: .byte 48 ; 0x30
20 ; CHECK-NEXT: .byte 52 ; 0x34
21 ; CHECK-NEXT: .byte 56 ; 0x38
22 ; CHECK-NEXT: .byte 60 ; 0x3c
24 ; CHECK-BE-LABEL: .LCPI0_0:
25 ; CHECK-BE-NEXT: .byte 3 // 0x3
26 ; CHECK-BE-NEXT: .byte 7 // 0x7
27 ; CHECK-BE-NEXT: .byte 11 // 0xb
28 ; CHECK-BE-NEXT: .byte 15 // 0xf
29 ; CHECK-BE-NEXT: .byte 19 // 0x13
30 ; CHECK-BE-NEXT: .byte 23 // 0x17
31 ; CHECK-BE-NEXT: .byte 27 // 0x1b
32 ; CHECK-BE-NEXT: .byte 31 // 0x1f
33 ; CHECK-BE-NEXT: .byte 35 // 0x23
34 ; CHECK-BE-NEXT: .byte 39 // 0x27
35 ; CHECK-BE-NEXT: .byte 43 // 0x2b
36 ; CHECK-BE-NEXT: .byte 47 // 0x2f
37 ; CHECK-BE-NEXT: .byte 51 // 0x33
38 ; CHECK-BE-NEXT: .byte 55 // 0x37
39 ; CHECK-BE-NEXT: .byte 59 // 0x3b
40 ; CHECK-BE-NEXT: .byte 63 // 0x3f
42 ; It's profitable to use a single tbl.4 instruction to lower the truncate.
; The test truncates <16 x i32> to <16 x i8> inside a loop. In the tbl
; lowerings the byte-index mask (constant pool entry CPI0_0: byte 0 of every
; i32 for the little-endian run, byte 3 for the big-endian run) is loaded once
; before the loop label and reused by the tbl in every iteration. With
; -aarch64-enable-ext-to-tbl=false (CHECK-DISABLE) the expected lowering is a
; chain of three uzp1 instructions and no constant-pool load.
; NOTE(review): the back edge is taken when %iv.next == 1000 (b.eq in the
; expected code), so the branch targets look swapped for a counted loop;
; confirm this is intentional before regenerating the checks.
43 define void @trunc_v16i32_to_v16i8_in_loop(ptr %A, ptr %dst) {
44 ; CHECK-LABEL: trunc_v16i32_to_v16i8_in_loop:
45 ; CHECK: ; %bb.0: ; %entry
47 ; CHECK-NEXT: adrp x8, lCPI0_0@PAGE
49 ; CHECK-NEXT: ldr q0, [x8, lCPI0_0@PAGEOFF]
50 ; CHECK-NEXT: mov x8, xzr
51 ; CHECK-NEXT: LBB0_1: ; %loop
52 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
53 ; CHECK-NEXT: add x9, x0, x8, lsl #6
54 ; CHECK-NEXT: ldp q1, q2, [x9]
55 ; CHECK-NEXT: ldp q3, q4, [x9, #32]
56 ; CHECK-NEXT: tbl.16b v1, { v1, v2, v3, v4 }, v0
57 ; CHECK-NEXT: str q1, [x1, x8, lsl #4]
58 ; CHECK-NEXT: add x8, x8, #1
59 ; CHECK-NEXT: cmp x8, #1000
60 ; CHECK-NEXT: b.eq LBB0_1
61 ; CHECK-NEXT: ; %bb.2: ; %exit
63 ; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1
65 ; CHECK-BE-LABEL: trunc_v16i32_to_v16i8_in_loop:
66 ; CHECK-BE: // %bb.0: // %entry
67 ; CHECK-BE-NEXT: adrp x8, .LCPI0_0
68 ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI0_0
69 ; CHECK-BE-NEXT: ld1 { v0.16b }, [x8]
70 ; CHECK-BE-NEXT: mov x8, xzr
71 ; CHECK-BE-NEXT: .LBB0_1: // %loop
72 ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
73 ; CHECK-BE-NEXT: add x9, x0, x8, lsl #6
74 ; CHECK-BE-NEXT: add x10, x9, #16
75 ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
76 ; CHECK-BE-NEXT: add x11, x9, #32
77 ; CHECK-BE-NEXT: ld1 { v2.16b }, [x10]
78 ; CHECK-BE-NEXT: add x9, x9, #48
79 ; CHECK-BE-NEXT: ld1 { v3.16b }, [x11]
80 ; CHECK-BE-NEXT: ld1 { v4.16b }, [x9]
81 ; CHECK-BE-NEXT: add x9, x1, x8, lsl #4
82 ; CHECK-BE-NEXT: add x8, x8, #1
83 ; CHECK-BE-NEXT: cmp x8, #1000
84 ; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
85 ; CHECK-BE-NEXT: st1 { v1.16b }, [x9]
86 ; CHECK-BE-NEXT: b.eq .LBB0_1
87 ; CHECK-BE-NEXT: // %bb.2: // %exit
90 ; CHECK-DISABLE-LABEL: trunc_v16i32_to_v16i8_in_loop:
91 ; CHECK-DISABLE: // %bb.0: // %entry
92 ; CHECK-DISABLE-NEXT: mov x8, xzr
93 ; CHECK-DISABLE-NEXT: .LBB0_1: // %loop
94 ; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
95 ; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #6
96 ; CHECK-DISABLE-NEXT: ld1 { v0.4s }, [x9]
97 ; CHECK-DISABLE-NEXT: add x10, x9, #16
98 ; CHECK-DISABLE-NEXT: add x11, x9, #48
99 ; CHECK-DISABLE-NEXT: add x9, x9, #32
100 ; CHECK-DISABLE-NEXT: ld1 { v1.4s }, [x10]
101 ; CHECK-DISABLE-NEXT: ld1 { v2.4s }, [x11]
102 ; CHECK-DISABLE-NEXT: ld1 { v3.4s }, [x9]
103 ; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #4
104 ; CHECK-DISABLE-NEXT: add x8, x8, #1
105 ; CHECK-DISABLE-NEXT: cmp x8, #1000
106 ; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
107 ; CHECK-DISABLE-NEXT: uzp1 v2.8h, v3.8h, v2.8h
108 ; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v2.16b
109 ; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x9]
110 ; CHECK-DISABLE-NEXT: b.eq .LBB0_1
111 ; CHECK-DISABLE-NEXT: // %bb.2: // %exit
112 ; CHECK-DISABLE-NEXT: ret
117 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
118 %gep.A = getelementptr inbounds <16 x i32>, ptr %A, i64 %iv
119 %l.A = load <16 x i32>, ptr %gep.A
120 %trunc = trunc <16 x i32> %l.A to <16 x i8>
121 %gep.dst = getelementptr inbounds <16 x i8>, ptr %dst, i64 %iv
122 store <16 x i8> %trunc, ptr %gep.dst
123 %iv.next = add i64 %iv, 1
124 %ec = icmp eq i64 %iv.next, 1000
125 br i1 %ec, label %loop, label %exit
131 ; Not profitable to use tbl, as materializing the masks requires more
; instructions.
; Same <16 x i32> to <16 x i8> truncate as the in-loop variant, but executed
; once in straight-line code. All three RUN configurations are expected to
; lower it with three uzp1 instructions; no tbl and no constant-pool mask
; should appear.
133 define void @trunc_v16i32_to_v16i8_no_loop(ptr %A, ptr %dst) {
134 ; CHECK-LABEL: trunc_v16i32_to_v16i8_no_loop:
135 ; CHECK: ; %bb.0: ; %entry
136 ; CHECK-NEXT: ldp q1, q0, [x0]
137 ; CHECK-NEXT: ldp q3, q2, [x0, #32]
138 ; CHECK-NEXT: uzp1.8h v0, v1, v0
139 ; CHECK-NEXT: uzp1.8h v2, v3, v2
140 ; CHECK-NEXT: uzp1.16b v0, v0, v2
141 ; CHECK-NEXT: str q0, [x1]
144 ; CHECK-BE-LABEL: trunc_v16i32_to_v16i8_no_loop:
145 ; CHECK-BE: // %bb.0: // %entry
146 ; CHECK-BE-NEXT: add x8, x0, #16
147 ; CHECK-BE-NEXT: add x9, x0, #48
148 ; CHECK-BE-NEXT: add x10, x0, #32
149 ; CHECK-BE-NEXT: ld1 { v0.4s }, [x0]
150 ; CHECK-BE-NEXT: ld1 { v1.4s }, [x8]
151 ; CHECK-BE-NEXT: ld1 { v2.4s }, [x9]
152 ; CHECK-BE-NEXT: ld1 { v3.4s }, [x10]
153 ; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
154 ; CHECK-BE-NEXT: uzp1 v2.8h, v3.8h, v2.8h
155 ; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v2.16b
156 ; CHECK-BE-NEXT: st1 { v0.16b }, [x1]
159 ; CHECK-DISABLE-LABEL: trunc_v16i32_to_v16i8_no_loop:
160 ; CHECK-DISABLE: // %bb.0: // %entry
161 ; CHECK-DISABLE-NEXT: add x8, x0, #16
162 ; CHECK-DISABLE-NEXT: add x9, x0, #48
163 ; CHECK-DISABLE-NEXT: add x10, x0, #32
164 ; CHECK-DISABLE-NEXT: ld1 { v0.4s }, [x0]
165 ; CHECK-DISABLE-NEXT: ld1 { v1.4s }, [x8]
166 ; CHECK-DISABLE-NEXT: ld1 { v2.4s }, [x9]
167 ; CHECK-DISABLE-NEXT: ld1 { v3.4s }, [x10]
168 ; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
169 ; CHECK-DISABLE-NEXT: uzp1 v2.8h, v3.8h, v2.8h
170 ; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v2.16b
171 ; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x1]
172 ; CHECK-DISABLE-NEXT: ret
174 %l.A = load <16 x i32>, ptr %A
175 %trunc = trunc <16 x i32> %l.A to <16 x i8>
176 store <16 x i8> %trunc, ptr %dst
181 ; CHECK-LABEL: lCPI2_0:
182 ; CHECK-NEXT: .byte 0 ; 0x0
183 ; CHECK-NEXT: .byte 4 ; 0x4
184 ; CHECK-NEXT: .byte 8 ; 0x8
185 ; CHECK-NEXT: .byte 12 ; 0xc
186 ; CHECK-NEXT: .byte 16 ; 0x10
187 ; CHECK-NEXT: .byte 20 ; 0x14
188 ; CHECK-NEXT: .byte 24 ; 0x18
189 ; CHECK-NEXT: .byte 28 ; 0x1c
190 ; CHECK-NEXT: .byte 255 ; 0xff
191 ; CHECK-NEXT: .byte 255 ; 0xff
192 ; CHECK-NEXT: .byte 255 ; 0xff
193 ; CHECK-NEXT: .byte 255 ; 0xff
194 ; CHECK-NEXT: .byte 255 ; 0xff
195 ; CHECK-NEXT: .byte 255 ; 0xff
196 ; CHECK-NEXT: .byte 255 ; 0xff
197 ; CHECK-NEXT: .byte 255 ; 0xff
199 ; CHECK-BE-LABEL: .LCPI2_0:
200 ; CHECK-BE-NEXT: .byte 3 // 0x3
201 ; CHECK-BE-NEXT: .byte 7 // 0x7
202 ; CHECK-BE-NEXT: .byte 11 // 0xb
203 ; CHECK-BE-NEXT: .byte 15 // 0xf
204 ; CHECK-BE-NEXT: .byte 19 // 0x13
205 ; CHECK-BE-NEXT: .byte 23 // 0x17
206 ; CHECK-BE-NEXT: .byte 27 // 0x1b
207 ; CHECK-BE-NEXT: .byte 31 // 0x1f
208 ; CHECK-BE-NEXT: .byte 255 // 0xff
209 ; CHECK-BE-NEXT: .byte 255 // 0xff
210 ; CHECK-BE-NEXT: .byte 255 // 0xff
211 ; CHECK-BE-NEXT: .byte 255 // 0xff
212 ; CHECK-BE-NEXT: .byte 255 // 0xff
213 ; CHECK-BE-NEXT: .byte 255 // 0xff
214 ; CHECK-BE-NEXT: .byte 255 // 0xff
215 ; CHECK-BE-NEXT: .byte 255 // 0xff
216 ; It's profitable to use a single tbl.2 instruction to lower the truncate.
; The test truncates <8 x i32> to <8 x i8> inside a loop. The mask (CPI2_0)
; selects one byte from each of the 8 source elements held in the two table
; registers; its upper 8 entries are 255, which is out of range for a 32-byte
; table, so tbl writes 0 to those lanes. Only the low 64 bits of the tbl
; result are stored (str d1 / st1 { v1.8b }). With the transform disabled
; (CHECK-DISABLE) the expected lowering is uzp1 followed by xtn.
217 define void @trunc_v8i32_to_v8i8_in_loop(ptr %A, ptr %dst) {
218 ; CHECK-LABEL: trunc_v8i32_to_v8i8_in_loop:
219 ; CHECK: ; %bb.0: ; %entry
221 ; CHECK-NEXT: adrp x8, lCPI2_0@PAGE
223 ; CHECK-NEXT: ldr q0, [x8, lCPI2_0@PAGEOFF]
224 ; CHECK-NEXT: mov x8, xzr
225 ; CHECK-NEXT: LBB2_1: ; %loop
226 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
227 ; CHECK-NEXT: add x9, x0, x8, lsl #5
228 ; CHECK-NEXT: ldp q1, q2, [x9]
229 ; CHECK-NEXT: tbl.16b v1, { v1, v2 }, v0
230 ; CHECK-NEXT: str d1, [x1, x8, lsl #3]
231 ; CHECK-NEXT: add x8, x8, #1
232 ; CHECK-NEXT: cmp x8, #1000
233 ; CHECK-NEXT: b.eq LBB2_1
234 ; CHECK-NEXT: ; %bb.2: ; %exit
236 ; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh3
238 ; CHECK-BE-LABEL: trunc_v8i32_to_v8i8_in_loop:
239 ; CHECK-BE: // %bb.0: // %entry
240 ; CHECK-BE-NEXT: adrp x8, .LCPI2_0
241 ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI2_0
242 ; CHECK-BE-NEXT: ld1 { v0.16b }, [x8]
243 ; CHECK-BE-NEXT: mov x8, xzr
244 ; CHECK-BE-NEXT: .LBB2_1: // %loop
245 ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
246 ; CHECK-BE-NEXT: add x9, x0, x8, lsl #5
247 ; CHECK-BE-NEXT: add x10, x9, #16
248 ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
249 ; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
250 ; CHECK-BE-NEXT: ld1 { v2.16b }, [x10]
251 ; CHECK-BE-NEXT: add x8, x8, #1
252 ; CHECK-BE-NEXT: cmp x8, #1000
253 ; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v0.16b
254 ; CHECK-BE-NEXT: st1 { v1.8b }, [x9]
255 ; CHECK-BE-NEXT: b.eq .LBB2_1
256 ; CHECK-BE-NEXT: // %bb.2: // %exit
259 ; CHECK-DISABLE-LABEL: trunc_v8i32_to_v8i8_in_loop:
260 ; CHECK-DISABLE: // %bb.0: // %entry
261 ; CHECK-DISABLE-NEXT: mov x8, xzr
262 ; CHECK-DISABLE-NEXT: .LBB2_1: // %loop
263 ; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
264 ; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #5
265 ; CHECK-DISABLE-NEXT: add x10, x9, #16
266 ; CHECK-DISABLE-NEXT: ld1 { v0.4s }, [x9]
267 ; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
268 ; CHECK-DISABLE-NEXT: ld1 { v1.4s }, [x10]
269 ; CHECK-DISABLE-NEXT: add x8, x8, #1
270 ; CHECK-DISABLE-NEXT: cmp x8, #1000
271 ; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
272 ; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
273 ; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
274 ; CHECK-DISABLE-NEXT: b.eq .LBB2_1
275 ; CHECK-DISABLE-NEXT: // %bb.2: // %exit
276 ; CHECK-DISABLE-NEXT: ret
281 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
282 %gep.A = getelementptr inbounds <8 x i32>, ptr %A, i64 %iv
283 %l.A = load <8 x i32>, ptr %gep.A
284 %trunc = trunc <8 x i32> %l.A to <8 x i8>
285 %gep.dst = getelementptr inbounds <8 x i8>, ptr %dst, i64 %iv
286 store <8 x i8> %trunc, ptr %gep.dst
287 %iv.next = add i64 %iv, 1
288 %ec = icmp eq i64 %iv.next, 1000
289 br i1 %ec, label %loop, label %exit
295 ; CHECK-LABEL: lCPI3_0:
296 ; CHECK-NEXT: .byte 0 ; 0x0
297 ; CHECK-NEXT: .byte 8 ; 0x8
298 ; CHECK-NEXT: .byte 16 ; 0x10
299 ; CHECK-NEXT: .byte 24 ; 0x18
300 ; CHECK-NEXT: .byte 32 ; 0x20
301 ; CHECK-NEXT: .byte 40 ; 0x28
302 ; CHECK-NEXT: .byte 48 ; 0x30
303 ; CHECK-NEXT: .byte 56 ; 0x38
304 ; CHECK-NEXT: .byte 64 ; 0x40
305 ; CHECK-NEXT: .byte 72 ; 0x48
306 ; CHECK-NEXT: .byte 80 ; 0x50
307 ; CHECK-NEXT: .byte 88 ; 0x58
308 ; CHECK-NEXT: .byte 96 ; 0x60
309 ; CHECK-NEXT: .byte 104 ; 0x68
310 ; CHECK-NEXT: .byte 112 ; 0x70
311 ; CHECK-NEXT: .byte 120 ; 0x78
313 ; CHECK-BE-LABEL: .LCPI3_0:
314 ; CHECK-BE-NEXT: .byte 7 // 0x7
315 ; CHECK-BE-NEXT: .byte 15 // 0xf
316 ; CHECK-BE-NEXT: .byte 23 // 0x17
317 ; CHECK-BE-NEXT: .byte 31 // 0x1f
318 ; CHECK-BE-NEXT: .byte 39 // 0x27
319 ; CHECK-BE-NEXT: .byte 47 // 0x2f
320 ; CHECK-BE-NEXT: .byte 55 // 0x37
321 ; CHECK-BE-NEXT: .byte 63 // 0x3f
322 ; CHECK-BE-NEXT: .byte 71 // 0x47
323 ; CHECK-BE-NEXT: .byte 79 // 0x4f
324 ; CHECK-BE-NEXT: .byte 87 // 0x57
325 ; CHECK-BE-NEXT: .byte 95 // 0x5f
326 ; CHECK-BE-NEXT: .byte 103 // 0x67
327 ; CHECK-BE-NEXT: .byte 111 // 0x6f
328 ; CHECK-BE-NEXT: .byte 119 // 0x77
329 ; CHECK-BE-NEXT: .byte 127 // 0x7f
; Truncate <16 x i64> to <16 x i8> inside a loop. The source occupies eight
; 128-bit registers, so the tbl lowerings are expected to use two tbl.4
; instructions that share one mask (CPI3_0) loaded before the loop. The mask
; strides by 8 bytes; only its first 8 indices fall inside a 64-byte table, so
; each tbl yields 8 result bytes in its low half, and the two halves are then
; merged with a 64-bit lane move before the store. With the transform disabled
; (CHECK-DISABLE) the expected lowering is a tree of seven uzp1 instructions.
330 define void @trunc_v16i64_to_v16i8_in_loop(ptr %A, ptr %dst) {
331 ; CHECK-LABEL: trunc_v16i64_to_v16i8_in_loop:
332 ; CHECK: ; %bb.0: ; %entry
334 ; CHECK-NEXT: adrp x8, lCPI3_0@PAGE
336 ; CHECK-NEXT: ldr q0, [x8, lCPI3_0@PAGEOFF]
337 ; CHECK-NEXT: mov x8, xzr
338 ; CHECK-NEXT: LBB3_1: ; %loop
339 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
340 ; CHECK-NEXT: add x9, x0, x8, lsl #7
341 ; CHECK-NEXT: ldp q1, q2, [x9]
342 ; CHECK-NEXT: ldp q16, q17, [x9, #64]
343 ; CHECK-NEXT: ldp q3, q4, [x9, #32]
344 ; CHECK-NEXT: ldp q18, q19, [x9, #96]
345 ; CHECK-NEXT: tbl.16b v1, { v1, v2, v3, v4 }, v0
346 ; CHECK-NEXT: tbl.16b v2, { v16, v17, v18, v19 }, v0
347 ; CHECK-NEXT: mov.d v1[1], v2[0]
348 ; CHECK-NEXT: str q1, [x1, x8, lsl #4]
349 ; CHECK-NEXT: add x8, x8, #1
350 ; CHECK-NEXT: cmp x8, #1000
351 ; CHECK-NEXT: b.eq LBB3_1
352 ; CHECK-NEXT: ; %bb.2: ; %exit
354 ; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh5
356 ; CHECK-BE-LABEL: trunc_v16i64_to_v16i8_in_loop:
357 ; CHECK-BE: // %bb.0: // %entry
358 ; CHECK-BE-NEXT: adrp x8, .LCPI3_0
359 ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI3_0
360 ; CHECK-BE-NEXT: ld1 { v0.16b }, [x8]
361 ; CHECK-BE-NEXT: mov x8, xzr
362 ; CHECK-BE-NEXT: .LBB3_1: // %loop
363 ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
364 ; CHECK-BE-NEXT: add x9, x0, x8, lsl #7
365 ; CHECK-BE-NEXT: add x13, x9, #64
366 ; CHECK-BE-NEXT: add x12, x9, #80
367 ; CHECK-BE-NEXT: add x14, x9, #16
368 ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
369 ; CHECK-BE-NEXT: ld1 { v16.16b }, [x13]
370 ; CHECK-BE-NEXT: add x11, x9, #96
371 ; CHECK-BE-NEXT: add x13, x9, #32
372 ; CHECK-BE-NEXT: ld1 { v2.16b }, [x14]
373 ; CHECK-BE-NEXT: ld1 { v17.16b }, [x12]
374 ; CHECK-BE-NEXT: add x10, x9, #112
375 ; CHECK-BE-NEXT: add x9, x9, #48
376 ; CHECK-BE-NEXT: ld1 { v3.16b }, [x13]
377 ; CHECK-BE-NEXT: ld1 { v18.16b }, [x11]
378 ; CHECK-BE-NEXT: ld1 { v4.16b }, [x9]
379 ; CHECK-BE-NEXT: add x9, x1, x8, lsl #4
380 ; CHECK-BE-NEXT: ld1 { v19.16b }, [x10]
381 ; CHECK-BE-NEXT: add x8, x8, #1
382 ; CHECK-BE-NEXT: cmp x8, #1000
383 ; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
384 ; CHECK-BE-NEXT: tbl v2.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b
385 ; CHECK-BE-NEXT: mov v1.d[1], v2.d[0]
386 ; CHECK-BE-NEXT: st1 { v1.16b }, [x9]
387 ; CHECK-BE-NEXT: b.eq .LBB3_1
388 ; CHECK-BE-NEXT: // %bb.2: // %exit
391 ; CHECK-DISABLE-LABEL: trunc_v16i64_to_v16i8_in_loop:
392 ; CHECK-DISABLE: // %bb.0: // %entry
393 ; CHECK-DISABLE-NEXT: mov x8, xzr
394 ; CHECK-DISABLE-NEXT: .LBB3_1: // %loop
395 ; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
396 ; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #7
397 ; CHECK-DISABLE-NEXT: add x10, x9, #16
398 ; CHECK-DISABLE-NEXT: add x11, x9, #48
399 ; CHECK-DISABLE-NEXT: ld1 { v0.2d }, [x9]
400 ; CHECK-DISABLE-NEXT: ld1 { v1.2d }, [x10]
401 ; CHECK-DISABLE-NEXT: add x10, x9, #112
402 ; CHECK-DISABLE-NEXT: ld1 { v2.2d }, [x11]
403 ; CHECK-DISABLE-NEXT: ld1 { v3.2d }, [x10]
404 ; CHECK-DISABLE-NEXT: add x10, x9, #96
405 ; CHECK-DISABLE-NEXT: add x11, x9, #32
406 ; CHECK-DISABLE-NEXT: ld1 { v4.2d }, [x10]
407 ; CHECK-DISABLE-NEXT: add x10, x9, #80
408 ; CHECK-DISABLE-NEXT: add x9, x9, #64
409 ; CHECK-DISABLE-NEXT: ld1 { v5.2d }, [x11]
410 ; CHECK-DISABLE-NEXT: ld1 { v6.2d }, [x10]
411 ; CHECK-DISABLE-NEXT: ld1 { v7.2d }, [x9]
412 ; CHECK-DISABLE-NEXT: uzp1 v0.4s, v0.4s, v1.4s
413 ; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #4
414 ; CHECK-DISABLE-NEXT: add x8, x8, #1
415 ; CHECK-DISABLE-NEXT: uzp1 v3.4s, v4.4s, v3.4s
416 ; CHECK-DISABLE-NEXT: cmp x8, #1000
417 ; CHECK-DISABLE-NEXT: uzp1 v4.4s, v7.4s, v6.4s
418 ; CHECK-DISABLE-NEXT: uzp1 v2.4s, v5.4s, v2.4s
419 ; CHECK-DISABLE-NEXT: uzp1 v1.8h, v4.8h, v3.8h
420 ; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
421 ; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
422 ; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x9]
423 ; CHECK-DISABLE-NEXT: b.eq .LBB3_1
424 ; CHECK-DISABLE-NEXT: // %bb.2: // %exit
425 ; CHECK-DISABLE-NEXT: ret
430 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
431 %gep.A = getelementptr inbounds <16 x i64>, ptr %A, i64 %iv
432 %l.A = load <16 x i64>, ptr %gep.A
433 %trunc = trunc <16 x i64> %l.A to <16 x i8>
434 %gep.dst = getelementptr inbounds <16 x i8>, ptr %dst, i64 %iv
435 store <16 x i8> %trunc, ptr %gep.dst
436 %iv.next = add i64 %iv, 1
437 %ec = icmp eq i64 %iv.next, 1000
438 br i1 %ec, label %loop, label %exit
444 ; CHECK-LABEL: lCPI4_0:
445 ; CHECK-NEXT: .byte 0 ; 0x0
446 ; CHECK-NEXT: .byte 8 ; 0x8
447 ; CHECK-NEXT: .byte 16 ; 0x10
448 ; CHECK-NEXT: .byte 24 ; 0x18
449 ; CHECK-NEXT: .byte 32 ; 0x20
450 ; CHECK-NEXT: .byte 40 ; 0x28
451 ; CHECK-NEXT: .byte 48 ; 0x30
452 ; CHECK-NEXT: .byte 56 ; 0x38
453 ; CHECK-NEXT: .byte 255 ; 0xff
454 ; CHECK-NEXT: .byte 255 ; 0xff
455 ; CHECK-NEXT: .byte 255 ; 0xff
456 ; CHECK-NEXT: .byte 255 ; 0xff
457 ; CHECK-NEXT: .byte 255 ; 0xff
458 ; CHECK-NEXT: .byte 255 ; 0xff
459 ; CHECK-NEXT: .byte 255 ; 0xff
460 ; CHECK-NEXT: .byte 255 ; 0xff
462 ; CHECK-BE-LABEL: .LCPI4_0:
463 ; CHECK-BE-NEXT: .byte 7 // 0x7
464 ; CHECK-BE-NEXT: .byte 15 // 0xf
465 ; CHECK-BE-NEXT: .byte 23 // 0x17
466 ; CHECK-BE-NEXT: .byte 31 // 0x1f
467 ; CHECK-BE-NEXT: .byte 39 // 0x27
468 ; CHECK-BE-NEXT: .byte 47 // 0x2f
469 ; CHECK-BE-NEXT: .byte 55 // 0x37
470 ; CHECK-BE-NEXT: .byte 63 // 0x3f
471 ; CHECK-BE-NEXT: .byte 255 // 0xff
472 ; CHECK-BE-NEXT: .byte 255 // 0xff
473 ; CHECK-BE-NEXT: .byte 255 // 0xff
474 ; CHECK-BE-NEXT: .byte 255 // 0xff
475 ; CHECK-BE-NEXT: .byte 255 // 0xff
476 ; CHECK-BE-NEXT: .byte 255 // 0xff
477 ; CHECK-BE-NEXT: .byte 255 // 0xff
478 ; CHECK-BE-NEXT: .byte 255 // 0xff
; Truncate <8 x i64> to <8 x i8> inside a loop. The tbl lowerings are expected
; to use a single tbl.4 over the four source registers with a mask (CPI4_0)
; that strides by 8 bytes for its first 8 entries and is 255 for the rest.
; Only the low 64 bits of the tbl result are stored (str d1 /
; st1 { v1.8b }). With the transform disabled (CHECK-DISABLE) the expected
; lowering is three uzp1 instructions followed by xtn.
479 define void @trunc_v8i64_to_v8i8_in_loop(ptr %A, ptr %dst) {
480 ; CHECK-LABEL: trunc_v8i64_to_v8i8_in_loop:
481 ; CHECK: ; %bb.0: ; %entry
483 ; CHECK-NEXT: adrp x8, lCPI4_0@PAGE
485 ; CHECK-NEXT: ldr q0, [x8, lCPI4_0@PAGEOFF]
486 ; CHECK-NEXT: mov x8, xzr
487 ; CHECK-NEXT: LBB4_1: ; %loop
488 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
489 ; CHECK-NEXT: add x9, x0, x8, lsl #6
490 ; CHECK-NEXT: ldp q1, q2, [x9]
491 ; CHECK-NEXT: ldp q3, q4, [x9, #32]
492 ; CHECK-NEXT: tbl.16b v1, { v1, v2, v3, v4 }, v0
493 ; CHECK-NEXT: str d1, [x1, x8, lsl #3]
494 ; CHECK-NEXT: add x8, x8, #1
495 ; CHECK-NEXT: cmp x8, #1000
496 ; CHECK-NEXT: b.eq LBB4_1
497 ; CHECK-NEXT: ; %bb.2: ; %exit
499 ; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh7
501 ; CHECK-BE-LABEL: trunc_v8i64_to_v8i8_in_loop:
502 ; CHECK-BE: // %bb.0: // %entry
503 ; CHECK-BE-NEXT: adrp x8, .LCPI4_0
504 ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI4_0
505 ; CHECK-BE-NEXT: ld1 { v0.16b }, [x8]
506 ; CHECK-BE-NEXT: mov x8, xzr
507 ; CHECK-BE-NEXT: .LBB4_1: // %loop
508 ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
509 ; CHECK-BE-NEXT: add x9, x0, x8, lsl #6
510 ; CHECK-BE-NEXT: add x10, x9, #16
511 ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
512 ; CHECK-BE-NEXT: add x11, x9, #32
513 ; CHECK-BE-NEXT: ld1 { v2.16b }, [x10]
514 ; CHECK-BE-NEXT: add x9, x9, #48
515 ; CHECK-BE-NEXT: ld1 { v3.16b }, [x11]
516 ; CHECK-BE-NEXT: ld1 { v4.16b }, [x9]
517 ; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
518 ; CHECK-BE-NEXT: add x8, x8, #1
519 ; CHECK-BE-NEXT: cmp x8, #1000
520 ; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
521 ; CHECK-BE-NEXT: st1 { v1.8b }, [x9]
522 ; CHECK-BE-NEXT: b.eq .LBB4_1
523 ; CHECK-BE-NEXT: // %bb.2: // %exit
526 ; CHECK-DISABLE-LABEL: trunc_v8i64_to_v8i8_in_loop:
527 ; CHECK-DISABLE: // %bb.0: // %entry
528 ; CHECK-DISABLE-NEXT: mov x8, xzr
529 ; CHECK-DISABLE-NEXT: .LBB4_1: // %loop
530 ; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
531 ; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #6
532 ; CHECK-DISABLE-NEXT: ld1 { v0.2d }, [x9]
533 ; CHECK-DISABLE-NEXT: add x10, x9, #16
534 ; CHECK-DISABLE-NEXT: add x11, x9, #48
535 ; CHECK-DISABLE-NEXT: add x9, x9, #32
536 ; CHECK-DISABLE-NEXT: ld1 { v1.2d }, [x10]
537 ; CHECK-DISABLE-NEXT: ld1 { v2.2d }, [x11]
538 ; CHECK-DISABLE-NEXT: ld1 { v3.2d }, [x9]
539 ; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
540 ; CHECK-DISABLE-NEXT: add x8, x8, #1
541 ; CHECK-DISABLE-NEXT: cmp x8, #1000
542 ; CHECK-DISABLE-NEXT: uzp1 v0.4s, v0.4s, v1.4s
543 ; CHECK-DISABLE-NEXT: uzp1 v2.4s, v3.4s, v2.4s
544 ; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
545 ; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
546 ; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
547 ; CHECK-DISABLE-NEXT: b.eq .LBB4_1
548 ; CHECK-DISABLE-NEXT: // %bb.2: // %exit
549 ; CHECK-DISABLE-NEXT: ret
554 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
555 %gep.A = getelementptr inbounds <8 x i64>, ptr %A, i64 %iv
556 %l.A = load <8 x i64>, ptr %gep.A
557 %trunc = trunc <8 x i64> %l.A to <8 x i8>
558 %gep.dst = getelementptr inbounds <8 x i8>, ptr %dst, i64 %iv
559 store <8 x i8> %trunc, ptr %gep.dst
560 %iv.next = add i64 %iv, 1
561 %ec = icmp eq i64 %iv.next, 1000
562 br i1 %ec, label %loop, label %exit
; Truncate <8 x i19> to <8 x i8> inside a loop. The source element width is
; not a power of two, and none of the three RUN configurations is expected to
; use tbl or a constant-pool mask here. The expected code pulls the packed
; 19-bit elements out with scalar shifts and extr, inserts them into 32-bit
; vector lanes, and narrows with uzp1 followed by xtn.
568 define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
569 ; CHECK-LABEL: trunc_v8i19_to_v8i8_in_loop:
570 ; CHECK: ; %bb.0: ; %entry
571 ; CHECK-NEXT: mov x8, xzr
572 ; CHECK-NEXT: LBB5_1: ; %loop
573 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
574 ; CHECK-NEXT: ldp x9, x10, [x0]
575 ; CHECK-NEXT: ldrb w14, [x0, #18]
576 ; CHECK-NEXT: ldrh w15, [x0, #16]
577 ; CHECK-NEXT: add x0, x0, #32
578 ; CHECK-NEXT: lsr w12, w10, #12
579 ; CHECK-NEXT: fmov s1, w9
580 ; CHECK-NEXT: lsr x11, x9, #19
581 ; CHECK-NEXT: lsr x13, x10, #31
582 ; CHECK-NEXT: fmov s0, w12
583 ; CHECK-NEXT: lsr x12, x9, #38
584 ; CHECK-NEXT: extr x9, x10, x9, #57
585 ; CHECK-NEXT: mov.s v1[1], w11
586 ; CHECK-NEXT: orr x11, x15, x14, lsl #16
587 ; CHECK-NEXT: mov.s v0[1], w13
588 ; CHECK-NEXT: extr x13, x11, x10, #50
589 ; CHECK-NEXT: lsr w10, w11, #5
590 ; CHECK-NEXT: mov.s v1[2], w12
591 ; CHECK-NEXT: mov.s v0[2], w13
592 ; CHECK-NEXT: mov.s v1[3], w9
593 ; CHECK-NEXT: mov.s v0[3], w10
594 ; CHECK-NEXT: uzp1.8h v0, v1, v0
595 ; CHECK-NEXT: xtn.8b v0, v0
596 ; CHECK-NEXT: str d0, [x1, x8, lsl #3]
597 ; CHECK-NEXT: add x8, x8, #1
598 ; CHECK-NEXT: cmp x8, #1000
599 ; CHECK-NEXT: b.eq LBB5_1
600 ; CHECK-NEXT: ; %bb.2: ; %exit
603 ; CHECK-BE-LABEL: trunc_v8i19_to_v8i8_in_loop:
604 ; CHECK-BE: // %bb.0: // %entry
605 ; CHECK-BE-NEXT: mov x8, xzr
606 ; CHECK-BE-NEXT: .LBB5_1: // %loop
607 ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
608 ; CHECK-BE-NEXT: ldp x10, x9, [x0]
609 ; CHECK-BE-NEXT: ldrh w16, [x0, #16]
610 ; CHECK-BE-NEXT: ldrb w17, [x0, #18]
611 ; CHECK-BE-NEXT: add x0, x0, #32
612 ; CHECK-BE-NEXT: lsl x11, x9, #24
613 ; CHECK-BE-NEXT: lsr x12, x9, #40
614 ; CHECK-BE-NEXT: lsr x13, x10, #45
615 ; CHECK-BE-NEXT: lsl x14, x10, #24
616 ; CHECK-BE-NEXT: lsr x15, x10, #40
617 ; CHECK-BE-NEXT: extr x12, x12, x11, #57
618 ; CHECK-BE-NEXT: fmov s0, w13
619 ; CHECK-BE-NEXT: lsr w13, w10, #7
620 ; CHECK-BE-NEXT: extr x14, x15, x14, #50
621 ; CHECK-BE-NEXT: lsr w15, w9, #14
622 ; CHECK-BE-NEXT: extr x9, x10, x9, #40
623 ; CHECK-BE-NEXT: fmov s1, w12
624 ; CHECK-BE-NEXT: orr w12, w17, w16, lsl #8
625 ; CHECK-BE-NEXT: mov v0.s[1], w14
626 ; CHECK-BE-NEXT: lsr w9, w9, #12
627 ; CHECK-BE-NEXT: orr w11, w12, w11
628 ; CHECK-BE-NEXT: mov v1.s[1], w15
629 ; CHECK-BE-NEXT: lsr w11, w11, #19
630 ; CHECK-BE-NEXT: mov v0.s[2], w13
631 ; CHECK-BE-NEXT: mov v1.s[2], w11
632 ; CHECK-BE-NEXT: mov v0.s[3], w9
633 ; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
634 ; CHECK-BE-NEXT: add x8, x8, #1
635 ; CHECK-BE-NEXT: cmp x8, #1000
636 ; CHECK-BE-NEXT: mov v1.s[3], w12
637 ; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
638 ; CHECK-BE-NEXT: xtn v0.8b, v0.8h
639 ; CHECK-BE-NEXT: st1 { v0.8b }, [x9]
640 ; CHECK-BE-NEXT: b.eq .LBB5_1
641 ; CHECK-BE-NEXT: // %bb.2: // %exit
644 ; CHECK-DISABLE-LABEL: trunc_v8i19_to_v8i8_in_loop:
645 ; CHECK-DISABLE: // %bb.0: // %entry
646 ; CHECK-DISABLE-NEXT: mov x8, xzr
647 ; CHECK-DISABLE-NEXT: .LBB5_1: // %loop
648 ; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
649 ; CHECK-DISABLE-NEXT: ldp x10, x9, [x0]
650 ; CHECK-DISABLE-NEXT: ldrh w16, [x0, #16]
651 ; CHECK-DISABLE-NEXT: ldrb w17, [x0, #18]
652 ; CHECK-DISABLE-NEXT: add x0, x0, #32
653 ; CHECK-DISABLE-NEXT: lsl x11, x9, #24
654 ; CHECK-DISABLE-NEXT: lsr x12, x9, #40
655 ; CHECK-DISABLE-NEXT: lsr x13, x10, #45
656 ; CHECK-DISABLE-NEXT: lsl x14, x10, #24
657 ; CHECK-DISABLE-NEXT: lsr x15, x10, #40
658 ; CHECK-DISABLE-NEXT: extr x12, x12, x11, #57
659 ; CHECK-DISABLE-NEXT: fmov s0, w13
660 ; CHECK-DISABLE-NEXT: lsr w13, w10, #7
661 ; CHECK-DISABLE-NEXT: extr x14, x15, x14, #50
662 ; CHECK-DISABLE-NEXT: lsr w15, w9, #14
663 ; CHECK-DISABLE-NEXT: extr x9, x10, x9, #40
664 ; CHECK-DISABLE-NEXT: fmov s1, w12
665 ; CHECK-DISABLE-NEXT: orr w12, w17, w16, lsl #8
666 ; CHECK-DISABLE-NEXT: mov v0.s[1], w14
667 ; CHECK-DISABLE-NEXT: lsr w9, w9, #12
668 ; CHECK-DISABLE-NEXT: orr w11, w12, w11
669 ; CHECK-DISABLE-NEXT: mov v1.s[1], w15
670 ; CHECK-DISABLE-NEXT: lsr w11, w11, #19
671 ; CHECK-DISABLE-NEXT: mov v0.s[2], w13
672 ; CHECK-DISABLE-NEXT: mov v1.s[2], w11
673 ; CHECK-DISABLE-NEXT: mov v0.s[3], w9
674 ; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
675 ; CHECK-DISABLE-NEXT: add x8, x8, #1
676 ; CHECK-DISABLE-NEXT: cmp x8, #1000
677 ; CHECK-DISABLE-NEXT: mov v1.s[3], w12
678 ; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
679 ; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
680 ; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
681 ; CHECK-DISABLE-NEXT: b.eq .LBB5_1
682 ; CHECK-DISABLE-NEXT: // %bb.2: // %exit
683 ; CHECK-DISABLE-NEXT: ret
688 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
689 %gep.A = getelementptr inbounds <8 x i19>, ptr %A, i64 %iv
690 %l.A = load <8 x i19>, ptr %gep.A
691 %trunc = trunc <8 x i19> %l.A to <8 x i8>
692 %gep.dst = getelementptr inbounds <8 x i8>, ptr %dst, i64 %iv
693 store <8 x i8> %trunc, ptr %gep.dst
694 %iv.next = add i64 %iv, 1
695 %ec = icmp eq i64 %iv.next, 1000
696 br i1 %ec, label %loop, label %exit
; Truncate <11 x i64> to <11 x i8> inside a loop. The element count is not a
; power of two, and none of the three RUN configurations is expected to use
; tbl or a constant-pool mask here. The expected code narrows with uzp1 and
; xtn and writes the 11 result bytes in three pieces: 8 bytes at offset 0,
; a halfword at offset 8 and a single byte at offset 10.
702 define void @trunc_v11i64_to_v11i8_in_loop(ptr %A, ptr %dst) {
703 ; CHECK-LABEL: trunc_v11i64_to_v11i8_in_loop:
704 ; CHECK: ; %bb.0: ; %entry
705 ; CHECK-NEXT: mov w8, #1000 ; =0x3e8
706 ; CHECK-NEXT: LBB6_1: ; %loop
707 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
708 ; CHECK-NEXT: ldp q4, q0, [x0, #48]
709 ; CHECK-NEXT: add x9, x1, #10
710 ; CHECK-NEXT: ldr d1, [x0, #80]
711 ; CHECK-NEXT: ldp q3, q2, [x0]
712 ; CHECK-NEXT: ldr q5, [x0, #32]
713 ; CHECK-NEXT: subs x8, x8, #1
714 ; CHECK-NEXT: add x0, x0, #128
715 ; CHECK-NEXT: uzp1.4s v0, v0, v1
716 ; CHECK-NEXT: uzp1.4s v1, v5, v4
717 ; CHECK-NEXT: uzp1.4s v2, v3, v2
718 ; CHECK-NEXT: xtn.4h v0, v0
719 ; CHECK-NEXT: uzp1.8h v1, v2, v1
720 ; CHECK-NEXT: uzp1.8b v2, v0, v0
721 ; CHECK-NEXT: uzp1.16b v0, v1, v0
722 ; CHECK-NEXT: st1.b { v2 }[2], [x9]
723 ; CHECK-NEXT: add x9, x1, #8
724 ; CHECK-NEXT: st1.h { v0 }[4], [x9]
725 ; CHECK-NEXT: str d0, [x1], #16
726 ; CHECK-NEXT: b.eq LBB6_1
727 ; CHECK-NEXT: ; %bb.2: ; %exit
730 ; CHECK-BE-LABEL: trunc_v11i64_to_v11i8_in_loop:
731 ; CHECK-BE: // %bb.0: // %entry
732 ; CHECK-BE-NEXT: mov w8, #1000 // =0x3e8
733 ; CHECK-BE-NEXT: .LBB6_1: // %loop
734 ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
735 ; CHECK-BE-NEXT: add x9, x0, #64
736 ; CHECK-BE-NEXT: add x10, x0, #16
737 ; CHECK-BE-NEXT: ld1 { v3.2d }, [x0]
738 ; CHECK-BE-NEXT: ld1 { v0.2d }, [x9]
739 ; CHECK-BE-NEXT: add x9, x0, #48
740 ; CHECK-BE-NEXT: ld1 { v1.2d }, [x10]
741 ; CHECK-BE-NEXT: add x10, x0, #32
742 ; CHECK-BE-NEXT: ld1 { v2.2d }, [x9]
743 ; CHECK-BE-NEXT: ldr d5, [x0, #80]
744 ; CHECK-BE-NEXT: ld1 { v4.2d }, [x10]
745 ; CHECK-BE-NEXT: add x9, x1, #10
746 ; CHECK-BE-NEXT: subs x8, x8, #1
747 ; CHECK-BE-NEXT: uzp1 v1.4s, v3.4s, v1.4s
748 ; CHECK-BE-NEXT: uzp1 v0.4s, v0.4s, v5.4s
749 ; CHECK-BE-NEXT: add x0, x0, #128
750 ; CHECK-BE-NEXT: uzp1 v2.4s, v4.4s, v2.4s
751 ; CHECK-BE-NEXT: xtn v0.4h, v0.4s
752 ; CHECK-BE-NEXT: uzp1 v1.8h, v1.8h, v2.8h
753 ; CHECK-BE-NEXT: uzp1 v1.16b, v1.16b, v0.16b
754 ; CHECK-BE-NEXT: uzp1 v0.8b, v0.8b, v0.8b
755 ; CHECK-BE-NEXT: rev16 v2.16b, v1.16b
756 ; CHECK-BE-NEXT: rev64 v1.16b, v1.16b
757 ; CHECK-BE-NEXT: st1 { v0.b }[2], [x9]
758 ; CHECK-BE-NEXT: add x9, x1, #8
759 ; CHECK-BE-NEXT: st1 { v2.h }[4], [x9]
760 ; CHECK-BE-NEXT: str d1, [x1], #16
761 ; CHECK-BE-NEXT: b.eq .LBB6_1
762 ; CHECK-BE-NEXT: // %bb.2: // %exit
765 ; CHECK-DISABLE-LABEL: trunc_v11i64_to_v11i8_in_loop:
766 ; CHECK-DISABLE: // %bb.0: // %entry
767 ; CHECK-DISABLE-NEXT: mov w8, #1000 // =0x3e8
768 ; CHECK-DISABLE-NEXT: .LBB6_1: // %loop
769 ; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
770 ; CHECK-DISABLE-NEXT: add x9, x0, #64
771 ; CHECK-DISABLE-NEXT: add x10, x0, #16
772 ; CHECK-DISABLE-NEXT: ld1 { v3.2d }, [x0]
773 ; CHECK-DISABLE-NEXT: ld1 { v0.2d }, [x9]
774 ; CHECK-DISABLE-NEXT: add x9, x0, #48
775 ; CHECK-DISABLE-NEXT: ld1 { v1.2d }, [x10]
776 ; CHECK-DISABLE-NEXT: add x10, x0, #32
777 ; CHECK-DISABLE-NEXT: ld1 { v2.2d }, [x9]
778 ; CHECK-DISABLE-NEXT: ldr d5, [x0, #80]
779 ; CHECK-DISABLE-NEXT: ld1 { v4.2d }, [x10]
780 ; CHECK-DISABLE-NEXT: add x9, x1, #10
781 ; CHECK-DISABLE-NEXT: subs x8, x8, #1
782 ; CHECK-DISABLE-NEXT: uzp1 v1.4s, v3.4s, v1.4s
783 ; CHECK-DISABLE-NEXT: uzp1 v0.4s, v0.4s, v5.4s
784 ; CHECK-DISABLE-NEXT: add x0, x0, #128
785 ; CHECK-DISABLE-NEXT: uzp1 v2.4s, v4.4s, v2.4s
786 ; CHECK-DISABLE-NEXT: xtn v0.4h, v0.4s
787 ; CHECK-DISABLE-NEXT: uzp1 v1.8h, v1.8h, v2.8h
788 ; CHECK-DISABLE-NEXT: uzp1 v1.16b, v1.16b, v0.16b
789 ; CHECK-DISABLE-NEXT: uzp1 v0.8b, v0.8b, v0.8b
790 ; CHECK-DISABLE-NEXT: rev16 v2.16b, v1.16b
791 ; CHECK-DISABLE-NEXT: rev64 v1.16b, v1.16b
792 ; CHECK-DISABLE-NEXT: st1 { v0.b }[2], [x9]
793 ; CHECK-DISABLE-NEXT: add x9, x1, #8
794 ; CHECK-DISABLE-NEXT: st1 { v2.h }[4], [x9]
795 ; CHECK-DISABLE-NEXT: str d1, [x1], #16
796 ; CHECK-DISABLE-NEXT: b.eq .LBB6_1
797 ; CHECK-DISABLE-NEXT: // %bb.2: // %exit
798 ; CHECK-DISABLE-NEXT: ret
803 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
804 %gep.A = getelementptr inbounds <11 x i64>, ptr %A, i64 %iv
805 %l.A = load <11 x i64>, ptr %gep.A
806 %trunc = trunc <11 x i64> %l.A to <11 x i8>
807 %gep.dst = getelementptr inbounds <11 x i8>, ptr %dst, i64 %iv
808 store <11 x i8> %trunc, ptr %gep.dst
809 %iv.next = add i64 %iv, 1
810 %ec = icmp eq i64 %iv.next, 1000
811 br i1 %ec, label %loop, label %exit
; Truncate <16 x i16> to <16 x i8> inside a loop. A single uzp1 over the two
; source registers already performs the truncate, so all three RUN
; configurations are expected to use it; no tbl and no constant-pool mask
; should appear.
817 define void @trunc_v16i16_to_v16i8_in_loop(ptr %A, ptr %dst) {
818 ; CHECK-LABEL: trunc_v16i16_to_v16i8_in_loop:
819 ; CHECK: ; %bb.0: ; %entry
820 ; CHECK-NEXT: mov x8, xzr
821 ; CHECK-NEXT: LBB7_1: ; %loop
822 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
823 ; CHECK-NEXT: add x9, x0, x8, lsl #5
824 ; CHECK-NEXT: ldp q1, q0, [x9]
825 ; CHECK-NEXT: uzp1.16b v0, v1, v0
826 ; CHECK-NEXT: str q0, [x1, x8, lsl #4]
827 ; CHECK-NEXT: add x8, x8, #1
828 ; CHECK-NEXT: cmp x8, #1000
829 ; CHECK-NEXT: b.eq LBB7_1
830 ; CHECK-NEXT: ; %bb.2: ; %exit
833 ; CHECK-BE-LABEL: trunc_v16i16_to_v16i8_in_loop:
834 ; CHECK-BE: // %bb.0: // %entry
835 ; CHECK-BE-NEXT: mov x8, xzr
836 ; CHECK-BE-NEXT: .LBB7_1: // %loop
837 ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
838 ; CHECK-BE-NEXT: add x9, x0, x8, lsl #5
839 ; CHECK-BE-NEXT: add x10, x9, #16
840 ; CHECK-BE-NEXT: ld1 { v0.8h }, [x9]
841 ; CHECK-BE-NEXT: add x9, x1, x8, lsl #4
842 ; CHECK-BE-NEXT: ld1 { v1.8h }, [x10]
843 ; CHECK-BE-NEXT: add x8, x8, #1
844 ; CHECK-BE-NEXT: cmp x8, #1000
845 ; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
846 ; CHECK-BE-NEXT: st1 { v0.16b }, [x9]
847 ; CHECK-BE-NEXT: b.eq .LBB7_1
848 ; CHECK-BE-NEXT: // %bb.2: // %exit
851 ; CHECK-DISABLE-LABEL: trunc_v16i16_to_v16i8_in_loop:
852 ; CHECK-DISABLE: // %bb.0: // %entry
853 ; CHECK-DISABLE-NEXT: mov x8, xzr
854 ; CHECK-DISABLE-NEXT: .LBB7_1: // %loop
855 ; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
856 ; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #5
857 ; CHECK-DISABLE-NEXT: add x10, x9, #16
858 ; CHECK-DISABLE-NEXT: ld1 { v0.8h }, [x9]
859 ; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #4
860 ; CHECK-DISABLE-NEXT: ld1 { v1.8h }, [x10]
861 ; CHECK-DISABLE-NEXT: add x8, x8, #1
862 ; CHECK-DISABLE-NEXT: cmp x8, #1000
863 ; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
864 ; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x9]
865 ; CHECK-DISABLE-NEXT: b.eq .LBB7_1
866 ; CHECK-DISABLE-NEXT: // %bb.2: // %exit
867 ; CHECK-DISABLE-NEXT: ret
872 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
873 %gep.A = getelementptr inbounds <16 x i16>, ptr %A, i64 %iv
874 %l.A = load <16 x i16>, ptr %gep.A
875 %trunc = trunc <16 x i16> %l.A to <16 x i8>
876 %gep.dst = getelementptr inbounds <16 x i8>, ptr %dst, i64 %iv
877 store <16 x i8> %trunc, ptr %gep.dst
878 %iv.next = add i64 %iv, 1
879 %ec = icmp eq i64 %iv.next, 1000
880 br i1 %ec, label %loop, label %exit
; Truncate <8 x i16> to <8 x i8> inside a loop. The source fits in a single
; 128-bit register and one xtn performs the truncate, so all three RUN
; configurations are expected to use it; no tbl and no constant-pool mask
; should appear.
886 define void @trunc_v8i16_to_v8i8_in_loop(ptr %A, ptr %dst) {
887 ; CHECK-LABEL: trunc_v8i16_to_v8i8_in_loop:
888 ; CHECK: ; %bb.0: ; %entry
889 ; CHECK-NEXT: mov x8, xzr
890 ; CHECK-NEXT: LBB8_1: ; %loop
891 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
892 ; CHECK-NEXT: ldr q0, [x0, x8, lsl #4]
893 ; CHECK-NEXT: xtn.8b v0, v0
894 ; CHECK-NEXT: str d0, [x1, x8, lsl #3]
895 ; CHECK-NEXT: add x8, x8, #1
896 ; CHECK-NEXT: cmp x8, #1000
897 ; CHECK-NEXT: b.eq LBB8_1
898 ; CHECK-NEXT: ; %bb.2: ; %exit
901 ; CHECK-BE-LABEL: trunc_v8i16_to_v8i8_in_loop:
902 ; CHECK-BE: // %bb.0: // %entry
903 ; CHECK-BE-NEXT: mov x8, xzr
904 ; CHECK-BE-NEXT: .LBB8_1: // %loop
905 ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
906 ; CHECK-BE-NEXT: add x9, x0, x8, lsl #4
907 ; CHECK-BE-NEXT: ld1 { v0.8h }, [x9]
908 ; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
909 ; CHECK-BE-NEXT: add x8, x8, #1
910 ; CHECK-BE-NEXT: cmp x8, #1000
911 ; CHECK-BE-NEXT: xtn v0.8b, v0.8h
912 ; CHECK-BE-NEXT: st1 { v0.8b }, [x9]
913 ; CHECK-BE-NEXT: b.eq .LBB8_1
914 ; CHECK-BE-NEXT: // %bb.2: // %exit
917 ; CHECK-DISABLE-LABEL: trunc_v8i16_to_v8i8_in_loop:
918 ; CHECK-DISABLE: // %bb.0: // %entry
919 ; CHECK-DISABLE-NEXT: mov x8, xzr
920 ; CHECK-DISABLE-NEXT: .LBB8_1: // %loop
921 ; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
922 ; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #4
923 ; CHECK-DISABLE-NEXT: ld1 { v0.8h }, [x9]
924 ; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
925 ; CHECK-DISABLE-NEXT: add x8, x8, #1
926 ; CHECK-DISABLE-NEXT: cmp x8, #1000
927 ; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
928 ; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
929 ; CHECK-DISABLE-NEXT: b.eq .LBB8_1
930 ; CHECK-DISABLE-NEXT: // %bb.2: // %exit
931 ; CHECK-DISABLE-NEXT: ret
936 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
937 %gep.A = getelementptr inbounds <8 x i16>, ptr %A, i64 %iv
938 %l.A = load <8 x i16>, ptr %gep.A
939 %trunc = trunc <8 x i16> %l.A to <8 x i8>
940 %gep.dst = getelementptr inbounds <8 x i8>, ptr %dst, i64 %iv
941 store <8 x i8> %trunc, ptr %gep.dst
942 %iv.next = add i64 %iv, 1
943 %ec = icmp eq i64 %iv.next, 1000
944 br i1 %ec, label %loop, label %exit