1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
3 ; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
4 ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
6 target triple = "aarch64-unknown-linux-gnu"
; <2 x i64> -> <2 x i8>: fits in a NEON q-register load, then a single
; truncating st1b with a .d predicate.
define void @store_trunc_v2i64i8(ptr %ap, ptr %dest) vscale_range(2,0) #0 {
; CHECK-LABEL: store_trunc_v2i64i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    st1b { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %ap
  %val = trunc <2 x i64> %a to <2 x i8>
  store <2 x i8> %val, ptr %dest
  ret void
}

; <4 x i64> -> <4 x i8>: single predicated load plus a truncating st1b.
define void @store_trunc_v4i64i8(ptr %ap, ptr %dest) vscale_range(2,0) #0 {
; CHECK-LABEL: store_trunc_v4i64i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    st1b { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <4 x i64>, ptr %ap
  %val = trunc <4 x i64> %a to <4 x i8>
  store <4 x i8> %val, ptr %dest
  ret void
}

; <8 x i64> -> <8 x i8>: at 256-bit SVE the vector is split (two loads,
; uzp1 + splice); at 512-bit SVE a single truncating st1b suffices.
define void @store_trunc_v8i64i8(ptr %ap, ptr %dest) #0 {
; VBITS_GE_256-LABEL: store_trunc_v8i64i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.s, vl4
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    splice z1.s, p0, z1.s, z0.s
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    st1b { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: store_trunc_v8i64i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1b { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %a = load <8 x i64>, ptr %ap
  %val = trunc <8 x i64> %a to <8 x i8>
  store <8 x i8> %val, ptr %dest
  ret void
}

; <16 x i64> -> <16 x i8>: with vscale_range(8,0) the whole vector fits,
; so a single truncating st1b is expected.
define void @store_trunc_v16i64i8(ptr %ap, ptr %dest) vscale_range(8,0) #0 {
; CHECK-LABEL: store_trunc_v16i64i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    st1b { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <16 x i64>, ptr %ap
  %val = trunc <16 x i64> %a to <16 x i8>
  store <16 x i8> %val, ptr %dest
  ret void
}

; <32 x i64> -> <32 x i8>: with vscale_range(16,0) the whole vector fits,
; so a single truncating st1b is expected.
define void @store_trunc_v32i64i8(ptr %ap, ptr %dest) vscale_range(16,0) #0 {
; CHECK-LABEL: store_trunc_v32i64i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    st1b { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <32 x i64>, ptr %ap
  %val = trunc <32 x i64> %a to <32 x i8>
  store <32 x i8> %val, ptr %dest
  ret void
}

define void @store_trunc_v8i64i16(ptr %ap, ptr %dest) #0 {
; Currently does not use the truncating store
; VBITS_GE_256-LABEL: store_trunc_v8i64i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    mov v1.d[1], v0.d[0]
; VBITS_GE_256-NEXT:    str q1, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: store_trunc_v8i64i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1h { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %a = load <8 x i64>, ptr %ap
  %val = trunc <8 x i64> %a to <8 x i16>
  store <8 x i16> %val, ptr %dest
  ret void
}

; <8 x i64> -> <8 x i32>: split at 256-bit SVE (uzp1 + splice + st1w);
; single truncating st1w at 512-bit SVE.
define void @store_trunc_v8i64i32(ptr %ap, ptr %dest) #0 {
; VBITS_GE_256-LABEL: store_trunc_v8i64i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.s, vl4
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    splice z1.s, p0, z1.s, z0.s
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: store_trunc_v8i64i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1w { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %a = load <8 x i64>, ptr %ap
  %val = trunc <8 x i64> %a to <8 x i32>
  store <8 x i32> %val, ptr %dest
  ret void
}

define void @store_trunc_v16i32i8(ptr %ap, ptr %dest) #0 {
; Currently does not use the truncating store
; VBITS_GE_256-LABEL: store_trunc_v16i32i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT:    uzp1 z1.b, z1.b, z1.b
; VBITS_GE_256-NEXT:    mov v1.d[1], v0.d[0]
; VBITS_GE_256-NEXT:    str q1, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: store_trunc_v16i32i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1b { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %a = load <16 x i32>, ptr %ap
  %val = trunc <16 x i32> %a to <16 x i8>
  store <16 x i8> %val, ptr %dest
  ret void
}

; <16 x i32> -> <16 x i16>: split at 256-bit SVE (uzp1 + splice + st1h);
; single truncating st1h at 512-bit SVE.
define void @store_trunc_v16i32i16(ptr %ap, ptr %dest) #0 {
; VBITS_GE_256-LABEL: store_trunc_v16i32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.h, vl8
; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    splice z1.h, p0, z1.h, z0.h
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: store_trunc_v16i32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1h { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %a = load <16 x i32>, ptr %ap
  %val = trunc <16 x i32> %a to <16 x i16>
  store <16 x i16> %val, ptr %dest
  ret void
}

; <32 x i16> -> <32 x i8>: split at 256-bit SVE (uzp1 + splice + st1b);
; single truncating st1b at 512-bit SVE.
define void @store_trunc_v32i16i8(ptr %ap, ptr %dest) #0 {
; VBITS_GE_256-LABEL: store_trunc_v32i16i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.b, vl16
; VBITS_GE_256-NEXT:    uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT:    uzp1 z1.b, z1.b, z1.b
; VBITS_GE_256-NEXT:    splice z1.b, p0, z1.b, z0.b
; VBITS_GE_256-NEXT:    ptrue p0.b, vl32
; VBITS_GE_256-NEXT:    st1b { z1.b }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: store_trunc_v32i16i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1b { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %a = load <32 x i16>, ptr %ap
  %val = trunc <32 x i16> %a to <32 x i8>
  store <32 x i8> %val, ptr %dest
  ret void
}

; Attribute group referenced by every function above: enables SVE codegen.
attributes #0 = { "target-features"="+sve" }