1 ; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -D#VBYTES=16 -check-prefix=NO_SVE
2 ; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK,VBITS_EQ_256
3 ; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK
4 ; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
5 ; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
6 ; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
7 ; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
8 ; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
9 ; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
10 ; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
11 ; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
12 ; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
13 ; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
14 ; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
15 ; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
16 ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -D#VBYTES=256 -check-prefixes=CHECK,VBITS_GE_2048,VBITS_GE_1024,VBITS_GE_512
; Code generation target for this module: AArch64, Linux, GNU environment.
18 target triple = "aarch64-unknown-linux-gnu"
20 ; Don't use SVE when its registers are no bigger than NEON.
; Zero-extending load: reads <4 x i16> from %ap and widens it to <4 x i32>.
; The 128-bit result fits a NEON register, so the CHECK lines below expect a
; NEON ldr + ushll pair instead of SVE instructions.
23 define <4 x i32> @load_zext_v4i16i32(<4 x i16>* %ap) #0 {
24 ; CHECK-LABEL: load_zext_v4i16i32
25 ; CHECK: ldr d[[D0:[0-9]+]], [x0]
26 ; CHECK-NEXT: ushll v[[D0]].4s, v[[D0]].4h, #0
28 %a = load <4 x i16>, <4 x i16>* %ap
29 %val = zext <4 x i16> %a to <4 x i32>
; Zero-extending load: reads <8 x i16> from %ap and widens it to <8 x i32>.
; The CHECK lines below expect one predicated ld1h into 32-bit (.s) elements
; under a vl8 predicate, followed by an st1w of the result to the address in x8.
33 define <8 x i32> @load_zext_v8i16i32(<8 x i16>* %ap) #0 {
34 ; CHECK-LABEL: load_zext_v8i16i32
35 ; CHECK: ptrue [[P0:p[0-9]+]].s, vl8
36 ; CHECK-NEXT: ld1h { [[Z0:z[0-9]+]].s }, [[P0]]/z, [x0]
37 ; CHECK-NEXT: st1w { [[Z0]].s }, [[P0]], [x8]
39 %a = load <8 x i16>, <8 x i16>* %ap
40 %val = zext <8 x i16> %a to <8 x i32>
; Zero-extending load: reads <16 x i16> from %ap and widens it to <16 x i32>.
; Two expected code sequences are checked, selected by the FileCheck prefix:
;  * the VBITS_GE_512 prefix expects a single ld1h into .s elements (vl16)
;    followed by one st1w;
;  * the VBITS_EQ_256 prefix expects the split form: load as halfwords, store
;    to the stack, reload as two q registers, uunpklo each one, then two st1w
;    stores. Those lines use the DAG form, so their relative order is not
;    constrained.
44 define <16 x i32> @load_zext_v16i16i32(<16 x i16>* %ap) #0 {
45 ; CHECK-LABEL: load_zext_v16i16i32
46 ; VBITS_GE_512: ptrue [[P0:p[0-9]+]].s, vl16
47 ; VBITS_GE_512-NEXT: ld1h { [[Z0:z[0-9]+]].s }, [[P0]]/z, [x0]
48 ; VBITS_GE_512-NEXT: st1w { [[Z0]].s }, [[P0]], [x8]
49 ; VBITS_GE_512-NEXT: ret
51 ; Ensure sensible type legalisation
52 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
53 ; VBITS_EQ_256-DAG: ld1h { [[Z0:z[0-9]+]].h }, [[PG]]/z, [x0]
54 ; VBITS_EQ_256-DAG: mov x9, sp
55 ; VBITS_EQ_256-DAG: st1h { [[Z0]].h }, [[PG]], [x9]
56 ; VBITS_EQ_256-DAG: ldp q[[R0:[0-9]+]], q[[R1:[0-9]+]], [sp]
57 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8
58 ; VBITS_EQ_256-DAG: uunpklo z[[R0]].s, z[[R0]].h
59 ; VBITS_EQ_256-DAG: uunpklo z[[R1]].s, z[[R1]].h
60 ; VBITS_EQ_256-DAG: st1w { z[[R1]].s }, [[PG1]], [x8, x9, lsl #2]
61 ; VBITS_EQ_256-DAG: st1w { z[[R0]].s }, [[PG1]], [x8]
62 ; VBITS_EQ_256-DAG: ret
63 %a = load <16 x i16>, <16 x i16>* %ap
64 %val = zext <16 x i16> %a to <16 x i32>
; Zero-extending load: reads <32 x i16> from %ap and widens it to <32 x i32>.
; Only the VBITS_GE_1024 prefix carries instruction checks here: a single ld1h
; into .s elements under a vl32 predicate, then one st1w and ret.
68 define <32 x i32> @load_zext_v32i16i32(<32 x i16>* %ap) #0 {
69 ; CHECK-LABEL: load_zext_v32i16i32
70 ; VBITS_GE_1024: ptrue [[P0:p[0-9]+]].s, vl32
71 ; VBITS_GE_1024-NEXT: ld1h { [[Z0:z[0-9]+]].s }, [[P0]]/z, [x0]
72 ; VBITS_GE_1024-NEXT: st1w { [[Z0]].s }, [[P0]], [x8]
73 ; VBITS_GE_1024-NEXT: ret
74 %a = load <32 x i16>, <32 x i16>* %ap
75 %val = zext <32 x i16> %a to <32 x i32>
; Zero-extending load: reads <64 x i16> from %ap and widens it to <64 x i32>.
; Only the VBITS_GE_2048 prefix carries instruction checks here: a single ld1h
; into .s elements under a vl64 predicate, then one st1w and ret.
79 define <64 x i32> @load_zext_v64i16i32(<64 x i16>* %ap) #0 {
80 ; CHECK-LABEL: load_zext_v64i16i32
81 ; VBITS_GE_2048: ptrue [[P0:p[0-9]+]].s, vl64
82 ; VBITS_GE_2048-NEXT: ld1h { [[Z0:z[0-9]+]].s }, [[P0]]/z, [x0]
83 ; VBITS_GE_2048-NEXT: st1w { [[Z0]].s }, [[P0]], [x8]
84 ; VBITS_GE_2048-NEXT: ret
85 %a = load <64 x i16>, <64 x i16>* %ap
86 %val = zext <64 x i16> %a to <64 x i32>
; Sign-extending load: reads <4 x i16> from %ap and widens it to <4 x i32>.
; Signed counterpart of load_zext_v4i16i32: the CHECK lines below expect a NEON
; ldr + sshll pair instead of SVE instructions.
90 define <4 x i32> @load_sext_v4i16i32(<4 x i16>* %ap) #0 {
91 ; CHECK-LABEL: load_sext_v4i16i32
92 ; CHECK: ldr d[[D0:[0-9]+]], [x0]
93 ; CHECK-NEXT: sshll v[[D0]].4s, v[[D0]].4h, #0
95 %a = load <4 x i16>, <4 x i16>* %ap
96 %val = sext <4 x i16> %a to <4 x i32>
; Sign-extending load: reads <8 x i16> from %ap and widens it to <8 x i32>.
; The CHECK lines below expect one predicated ld1sh into 32-bit (.s) elements
; under a vl8 predicate, followed by an st1w of the result to the address in x8.
100 define <8 x i32> @load_sext_v8i16i32(<8 x i16>* %ap) #0 {
101 ; CHECK-LABEL: load_sext_v8i16i32
102 ; CHECK: ptrue [[P0:p[0-9]+]].s, vl8
103 ; CHECK-NEXT: ld1sh { [[Z0:z[0-9]+]].s }, [[P0]]/z, [x0]
104 ; CHECK-NEXT: st1w { [[Z0]].s }, [[P0]], [x8]
106 %a = load <8 x i16>, <8 x i16>* %ap
107 %val = sext <8 x i16> %a to <8 x i32>
; Sign-extending load: reads <16 x i16> from %ap and widens it to <16 x i32>.
; Two expected code sequences are checked, selected by the FileCheck prefix:
;  * the VBITS_GE_512 prefix expects a single ld1sh into .s elements (vl16)
;    followed by one st1w;
;  * the VBITS_EQ_256 prefix expects the split form: load as halfwords, store
;    to the stack, reload as two q registers, sunpklo each one, then two st1w
;    stores. Those lines use the DAG form, so their relative order is not
;    constrained.
111 define <16 x i32> @load_sext_v16i16i32(<16 x i16>* %ap) #0 {
112 ; CHECK-LABEL: load_sext_v16i16i32
113 ; VBITS_GE_512: ptrue [[P0:p[0-9]+]].s, vl16
114 ; VBITS_GE_512-NEXT: ld1sh { [[Z0:z[0-9]+]].s }, [[P0]]/z, [x0]
115 ; VBITS_GE_512-NEXT: st1w { [[Z0]].s }, [[P0]], [x8]
116 ; VBITS_GE_512-NEXT: ret
118 ; Ensure sensible type legalisation
119 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
120 ; VBITS_EQ_256-DAG: ld1h { [[Z0:z[0-9]+]].h }, [[PG]]/z, [x0]
121 ; VBITS_EQ_256-DAG: mov x9, sp
122 ; VBITS_EQ_256-DAG: st1h { [[Z0]].h }, [[PG]], [x9]
123 ; VBITS_EQ_256-DAG: ldp q[[R0:[0-9]+]], q[[R1:[0-9]+]], [sp]
124 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8
125 ; VBITS_EQ_256-DAG: sunpklo z[[R0]].s, z[[R0]].h
126 ; VBITS_EQ_256-DAG: sunpklo z[[R1]].s, z[[R1]].h
127 ; VBITS_EQ_256-DAG: st1w { z[[R1]].s }, [[PG1]], [x8, x9, lsl #2]
128 ; VBITS_EQ_256-DAG: st1w { z[[R0]].s }, [[PG1]], [x8]
129 ; VBITS_EQ_256-DAG: ret
130 %a = load <16 x i16>, <16 x i16>* %ap
131 %val = sext <16 x i16> %a to <16 x i32>
; Sign-extending load: reads <32 x i16> from %ap and widens it to <32 x i32>.
; Only the VBITS_GE_1024 prefix carries instruction checks here: a single ld1sh
; into .s elements under a vl32 predicate, then one st1w and ret.
135 define <32 x i32> @load_sext_v32i16i32(<32 x i16>* %ap) #0 {
136 ; CHECK-LABEL: load_sext_v32i16i32
137 ; VBITS_GE_1024: ptrue [[P0:p[0-9]+]].s, vl32
138 ; VBITS_GE_1024-NEXT: ld1sh { [[Z0:z[0-9]+]].s }, [[P0]]/z, [x0]
139 ; VBITS_GE_1024-NEXT: st1w { [[Z0]].s }, [[P0]], [x8]
140 ; VBITS_GE_1024-NEXT: ret
141 %a = load <32 x i16>, <32 x i16>* %ap
142 %val = sext <32 x i16> %a to <32 x i32>
; Sign-extending load: reads <64 x i16> from %ap and widens it to <64 x i32>.
; Only the VBITS_GE_2048 prefix carries instruction checks here: a single ld1sh
; into .s elements under a vl64 predicate, then one st1w and ret.
146 define <64 x i32> @load_sext_v64i16i32(<64 x i16>* %ap) #0 {
147 ; CHECK-LABEL: load_sext_v64i16i32
148 ; VBITS_GE_2048: ptrue [[P0:p[0-9]+]].s, vl64
149 ; VBITS_GE_2048-NEXT: ld1sh { [[Z0:z[0-9]+]].s }, [[P0]]/z, [x0]
150 ; VBITS_GE_2048-NEXT: st1w { [[Z0]].s }, [[P0]], [x8]
151 ; VBITS_GE_2048-NEXT: ret
152 %a = load <64 x i16>, <64 x i16>* %ap
153 %val = sext <64 x i16> %a to <64 x i32>
; Zero-extending load across three width steps: reads <32 x i8> from %ap and
; widens it to <32 x i64>. Only the VBITS_GE_2048 prefix carries instruction
; checks: a single ld1b into 64-bit (.d) elements under a vl32 predicate, then
; one st1d and ret.
157 define <32 x i64> @load_zext_v32i8i64(<32 x i8>* %ap) #0 {
158 ; CHECK-LABEL: load_zext_v32i8i64
159 ; VBITS_GE_2048: ptrue [[P0:p[0-9]+]].d, vl32
160 ; VBITS_GE_2048-NEXT: ld1b { [[Z0:z[0-9]+]].d }, [[P0]]/z, [x0]
161 ; VBITS_GE_2048-NEXT: st1d { [[Z0]].d }, [[P0]], [x8]
162 ; VBITS_GE_2048-NEXT: ret
163 %a = load <32 x i8>, <32 x i8>* %ap
164 %val = zext <32 x i8> %a to <32 x i64>
; Sign-extending load across three width steps: reads <32 x i8> from %ap and
; widens it to <32 x i64>. Only the VBITS_GE_2048 prefix carries instruction
; checks: a single ld1sb into 64-bit (.d) elements under a vl32 predicate, then
; one st1d and ret.
168 define <32 x i64> @load_sext_v32i8i64(<32 x i8>* %ap) #0 {
169 ; CHECK-LABEL: load_sext_v32i8i64
170 ; VBITS_GE_2048: ptrue [[P0:p[0-9]+]].d, vl32
171 ; VBITS_GE_2048-NEXT: ld1sb { [[Z0:z[0-9]+]].d }, [[P0]]/z, [x0]
172 ; VBITS_GE_2048-NEXT: st1d { [[Z0]].d }, [[P0]], [x8]
173 ; VBITS_GE_2048-NEXT: ret
174 %a = load <32 x i8>, <32 x i8>* %ap
175 %val = sext <32 x i8> %a to <32 x i64>
; Zero-extending load across two width steps: reads <32 x i16> from %ap and
; widens it to <32 x i64>. Only the VBITS_GE_2048 prefix carries instruction
; checks: a single ld1h into 64-bit (.d) elements under a vl32 predicate, then
; one st1d and ret.
179 define <32 x i64> @load_zext_v32i16i64(<32 x i16>* %ap) #0 {
180 ; CHECK-LABEL: load_zext_v32i16i64
181 ; VBITS_GE_2048: ptrue [[P0:p[0-9]+]].d, vl32
182 ; VBITS_GE_2048-NEXT: ld1h { [[Z0:z[0-9]+]].d }, [[P0]]/z, [x0]
183 ; VBITS_GE_2048-NEXT: st1d { [[Z0]].d }, [[P0]], [x8]
184 ; VBITS_GE_2048-NEXT: ret
185 %a = load <32 x i16>, <32 x i16>* %ap
186 %val = zext <32 x i16> %a to <32 x i64>
; Sign-extending load across two width steps: reads <32 x i16> from %ap and
; widens it to <32 x i64>. Only the VBITS_GE_2048 prefix carries instruction
; checks: a single ld1sh into 64-bit (.d) elements under a vl32 predicate, then
; one st1d and ret.
190 define <32 x i64> @load_sext_v32i16i64(<32 x i16>* %ap) #0 {
191 ; CHECK-LABEL: load_sext_v32i16i64
192 ; VBITS_GE_2048: ptrue [[P0:p[0-9]+]].d, vl32
193 ; VBITS_GE_2048-NEXT: ld1sh { [[Z0:z[0-9]+]].d }, [[P0]]/z, [x0]
194 ; VBITS_GE_2048-NEXT: st1d { [[Z0]].d }, [[P0]], [x8]
195 ; VBITS_GE_2048-NEXT: ret
196 %a = load <32 x i16>, <32 x i16>* %ap
197 %val = sext <32 x i16> %a to <32 x i64>
; Zero-extending load: reads <32 x i32> from %ap and widens it to <32 x i64>.
; Only the VBITS_GE_2048 prefix carries instruction checks: a single ld1w into
; 64-bit (.d) elements under a vl32 predicate, then one st1d and ret.
201 define <32 x i64> @load_zext_v32i32i64(<32 x i32>* %ap) #0 {
202 ; CHECK-LABEL: load_zext_v32i32i64
203 ; VBITS_GE_2048: ptrue [[P0:p[0-9]+]].d, vl32
204 ; VBITS_GE_2048-NEXT: ld1w { [[Z0:z[0-9]+]].d }, [[P0]]/z, [x0]
205 ; VBITS_GE_2048-NEXT: st1d { [[Z0]].d }, [[P0]], [x8]
206 ; VBITS_GE_2048-NEXT: ret
207 %a = load <32 x i32>, <32 x i32>* %ap
208 %val = zext <32 x i32> %a to <32 x i64>
; Sign-extending load: reads <32 x i32> from %ap and widens it to <32 x i64>.
; Only the VBITS_GE_2048 prefix carries instruction checks: a single ld1sw into
; 64-bit (.d) elements under a vl32 predicate, then one st1d and ret.
212 define <32 x i64> @load_sext_v32i32i64(<32 x i32>* %ap) #0 {
213 ; CHECK-LABEL: load_sext_v32i32i64
214 ; VBITS_GE_2048: ptrue [[P0:p[0-9]+]].d, vl32
215 ; VBITS_GE_2048-NEXT: ld1sw { [[Z0:z[0-9]+]].d }, [[P0]]/z, [x0]
216 ; VBITS_GE_2048-NEXT: st1d { [[Z0]].d }, [[P0]], [x8]
217 ; VBITS_GE_2048-NEXT: ret
218 %a = load <32 x i32>, <32 x i32>* %ap
219 %val = sext <32 x i32> %a to <32 x i64>
; Attribute group #0, referenced by each function definition visible in this
; file: enables the SVE target feature for that function.
223 attributes #0 = { "target-features"="+sve" }