; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
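
; The RUN lines above sweep the minimum SVE register width from 128 bits to
; 2048 bits. With a 128-bit minimum, SVE registers are no bigger than NEON,
; and the NO_SVE run verifies that no SVE code is emitted at all. Each wider
; minimum allows correspondingly larger fixed-length vectors to be lowered
; to a single predicated SVE operation, which is what the VBITS_GE_* and
; VBITS_EQ_256 check prefixes select between.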
target triple = "aarch64-unknown-linux-gnu"

; Don't use SVE when its registers are no bigger than NEON.
; NO_SVE-NOT: ptrue

;
; ASHR
;
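
; NEON has no variable right-shift instruction, so for the NEON-sized cases
; below ashr is expected to lower to a negate of the shift amounts followed
; by sshl, since a signed left shift by a negative amount is an arithmetic
; right shift.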

; Don't use SVE for 64-bit vectors.
define <8 x i8> @ashr_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
; CHECK-LABEL: ashr_v8i8:
; CHECK: neg v1.8b, v1.8b
; CHECK-NEXT: sshl v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %res = ashr <8 x i8> %op1, %op2
  ret <8 x i8> %res
}

; Don't use SVE for 128-bit vectors.
define <16 x i8> @ashr_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
; CHECK-LABEL: ashr_v16i8:
; CHECK: neg v1.16b, v1.16b
; CHECK-NEXT: sshl v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %res = ashr <16 x i8> %op1, %op2
  ret <16 x i8> %res
}

define void @ashr_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
; CHECK-LABEL: ashr_v32i8:
; CHECK: ptrue [[PG:p[0-9]+]].b, vl32
; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; CHECK-NEXT: asr [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
; CHECK-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <32 x i8>, <32 x i8>* %a
  %op2 = load <32 x i8>, <32 x i8>* %b
  %res = ashr <32 x i8> %op1, %op2
  store <32 x i8> %res, <32 x i8>* %a
  ret void
}

define void @ashr_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
; CHECK-LABEL: ashr_v64i8:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64
; VBITS_GE_512-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: asr [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
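
; When the registers are exactly 256 bits wide, the 512-bit operation below
; is expected to be split into two halves: the low half addressed directly
; and the high half at an offset of 32 elements (the NUMELTS mov), with each
; half shifted by its own predicated asr.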

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]]
; VBITS_EQ_256-DAG: asr [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b
; VBITS_EQ_256-DAG: asr [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b
; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <64 x i8>, <64 x i8>* %a
  %op2 = load <64 x i8>, <64 x i8>* %b
  %res = ashr <64 x i8> %op1, %op2
  store <64 x i8> %res, <64 x i8>* %a
  ret void
}

define void @ashr_v128i8(<128 x i8>* %a, <128 x i8>* %b) #0 {
; CHECK-LABEL: ashr_v128i8:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128
; VBITS_GE_1024-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: asr [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
; VBITS_GE_1024-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <128 x i8>, <128 x i8>* %a
  %op2 = load <128 x i8>, <128 x i8>* %b
  %res = ashr <128 x i8> %op1, %op2
  store <128 x i8> %res, <128 x i8>* %a
  ret void
}

define void @ashr_v256i8(<256 x i8>* %a, <256 x i8>* %b) #0 {
; CHECK-LABEL: ashr_v256i8:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256
; VBITS_GE_2048-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: asr [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
; VBITS_GE_2048-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <256 x i8>, <256 x i8>* %a
  %op2 = load <256 x i8>, <256 x i8>* %b
  %res = ashr <256 x i8> %op1, %op2
  store <256 x i8> %res, <256 x i8>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <4 x i16> @ashr_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 {
; CHECK-LABEL: ashr_v4i16:
; CHECK: neg v1.4h, v1.4h
; CHECK-NEXT: sshl v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %res = ashr <4 x i16> %op1, %op2
  ret <4 x i16> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x i16> @ashr_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 {
; CHECK-LABEL: ashr_v8i16:
; CHECK: neg v1.8h, v1.8h
; CHECK-NEXT: sshl v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %res = ashr <8 x i16> %op1, %op2
  ret <8 x i16> %res
}

define void @ashr_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 {
; CHECK-LABEL: ashr_v16i16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: asr [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <16 x i16>, <16 x i16>* %a
  %op2 = load <16 x i16>, <16 x i16>* %b
  %res = ashr <16 x i16> %op1, %op2
  store <16 x i16> %res, <16 x i16>* %a
  ret void
}

define void @ashr_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
; CHECK-LABEL: ashr_v32i16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: asr [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-DAG: asr [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h
; VBITS_EQ_256-DAG: asr [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x i16>, <32 x i16>* %a
  %op2 = load <32 x i16>, <32 x i16>* %b
  %res = ashr <32 x i16> %op1, %op2
  store <32 x i16> %res, <32 x i16>* %a
  ret void
}

define void @ashr_v64i16(<64 x i16>* %a, <64 x i16>* %b) #0 {
; CHECK-LABEL: ashr_v64i16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: asr [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <64 x i16>, <64 x i16>* %a
  %op2 = load <64 x i16>, <64 x i16>* %b
  %res = ashr <64 x i16> %op1, %op2
  store <64 x i16> %res, <64 x i16>* %a
  ret void
}

define void @ashr_v128i16(<128 x i16>* %a, <128 x i16>* %b) #0 {
; CHECK-LABEL: ashr_v128i16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: asr [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <128 x i16>, <128 x i16>* %a
  %op2 = load <128 x i16>, <128 x i16>* %b
  %res = ashr <128 x i16> %op1, %op2
  store <128 x i16> %res, <128 x i16>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x i32> @ashr_v2i32(<2 x i32> %op1, <2 x i32> %op2) #0 {
; CHECK-LABEL: ashr_v2i32:
; CHECK: neg v1.2s, v1.2s
; CHECK-NEXT: sshl v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %res = ashr <2 x i32> %op1, %op2
  ret <2 x i32> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @ashr_v4i32(<4 x i32> %op1, <4 x i32> %op2) #0 {
; CHECK-LABEL: ashr_v4i32:
; CHECK: neg v1.4s, v1.4s
; CHECK-NEXT: sshl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %res = ashr <4 x i32> %op1, %op2
  ret <4 x i32> %res
}

define void @ashr_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 {
; CHECK-LABEL: ashr_v8i32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; CHECK-NEXT: asr [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <8 x i32>, <8 x i32>* %a
  %op2 = load <8 x i32>, <8 x i32>* %b
  %res = ashr <8 x i32> %op1, %op2
  store <8 x i32> %res, <8 x i32>* %a
  ret void
}

define void @ashr_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
; CHECK-LABEL: ashr_v16i32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: asr [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: asr [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s
; VBITS_EQ_256-DAG: asr [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x i32>, <16 x i32>* %a
  %op2 = load <16 x i32>, <16 x i32>* %b
  %res = ashr <16 x i32> %op1, %op2
  store <16 x i32> %res, <16 x i32>* %a
  ret void
}

define void @ashr_v32i32(<32 x i32>* %a, <32 x i32>* %b) #0 {
; CHECK-LABEL: ashr_v32i32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: asr [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <32 x i32>, <32 x i32>* %a
  %op2 = load <32 x i32>, <32 x i32>* %b
  %res = ashr <32 x i32> %op1, %op2
  store <32 x i32> %res, <32 x i32>* %a
  ret void
}

define void @ashr_v64i32(<64 x i32>* %a, <64 x i32>* %b) #0 {
; CHECK-LABEL: ashr_v64i32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: asr [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <64 x i32>, <64 x i32>* %a
  %op2 = load <64 x i32>, <64 x i32>* %b
  %res = ashr <64 x i32> %op1, %op2
  store <64 x i32> %res, <64 x i32>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x i64> @ashr_v1i64(<1 x i64> %op1, <1 x i64> %op2) #0 {
; CHECK-LABEL: ashr_v1i64:
; CHECK: neg d1, d1
; CHECK-NEXT: sshl d0, d0, d1
; CHECK-NEXT: ret
  %res = ashr <1 x i64> %op1, %op2
  ret <1 x i64> %res
}
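
; The single-element <1 x i64> case above uses the scalar SIMD (d-register)
; form of the same neg + sshl idiom.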

; Don't use SVE for 128-bit vectors.
define <2 x i64> @ashr_v2i64(<2 x i64> %op1, <2 x i64> %op2) #0 {
; CHECK-LABEL: ashr_v2i64:
; CHECK: neg v1.2d, v1.2d
; CHECK-NEXT: sshl v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
  %res = ashr <2 x i64> %op1, %op2
  ret <2 x i64> %res
}

define void @ashr_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
; CHECK-LABEL: ashr_v4i64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; CHECK-NEXT: asr [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <4 x i64>, <4 x i64>* %a
  %op2 = load <4 x i64>, <4 x i64>* %b
  %res = ashr <4 x i64> %op1, %op2
  store <4 x i64> %res, <4 x i64>* %a
  ret void
}

define void @ashr_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
; CHECK-LABEL: ashr_v8i64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: asr [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: asr [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d
; VBITS_EQ_256-DAG: asr [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x i64>, <8 x i64>* %a
  %op2 = load <8 x i64>, <8 x i64>* %b
  %res = ashr <8 x i64> %op1, %op2
  store <8 x i64> %res, <8 x i64>* %a
  ret void
}

define void @ashr_v16i64(<16 x i64>* %a, <16 x i64>* %b) #0 {
; CHECK-LABEL: ashr_v16i64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: asr [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <16 x i64>, <16 x i64>* %a
  %op2 = load <16 x i64>, <16 x i64>* %b
  %res = ashr <16 x i64> %op1, %op2
  store <16 x i64> %res, <16 x i64>* %a
  ret void
}

define void @ashr_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 {
; CHECK-LABEL: ashr_v32i64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: asr [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <32 x i64>, <32 x i64>* %a
  %op2 = load <32 x i64>, <32 x i64>* %b
  %res = ashr <32 x i64> %op1, %op2
  store <32 x i64> %res, <32 x i64>* %a
  ret void
}

;
; LSHR
;
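
; As with ashr, the NEON-sized lshr cases are expected to lower to a negated
; shift amount, here fed to ushl, since an unsigned left shift by a negative
; amount is a logical right shift.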

; Don't use SVE for 64-bit vectors.
define <8 x i8> @lshr_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
; CHECK-LABEL: lshr_v8i8:
; CHECK: neg v1.8b, v1.8b
; CHECK-NEXT: ushl v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %res = lshr <8 x i8> %op1, %op2
  ret <8 x i8> %res
}

; Don't use SVE for 128-bit vectors.
define <16 x i8> @lshr_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
; CHECK-LABEL: lshr_v16i8:
; CHECK: neg v1.16b, v1.16b
; CHECK-NEXT: ushl v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %res = lshr <16 x i8> %op1, %op2
  ret <16 x i8> %res
}

define void @lshr_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
; CHECK-LABEL: lshr_v32i8:
; CHECK: ptrue [[PG:p[0-9]+]].b, vl32
; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; CHECK-NEXT: lsr [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
; CHECK-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <32 x i8>, <32 x i8>* %a
  %op2 = load <32 x i8>, <32 x i8>* %b
  %res = lshr <32 x i8> %op1, %op2
  store <32 x i8> %res, <32 x i8>* %a
  ret void
}

define void @lshr_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
; CHECK-LABEL: lshr_v64i8:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64
; VBITS_GE_512-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: lsr [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]]
; VBITS_EQ_256-DAG: lsr [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b
; VBITS_EQ_256-DAG: lsr [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b
; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <64 x i8>, <64 x i8>* %a
  %op2 = load <64 x i8>, <64 x i8>* %b
  %res = lshr <64 x i8> %op1, %op2
  store <64 x i8> %res, <64 x i8>* %a
  ret void
}

define void @lshr_v128i8(<128 x i8>* %a, <128 x i8>* %b) #0 {
; CHECK-LABEL: lshr_v128i8:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128
; VBITS_GE_1024-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: lsr [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
; VBITS_GE_1024-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <128 x i8>, <128 x i8>* %a
  %op2 = load <128 x i8>, <128 x i8>* %b
  %res = lshr <128 x i8> %op1, %op2
  store <128 x i8> %res, <128 x i8>* %a
  ret void
}

define void @lshr_v256i8(<256 x i8>* %a, <256 x i8>* %b) #0 {
; CHECK-LABEL: lshr_v256i8:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256
; VBITS_GE_2048-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: lsr [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
; VBITS_GE_2048-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <256 x i8>, <256 x i8>* %a
  %op2 = load <256 x i8>, <256 x i8>* %b
  %res = lshr <256 x i8> %op1, %op2
  store <256 x i8> %res, <256 x i8>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <4 x i16> @lshr_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 {
; CHECK-LABEL: lshr_v4i16:
; CHECK: neg v1.4h, v1.4h
; CHECK-NEXT: ushl v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %res = lshr <4 x i16> %op1, %op2
  ret <4 x i16> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x i16> @lshr_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 {
; CHECK-LABEL: lshr_v8i16:
; CHECK: neg v1.8h, v1.8h
; CHECK-NEXT: ushl v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %res = lshr <8 x i16> %op1, %op2
  ret <8 x i16> %res
}

define void @lshr_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 {
; CHECK-LABEL: lshr_v16i16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: lsr [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <16 x i16>, <16 x i16>* %a
  %op2 = load <16 x i16>, <16 x i16>* %b
  %res = lshr <16 x i16> %op1, %op2
  store <16 x i16> %res, <16 x i16>* %a
  ret void
}

define void @lshr_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
; CHECK-LABEL: lshr_v32i16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: lsr [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-DAG: lsr [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h
; VBITS_EQ_256-DAG: lsr [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x i16>, <32 x i16>* %a
  %op2 = load <32 x i16>, <32 x i16>* %b
  %res = lshr <32 x i16> %op1, %op2
  store <32 x i16> %res, <32 x i16>* %a
  ret void
}

define void @lshr_v64i16(<64 x i16>* %a, <64 x i16>* %b) #0 {
; CHECK-LABEL: lshr_v64i16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: lsr [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <64 x i16>, <64 x i16>* %a
  %op2 = load <64 x i16>, <64 x i16>* %b
  %res = lshr <64 x i16> %op1, %op2
  store <64 x i16> %res, <64 x i16>* %a
  ret void
}

define void @lshr_v128i16(<128 x i16>* %a, <128 x i16>* %b) #0 {
; CHECK-LABEL: lshr_v128i16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: lsr [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <128 x i16>, <128 x i16>* %a
  %op2 = load <128 x i16>, <128 x i16>* %b
  %res = lshr <128 x i16> %op1, %op2
  store <128 x i16> %res, <128 x i16>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x i32> @lshr_v2i32(<2 x i32> %op1, <2 x i32> %op2) #0 {
; CHECK-LABEL: lshr_v2i32:
; CHECK: neg v1.2s, v1.2s
; CHECK-NEXT: ushl v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %res = lshr <2 x i32> %op1, %op2
  ret <2 x i32> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @lshr_v4i32(<4 x i32> %op1, <4 x i32> %op2) #0 {
; CHECK-LABEL: lshr_v4i32:
; CHECK: neg v1.4s, v1.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %res = lshr <4 x i32> %op1, %op2
  ret <4 x i32> %res
}

define void @lshr_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 {
; CHECK-LABEL: lshr_v8i32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; CHECK-NEXT: lsr [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <8 x i32>, <8 x i32>* %a
  %op2 = load <8 x i32>, <8 x i32>* %b
  %res = lshr <8 x i32> %op1, %op2
  store <8 x i32> %res, <8 x i32>* %a
  ret void
}

define void @lshr_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
; CHECK-LABEL: lshr_v16i32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: lsr [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: lsr [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s
; VBITS_EQ_256-DAG: lsr [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x i32>, <16 x i32>* %a
  %op2 = load <16 x i32>, <16 x i32>* %b
  %res = lshr <16 x i32> %op1, %op2
  store <16 x i32> %res, <16 x i32>* %a
  ret void
}

define void @lshr_v32i32(<32 x i32>* %a, <32 x i32>* %b) #0 {
; CHECK-LABEL: lshr_v32i32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: lsr [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <32 x i32>, <32 x i32>* %a
  %op2 = load <32 x i32>, <32 x i32>* %b
  %res = lshr <32 x i32> %op1, %op2
  store <32 x i32> %res, <32 x i32>* %a
  ret void
}

define void @lshr_v64i32(<64 x i32>* %a, <64 x i32>* %b) #0 {
; CHECK-LABEL: lshr_v64i32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: lsr [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <64 x i32>, <64 x i32>* %a
  %op2 = load <64 x i32>, <64 x i32>* %b
  %res = lshr <64 x i32> %op1, %op2
  store <64 x i32> %res, <64 x i32>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x i64> @lshr_v1i64(<1 x i64> %op1, <1 x i64> %op2) #0 {
; CHECK-LABEL: lshr_v1i64:
; CHECK: neg d1, d1
; CHECK-NEXT: ushl d0, d0, d1
; CHECK-NEXT: ret
  %res = lshr <1 x i64> %op1, %op2
  ret <1 x i64> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @lshr_v2i64(<2 x i64> %op1, <2 x i64> %op2) #0 {
; CHECK-LABEL: lshr_v2i64:
; CHECK: neg v1.2d, v1.2d
; CHECK-NEXT: ushl v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
  %res = lshr <2 x i64> %op1, %op2
  ret <2 x i64> %res
}

define void @lshr_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
; CHECK-LABEL: lshr_v4i64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; CHECK-NEXT: lsr [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <4 x i64>, <4 x i64>* %a
  %op2 = load <4 x i64>, <4 x i64>* %b
  %res = lshr <4 x i64> %op1, %op2
  store <4 x i64> %res, <4 x i64>* %a
  ret void
}

define void @lshr_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
; CHECK-LABEL: lshr_v8i64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: lsr [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: lsr [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d
; VBITS_EQ_256-DAG: lsr [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x i64>, <8 x i64>* %a
  %op2 = load <8 x i64>, <8 x i64>* %b
  %res = lshr <8 x i64> %op1, %op2
  store <8 x i64> %res, <8 x i64>* %a
  ret void
}

define void @lshr_v16i64(<16 x i64>* %a, <16 x i64>* %b) #0 {
; CHECK-LABEL: lshr_v16i64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: lsr [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <16 x i64>, <16 x i64>* %a
  %op2 = load <16 x i64>, <16 x i64>* %b
  %res = lshr <16 x i64> %op1, %op2
  store <16 x i64> %res, <16 x i64>* %a
  ret void
}

define void @lshr_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 {
; CHECK-LABEL: lshr_v32i64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: lsr [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <32 x i64>, <32 x i64>* %a
  %op2 = load <32 x i64>, <32 x i64>* %b
  %res = lshr <32 x i64> %op1, %op2
  store <32 x i64> %res, <32 x i64>* %a
  ret void
}

;
; SHL
;
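
; shl needs no negation: the NEON cases use ushl directly with the positive
; shift amounts, while the SVE cases use the predicated lsl.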

; Don't use SVE for 64-bit vectors.
define <8 x i8> @shl_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
; CHECK-LABEL: shl_v8i8:
; CHECK: ushl v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %res = shl <8 x i8> %op1, %op2
  ret <8 x i8> %res
}

; Don't use SVE for 128-bit vectors.
define <16 x i8> @shl_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
; CHECK-LABEL: shl_v16i8:
; CHECK: ushl v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %res = shl <16 x i8> %op1, %op2
  ret <16 x i8> %res
}

define void @shl_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
; CHECK-LABEL: shl_v32i8:
; CHECK: ptrue [[PG:p[0-9]+]].b, vl32
; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; CHECK-NEXT: lsl [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
; CHECK-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <32 x i8>, <32 x i8>* %a
  %op2 = load <32 x i8>, <32 x i8>* %b
  %res = shl <32 x i8> %op1, %op2
  store <32 x i8> %res, <32 x i8>* %a
  ret void
}

define void @shl_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
; CHECK-LABEL: shl_v64i8:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64
; VBITS_GE_512-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: lsl [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]]
; VBITS_EQ_256-DAG: lsl [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b
; VBITS_EQ_256-DAG: lsl [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b
; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <64 x i8>, <64 x i8>* %a
  %op2 = load <64 x i8>, <64 x i8>* %b
  %res = shl <64 x i8> %op1, %op2
  store <64 x i8> %res, <64 x i8>* %a
  ret void
}

define void @shl_v128i8(<128 x i8>* %a, <128 x i8>* %b) #0 {
; CHECK-LABEL: shl_v128i8:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128
; VBITS_GE_1024-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: lsl [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
; VBITS_GE_1024-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <128 x i8>, <128 x i8>* %a
  %op2 = load <128 x i8>, <128 x i8>* %b
  %res = shl <128 x i8> %op1, %op2
  store <128 x i8> %res, <128 x i8>* %a
  ret void
}

define void @shl_v256i8(<256 x i8>* %a, <256 x i8>* %b) #0 {
; CHECK-LABEL: shl_v256i8:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256
; VBITS_GE_2048-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: lsl [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
; VBITS_GE_2048-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <256 x i8>, <256 x i8>* %a
  %op2 = load <256 x i8>, <256 x i8>* %b
  %res = shl <256 x i8> %op1, %op2
  store <256 x i8> %res, <256 x i8>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <4 x i16> @shl_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 {
; CHECK-LABEL: shl_v4i16:
; CHECK: ushl v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %res = shl <4 x i16> %op1, %op2
  ret <4 x i16> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x i16> @shl_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 {
; CHECK-LABEL: shl_v8i16:
; CHECK: ushl v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %res = shl <8 x i16> %op1, %op2
  ret <8 x i16> %res
}

define void @shl_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 {
; CHECK-LABEL: shl_v16i16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: lsl [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <16 x i16>, <16 x i16>* %a
  %op2 = load <16 x i16>, <16 x i16>* %b
  %res = shl <16 x i16> %op1, %op2
  store <16 x i16> %res, <16 x i16>* %a
  ret void
}

define void @shl_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
; CHECK-LABEL: shl_v32i16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: lsl [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-DAG: lsl [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h
; VBITS_EQ_256-DAG: lsl [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x i16>, <32 x i16>* %a
  %op2 = load <32 x i16>, <32 x i16>* %b
  %res = shl <32 x i16> %op1, %op2
  store <32 x i16> %res, <32 x i16>* %a
  ret void
}

define void @shl_v64i16(<64 x i16>* %a, <64 x i16>* %b) #0 {
; CHECK-LABEL: shl_v64i16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: lsl [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <64 x i16>, <64 x i16>* %a
  %op2 = load <64 x i16>, <64 x i16>* %b
  %res = shl <64 x i16> %op1, %op2
  store <64 x i16> %res, <64 x i16>* %a
  ret void
}

define void @shl_v128i16(<128 x i16>* %a, <128 x i16>* %b) #0 {
; CHECK-LABEL: shl_v128i16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: lsl [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <128 x i16>, <128 x i16>* %a
  %op2 = load <128 x i16>, <128 x i16>* %b
  %res = shl <128 x i16> %op1, %op2
  store <128 x i16> %res, <128 x i16>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x i32> @shl_v2i32(<2 x i32> %op1, <2 x i32> %op2) #0 {
; CHECK-LABEL: shl_v2i32:
; CHECK: ushl v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %res = shl <2 x i32> %op1, %op2
  ret <2 x i32> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @shl_v4i32(<4 x i32> %op1, <4 x i32> %op2) #0 {
; CHECK-LABEL: shl_v4i32:
; CHECK: ushl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %res = shl <4 x i32> %op1, %op2
  ret <4 x i32> %res
}

define void @shl_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 {
; CHECK-LABEL: shl_v8i32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; CHECK-NEXT: lsl [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <8 x i32>, <8 x i32>* %a
  %op2 = load <8 x i32>, <8 x i32>* %b
  %res = shl <8 x i32> %op1, %op2
  store <8 x i32> %res, <8 x i32>* %a
  ret void
}

define void @shl_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
; CHECK-LABEL: shl_v16i32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: lsl [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: lsl [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s
; VBITS_EQ_256-DAG: lsl [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x i32>, <16 x i32>* %a
  %op2 = load <16 x i32>, <16 x i32>* %b
  %res = shl <16 x i32> %op1, %op2
  store <16 x i32> %res, <16 x i32>* %a
  ret void
}

define void @shl_v32i32(<32 x i32>* %a, <32 x i32>* %b) #0 {
; CHECK-LABEL: shl_v32i32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: lsl [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <32 x i32>, <32 x i32>* %a
  %op2 = load <32 x i32>, <32 x i32>* %b
  %res = shl <32 x i32> %op1, %op2
  store <32 x i32> %res, <32 x i32>* %a
  ret void
}

define void @shl_v64i32(<64 x i32>* %a, <64 x i32>* %b) #0 {
; CHECK-LABEL: shl_v64i32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: lsl [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <64 x i32>, <64 x i32>* %a
  %op2 = load <64 x i32>, <64 x i32>* %b
  %res = shl <64 x i32> %op1, %op2
  store <64 x i32> %res, <64 x i32>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x i64> @shl_v1i64(<1 x i64> %op1, <1 x i64> %op2) #0 {
; CHECK-LABEL: shl_v1i64:
; CHECK: ushl d0, d0, d1
; CHECK-NEXT: ret
  %res = shl <1 x i64> %op1, %op2
  ret <1 x i64> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @shl_v2i64(<2 x i64> %op1, <2 x i64> %op2) #0 {
; CHECK-LABEL: shl_v2i64:
; CHECK: ushl v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
  %res = shl <2 x i64> %op1, %op2
  ret <2 x i64> %res
}

define void @shl_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
; CHECK-LABEL: shl_v4i64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; CHECK-NEXT: lsl [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <4 x i64>, <4 x i64>* %a
  %op2 = load <4 x i64>, <4 x i64>* %b
  %res = shl <4 x i64> %op1, %op2
  store <4 x i64> %res, <4 x i64>* %a
  ret void
}

define void @shl_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
; CHECK-LABEL: shl_v8i64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: lsl [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: lsl [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d
; VBITS_EQ_256-DAG: lsl [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x i64>, <8 x i64>* %a
  %op2 = load <8 x i64>, <8 x i64>* %b
  %res = shl <8 x i64> %op1, %op2
  store <8 x i64> %res, <8 x i64>* %a
  ret void
}

define void @shl_v16i64(<16 x i64>* %a, <16 x i64>* %b) #0 {
; CHECK-LABEL: shl_v16i64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: lsl [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <16 x i64>, <16 x i64>* %a
  %op2 = load <16 x i64>, <16 x i64>* %b
  %res = shl <16 x i64> %op1, %op2
  store <16 x i64> %res, <16 x i64>* %a
  ret void
}

define void @shl_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 {
; CHECK-LABEL: shl_v32i64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: lsl [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <32 x i64>, <32 x i64>* %a
  %op2 = load <32 x i64>, <32 x i64>* %b
  %res = shl <32 x i64> %op1, %op2
  store <32 x i64> %res, <32 x i64>* %a
  ret void
}

attributes #0 = { "target-features"="+sve" }