; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -force-streaming -verify-machineinstrs < %s | FileCheck %s

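; SRSHL (Single, x2)
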
define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_single_x2_s8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x2_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    srshl { z4.b, z5.b }, { z4.b, z5.b }, z3.b
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.srshl.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_single_x2_s16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x2_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    srshl { z4.h, z5.h }, { z4.h, z5.h }, z3.h
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.srshl.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_single_x2_s32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x2_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    srshl { z4.s, z5.s }, { z4.s, z5.s }, z3.s
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.srshl.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_single_x2_s64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x2_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    srshl { z4.d, z5.d }, { z4.d, z5.d }, z3.d
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.srshl.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

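; SRSHL (Single, x4)
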
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_single_x4_s8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x4_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    srshl { z24.b - z27.b }, { z24.b - z27.b }, z5.b
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
              @llvm.aarch64.sve.srshl.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_single_x4_s16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x4_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    srshl { z24.h - z27.h }, { z24.h - z27.h }, z5.h
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
              @llvm.aarch64.sve.srshl.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_single_x4_s32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x4_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    srshl { z24.s - z27.s }, { z24.s - z27.s }, z5.s
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
              @llvm.aarch64.sve.srshl.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_single_x4_s64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x4_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    srshl { z24.d - z27.d }, { z24.d - z27.d }, z5.d
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
              @llvm.aarch64.sve.srshl.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

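; URSHL (Single, x2)
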
define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_single_x2_u8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x2_u8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    urshl { z4.b, z5.b }, { z4.b, z5.b }, z3.b
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.urshl.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_single_x2_u16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x2_u16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    urshl { z4.h, z5.h }, { z4.h, z5.h }, z3.h
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.urshl.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_single_x2_u32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x2_u32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    urshl { z4.s, z5.s }, { z4.s, z5.s }, z3.s
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.urshl.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_single_x2_u64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x2_u64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    urshl { z4.d, z5.d }, { z4.d, z5.d }, z3.d
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.urshl.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

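; URSHL (Single, x4)
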
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_single_x4_u8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x4_u8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    urshl { z24.b - z27.b }, { z24.b - z27.b }, z5.b
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
              @llvm.aarch64.sve.urshl.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_single_x4_u16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x4_u16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    urshl { z24.h - z27.h }, { z24.h - z27.h }, z5.h
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
              @llvm.aarch64.sve.urshl.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_single_x4_u32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x4_u32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    urshl { z24.s - z27.s }, { z24.s - z27.s }, z5.s
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
              @llvm.aarch64.sve.urshl.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_single_x4_u64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x4_u64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    urshl { z24.d - z27.d }, { z24.d - z27.d }, z5.d
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
              @llvm.aarch64.sve.urshl.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

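; SRSHL (Multi, x2)
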
define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_x2_s8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) {
; CHECK-LABEL: multi_vec_rounding_shl_x2_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    srshl { z4.b, z5.b }, { z4.b, z5.b }, { z6.b, z7.b }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.srshl.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_x2_s16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
; CHECK-LABEL: multi_vec_rounding_shl_x2_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    srshl { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.srshl.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_x2_s32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) {
; CHECK-LABEL: multi_vec_rounding_shl_x2_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    srshl { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.srshl.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_x2_s64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
; CHECK-LABEL: multi_vec_rounding_shl_x2_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    srshl { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.srshl.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

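; SRSHL (Multi, x4)
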
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_x4_s8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    srshl { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
              @llvm.aarch64.sve.srshl.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
                                                 <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_x4_s16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    srshl { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
              @llvm.aarch64.sve.srshl.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
                                                 <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_x4_s32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    srshl { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
              @llvm.aarch64.sve.srshl.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
                                                 <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_x4_s64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    srshl { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
              @llvm.aarch64.sve.srshl.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
                                                 <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

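; URSHL (Multi, x2)
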
define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_uhl_x2_u8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) {
; CHECK-LABEL: multi_vec_rounding_uhl_x2_u8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    urshl { z4.b, z5.b }, { z4.b, z5.b }, { z6.b, z7.b }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.urshl.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_uhl_x2_u16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
; CHECK-LABEL: multi_vec_rounding_uhl_x2_u16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    urshl { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.urshl.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_uhl_x2_u32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) {
; CHECK-LABEL: multi_vec_rounding_uhl_x2_u32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    urshl { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.urshl.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_uhl_x2_u64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
; CHECK-LABEL: multi_vec_rounding_uhl_x2_u64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    urshl { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.urshl.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

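; URSHL (Multi, x4)
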
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_x4_u8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_u8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    urshl { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
              @llvm.aarch64.sve.urshl.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
                                                 <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_x4_u16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_u16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    urshl { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
              @llvm.aarch64.sve.urshl.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
                                                 <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_x4_u32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_u32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    urshl { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
              @llvm.aarch64.sve.urshl.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
                                                 <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_x4_u64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_u64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    urshl { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
              @llvm.aarch64.sve.urshl.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
                                                 <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.srshl.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.srshl.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.srshl.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.srshl.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.srshl.single.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.srshl.single.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.srshl.single.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.srshl.single.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.urshl.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.urshl.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.urshl.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.urshl.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.urshl.single.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.urshl.single.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.urshl.single.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.urshl.single.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.srshl.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.srshl.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.srshl.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.srshl.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
        @llvm.aarch64.sve.srshl.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
        @llvm.aarch64.sve.srshl.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
        @llvm.aarch64.sve.srshl.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
        @llvm.aarch64.sve.srshl.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.urshl.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.urshl.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.urshl.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.urshl.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
        @llvm.aarch64.sve.urshl.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
        @llvm.aarch64.sve.urshl.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
        @llvm.aarch64.sve.urshl.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
        @llvm.aarch64.sve.urshl.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)