; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
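
; Code generation tests for the SME2 multi-vector saturating doubling multiply
; high (SQDMULH) intrinsics, covering both the "single" forms (one shared Zm
; operand) and the "multi" forms (a full vector group as the second operand).
; The mov instructions in the expected output copy the inputs into consecutive,
; suitably aligned Z registers (e.g. z4-z5 or z24-z27), as required by the
; multi-vector instruction forms.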

; SQDMULH (Single, x2)

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_sat_double_mulh_single_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_sat_double_mulh_single_x2_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqdmulh { z4.b, z5.b }, { z4.b, z5.b }, z3.b
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_sat_double_mulh_single_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_sat_double_mulh_single_x2_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqdmulh { z4.h, z5.h }, { z4.h, z5.h }, z3.h
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_sat_double_mulh_single_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_sat_double_mulh_single_x2_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqdmulh { z4.s, z5.s }, { z4.s, z5.s }, z3.s
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_sat_double_mulh_single_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_sat_double_mulh_single_x2_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqdmulh { z4.d, z5.d }, { z4.d, z5.d }, z3.d
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

; SQDMULH (Single, x4)

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@multi_vec_sat_double_mulh_single_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_sat_double_mulh_single_x4_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    sqdmulh { z24.b - z27.b }, { z24.b - z27.b }, z5.b
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
    @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@multi_vec_sat_double_mulh_single_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_sat_double_mulh_single_x4_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    sqdmulh { z24.h - z27.h }, { z24.h - z27.h }, z5.h
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
    @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@multi_vec_sat_double_mulh_single_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_sat_double_mulh_single_x4_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    sqdmulh { z24.s - z27.s }, { z24.s - z27.s }, z5.s
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
    @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@multi_vec_sat_double_mulh_single_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_sat_double_mulh_single_x4_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    sqdmulh { z24.d - z27.d }, { z24.d - z27.d }, z5.d
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
    @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

; SQDMULH (Multi, x2)

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_sat_double_mulh_multi_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) {
; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x2_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqdmulh { z4.b, z5.b }, { z4.b, z5.b }, { z6.b, z7.b }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_sat_double_mulh_multi_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x2_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqdmulh { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_sat_double_mulh_multi_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) {
; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x2_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqdmulh { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_sat_double_mulh_multi_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x2_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqdmulh { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

; SQDMULH (Multi, x4)

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@multi_vec_sat_double_mulh_multi_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
        <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
; CHECK-NEXT:    sqdmulh { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
    @llvm.aarch64.sve.sqdmulh.vgx4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
        <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@multi_vec_sat_double_mulh_multi_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
        <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
; CHECK-NEXT:    sqdmulh { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
    @llvm.aarch64.sve.sqdmulh.vgx4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
        <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@multi_vec_sat_double_mulh_multi_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
        <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
; CHECK-NEXT:    sqdmulh { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
    @llvm.aarch64.sve.sqdmulh.vgx4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
        <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@multi_vec_sat_double_mulh_multi_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
        <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
; CHECK-NEXT:    sqdmulh { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
    @llvm.aarch64.sve.sqdmulh.vgx4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
        <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
  @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
  @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
  @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
  @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
  @llvm.aarch64.sve.sqdmulh.vgx4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>,
      <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
  @llvm.aarch64.sve.sqdmulh.vgx4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>,
      <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
  @llvm.aarch64.sve.sqdmulh.vgx4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>,
      <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
  @llvm.aarch64.sve.sqdmulh.vgx4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>,
      <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)