; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -force-streaming -verify-machineinstrs < %s | FileCheck %s
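
; SQDMULH (Single, x2)
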
define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_sat_double_mulh_single_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_sat_double_mulh_single_x2_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqdmulh { z4.b, z5.b }, { z4.b, z5.b }, z3.b
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_sat_double_mulh_single_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_sat_double_mulh_single_x2_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqdmulh { z4.h, z5.h }, { z4.h, z5.h }, z3.h
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_sat_double_mulh_single_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_sat_double_mulh_single_x2_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqdmulh { z4.s, z5.s }, { z4.s, z5.s }, z3.s
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_sat_double_mulh_single_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_sat_double_mulh_single_x2_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqdmulh { z4.d, z5.d }, { z4.d, z5.d }, z3.d
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

; SQDMULH (Single, x4)

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_sat_double_mulh_single_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_sat_double_mulh_single_x4_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    sqdmulh { z24.b - z27.b }, { z24.b - z27.b }, z5.b
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
              @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_sat_double_mulh_single_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_sat_double_mulh_single_x4_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    sqdmulh { z24.h - z27.h }, { z24.h - z27.h }, z5.h
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
              @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_sat_double_mulh_single_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_sat_double_mulh_single_x4_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    sqdmulh { z24.s - z27.s }, { z24.s - z27.s }, z5.s
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
              @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_sat_double_mulh_single_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_sat_double_mulh_single_x4_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    sqdmulh { z24.d - z27.d }, { z24.d - z27.d }, z5.d
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
              @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

; SQDMULH (x2, Multi)

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_sat_double_mulh_multi_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) {
; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x2_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqdmulh { z4.b, z5.b }, { z4.b, z5.b }, { z6.b, z7.b }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_sat_double_mulh_multi_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x2_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqdmulh { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_sat_double_mulh_multi_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) {
; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x2_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqdmulh { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_sat_double_mulh_multi_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x2_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqdmulh { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

; SQDMULH (x4, Multi)

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_sat_double_mulh_multi_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    sqdmulh { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
                             <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
              @llvm.aarch64.sve.sqdmulh.vgx4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
                                                     <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_sat_double_mulh_multi_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    sqdmulh { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
                             <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
              @llvm.aarch64.sve.sqdmulh.vgx4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
                                                     <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_sat_double_mulh_multi_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    sqdmulh { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
                             <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
              @llvm.aarch64.sve.sqdmulh.vgx4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
                                                     <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_sat_double_mulh_multi_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    sqdmulh { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
                             <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
              @llvm.aarch64.sve.sqdmulh.vgx4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
                                                     <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
 @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
 @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
 @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
 @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
 @llvm.aarch64.sve.sqdmulh.vgx4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>,
                                        <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
 @llvm.aarch64.sve.sqdmulh.vgx4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>,
                                        <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
 @llvm.aarch64.sve.sqdmulh.vgx4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>,
                                        <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
 @llvm.aarch64.sve.sqdmulh.vgx4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>,
                                        <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)