1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+sve-b16b16 -force-streaming -verify-machineinstrs < %s | FileCheck %s
6 define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_single_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) {
7 ; CHECK-LABEL: multi_vec_max_single_x2_s8:
9 ; CHECK-NEXT: mov z5.d, z2.d
10 ; CHECK-NEXT: mov z4.d, z1.d
11 ; CHECK-NEXT: smax { z4.b, z5.b }, { z4.b, z5.b }, z3.b
12 ; CHECK-NEXT: mov z0.d, z4.d
13 ; CHECK-NEXT: mov z1.d, z5.d
15 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smax.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm)
16 ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
19 define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_single_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) {
20 ; CHECK-LABEL: multi_vec_max_single_x2_s16:
22 ; CHECK-NEXT: mov z5.d, z2.d
23 ; CHECK-NEXT: mov z4.d, z1.d
24 ; CHECK-NEXT: smax { z4.h, z5.h }, { z4.h, z5.h }, z3.h
25 ; CHECK-NEXT: mov z0.d, z4.d
26 ; CHECK-NEXT: mov z1.d, z5.d
28 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smax.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm)
29 ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
32 define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_single_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) {
33 ; CHECK-LABEL: multi_vec_max_single_x2_s32:
35 ; CHECK-NEXT: mov z5.d, z2.d
36 ; CHECK-NEXT: mov z4.d, z1.d
37 ; CHECK-NEXT: smax { z4.s, z5.s }, { z4.s, z5.s }, z3.s
38 ; CHECK-NEXT: mov z0.d, z4.d
39 ; CHECK-NEXT: mov z1.d, z5.d
41 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smax.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm)
42 ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
45 define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_single_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) {
46 ; CHECK-LABEL: multi_vec_max_single_x2_s64:
48 ; CHECK-NEXT: mov z5.d, z2.d
49 ; CHECK-NEXT: mov z4.d, z1.d
50 ; CHECK-NEXT: smax { z4.d, z5.d }, { z4.d, z5.d }, z3.d
51 ; CHECK-NEXT: mov z0.d, z4.d
52 ; CHECK-NEXT: mov z1.d, z5.d
54 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smax.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm)
55 ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
60 define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_single_x2_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) {
61 ; CHECK-LABEL: multi_vec_max_single_x2_u8:
63 ; CHECK-NEXT: mov z5.d, z2.d
64 ; CHECK-NEXT: mov z4.d, z1.d
65 ; CHECK-NEXT: umax { z4.b, z5.b }, { z4.b, z5.b }, z3.b
66 ; CHECK-NEXT: mov z0.d, z4.d
67 ; CHECK-NEXT: mov z1.d, z5.d
69 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umax.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm)
70 ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
73 define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_single_x2_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) {
74 ; CHECK-LABEL: multi_vec_max_single_x2_u16:
76 ; CHECK-NEXT: mov z5.d, z2.d
77 ; CHECK-NEXT: mov z4.d, z1.d
78 ; CHECK-NEXT: umax { z4.h, z5.h }, { z4.h, z5.h }, z3.h
79 ; CHECK-NEXT: mov z0.d, z4.d
80 ; CHECK-NEXT: mov z1.d, z5.d
82 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umax.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm)
83 ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
86 define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_single_x2_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) {
87 ; CHECK-LABEL: multi_vec_max_single_x2_u32:
89 ; CHECK-NEXT: mov z5.d, z2.d
90 ; CHECK-NEXT: mov z4.d, z1.d
91 ; CHECK-NEXT: umax { z4.s, z5.s }, { z4.s, z5.s }, z3.s
92 ; CHECK-NEXT: mov z0.d, z4.d
93 ; CHECK-NEXT: mov z1.d, z5.d
95 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umax.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm)
96 ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
99 define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_single_x2_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) {
100 ; CHECK-LABEL: multi_vec_max_single_x2_u64:
102 ; CHECK-NEXT: mov z5.d, z2.d
103 ; CHECK-NEXT: mov z4.d, z1.d
104 ; CHECK-NEXT: umax { z4.d, z5.d }, { z4.d, z5.d }, z3.d
105 ; CHECK-NEXT: mov z0.d, z4.d
106 ; CHECK-NEXT: mov z1.d, z5.d
108 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umax.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm)
109 ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
114 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_max_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
115 ; CHECK-LABEL: multi_vec_max_single_x2_bf16:
117 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
118 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
119 ; CHECK-NEXT: bfmax { z0.h, z1.h }, { z0.h, z1.h }, z2.h
121 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmax.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
122 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
127 define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_max_single_x2_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm) {
128 ; CHECK-LABEL: multi_vec_max_single_x2_f16:
130 ; CHECK-NEXT: mov z5.d, z2.d
131 ; CHECK-NEXT: mov z4.d, z1.d
132 ; CHECK-NEXT: fmax { z4.h, z5.h }, { z4.h, z5.h }, z3.h
133 ; CHECK-NEXT: mov z0.d, z4.d
134 ; CHECK-NEXT: mov z1.d, z5.d
136 %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmax.single.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm)
137 ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
140 define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_max_single_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm) {
141 ; CHECK-LABEL: multi_vec_max_single_x2_f32:
143 ; CHECK-NEXT: mov z5.d, z2.d
144 ; CHECK-NEXT: mov z4.d, z1.d
145 ; CHECK-NEXT: fmax { z4.s, z5.s }, { z4.s, z5.s }, z3.s
146 ; CHECK-NEXT: mov z0.d, z4.d
147 ; CHECK-NEXT: mov z1.d, z5.d
149 %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmax.single.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm)
150 ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
153 define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_single_x2_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm) {
154 ; CHECK-LABEL: multi_vec_max_single_x2_f64:
156 ; CHECK-NEXT: mov z5.d, z2.d
157 ; CHECK-NEXT: mov z4.d, z1.d
158 ; CHECK-NEXT: fmax { z4.d, z5.d }, { z4.d, z5.d }, z3.d
159 ; CHECK-NEXT: mov z0.d, z4.d
160 ; CHECK-NEXT: mov z1.d, z5.d
162 %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmax.single.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm)
163 ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
168 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_single_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm) {
169 ; CHECK-LABEL: multi_vec_max_single_x4_s8:
171 ; CHECK-NEXT: mov z27.d, z4.d
172 ; CHECK-NEXT: mov z26.d, z3.d
173 ; CHECK-NEXT: mov z25.d, z2.d
174 ; CHECK-NEXT: mov z24.d, z1.d
175 ; CHECK-NEXT: smax { z24.b - z27.b }, { z24.b - z27.b }, z5.b
176 ; CHECK-NEXT: mov z0.d, z24.d
177 ; CHECK-NEXT: mov z1.d, z25.d
178 ; CHECK-NEXT: mov z2.d, z26.d
179 ; CHECK-NEXT: mov z3.d, z27.d
181 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
182 @llvm.aarch64.sve.smax.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm)
183 ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
186 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_single_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) {
187 ; CHECK-LABEL: multi_vec_max_single_x4_s16:
189 ; CHECK-NEXT: mov z27.d, z4.d
190 ; CHECK-NEXT: mov z26.d, z3.d
191 ; CHECK-NEXT: mov z25.d, z2.d
192 ; CHECK-NEXT: mov z24.d, z1.d
193 ; CHECK-NEXT: smax { z24.h - z27.h }, { z24.h - z27.h }, z5.h
194 ; CHECK-NEXT: mov z0.d, z24.d
195 ; CHECK-NEXT: mov z1.d, z25.d
196 ; CHECK-NEXT: mov z2.d, z26.d
197 ; CHECK-NEXT: mov z3.d, z27.d
199 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
200 @llvm.aarch64.sve.smax.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm)
201 ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
204 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_single_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) {
205 ; CHECK-LABEL: multi_vec_max_single_x4_s32:
207 ; CHECK-NEXT: mov z27.d, z4.d
208 ; CHECK-NEXT: mov z26.d, z3.d
209 ; CHECK-NEXT: mov z25.d, z2.d
210 ; CHECK-NEXT: mov z24.d, z1.d
211 ; CHECK-NEXT: smax { z24.s - z27.s }, { z24.s - z27.s }, z5.s
212 ; CHECK-NEXT: mov z0.d, z24.d
213 ; CHECK-NEXT: mov z1.d, z25.d
214 ; CHECK-NEXT: mov z2.d, z26.d
215 ; CHECK-NEXT: mov z3.d, z27.d
217 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
218 @llvm.aarch64.sve.smax.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm)
219 ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
222 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_single_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) {
223 ; CHECK-LABEL: multi_vec_max_single_x4_s64:
225 ; CHECK-NEXT: mov z27.d, z4.d
226 ; CHECK-NEXT: mov z26.d, z3.d
227 ; CHECK-NEXT: mov z25.d, z2.d
228 ; CHECK-NEXT: mov z24.d, z1.d
229 ; CHECK-NEXT: smax { z24.d - z27.d }, { z24.d - z27.d }, z5.d
230 ; CHECK-NEXT: mov z0.d, z24.d
231 ; CHECK-NEXT: mov z1.d, z25.d
232 ; CHECK-NEXT: mov z2.d, z26.d
233 ; CHECK-NEXT: mov z3.d, z27.d
235 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
236 @llvm.aarch64.sve.smax.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm)
237 ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
242 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_single_x4_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm) {
243 ; CHECK-LABEL: multi_vec_max_single_x4_u8:
245 ; CHECK-NEXT: mov z27.d, z4.d
246 ; CHECK-NEXT: mov z26.d, z3.d
247 ; CHECK-NEXT: mov z25.d, z2.d
248 ; CHECK-NEXT: mov z24.d, z1.d
249 ; CHECK-NEXT: umax { z24.b - z27.b }, { z24.b - z27.b }, z5.b
250 ; CHECK-NEXT: mov z0.d, z24.d
251 ; CHECK-NEXT: mov z1.d, z25.d
252 ; CHECK-NEXT: mov z2.d, z26.d
253 ; CHECK-NEXT: mov z3.d, z27.d
255 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
256 @llvm.aarch64.sve.umax.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm)
257 ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
260 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_single_x4_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) {
261 ; CHECK-LABEL: multi_vec_max_single_x4_u16:
263 ; CHECK-NEXT: mov z27.d, z4.d
264 ; CHECK-NEXT: mov z26.d, z3.d
265 ; CHECK-NEXT: mov z25.d, z2.d
266 ; CHECK-NEXT: mov z24.d, z1.d
267 ; CHECK-NEXT: umax { z24.h - z27.h }, { z24.h - z27.h }, z5.h
268 ; CHECK-NEXT: mov z0.d, z24.d
269 ; CHECK-NEXT: mov z1.d, z25.d
270 ; CHECK-NEXT: mov z2.d, z26.d
271 ; CHECK-NEXT: mov z3.d, z27.d
273 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
274 @llvm.aarch64.sve.umax.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm)
275 ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
278 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_single_x4_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) {
279 ; CHECK-LABEL: multi_vec_max_single_x4_u32:
281 ; CHECK-NEXT: mov z27.d, z4.d
282 ; CHECK-NEXT: mov z26.d, z3.d
283 ; CHECK-NEXT: mov z25.d, z2.d
284 ; CHECK-NEXT: mov z24.d, z1.d
285 ; CHECK-NEXT: umax { z24.s - z27.s }, { z24.s - z27.s }, z5.s
286 ; CHECK-NEXT: mov z0.d, z24.d
287 ; CHECK-NEXT: mov z1.d, z25.d
288 ; CHECK-NEXT: mov z2.d, z26.d
289 ; CHECK-NEXT: mov z3.d, z27.d
291 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
292 @llvm.aarch64.sve.umax.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm)
293 ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
296 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_single_x4_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) {
297 ; CHECK-LABEL: multi_vec_max_single_x4_u64:
299 ; CHECK-NEXT: mov z27.d, z4.d
300 ; CHECK-NEXT: mov z26.d, z3.d
301 ; CHECK-NEXT: mov z25.d, z2.d
302 ; CHECK-NEXT: mov z24.d, z1.d
303 ; CHECK-NEXT: umax { z24.d - z27.d }, { z24.d - z27.d }, z5.d
304 ; CHECK-NEXT: mov z0.d, z24.d
305 ; CHECK-NEXT: mov z1.d, z25.d
306 ; CHECK-NEXT: mov z2.d, z26.d
307 ; CHECK-NEXT: mov z3.d, z27.d
309 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
310 @llvm.aarch64.sve.umax.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm)
311 ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
316 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_max_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
317 ; CHECK-LABEL: multi_vec_max_single_x4_bf16:
319 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
320 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
321 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
322 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
323 ; CHECK-NEXT: bfmax { z0.h - z3.h }, { z0.h - z3.h }, z4.h
325 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmax.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
326 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
331 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_max_single_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm) {
332 ; CHECK-LABEL: multi_vec_max_single_x4_f16:
334 ; CHECK-NEXT: mov z27.d, z4.d
335 ; CHECK-NEXT: mov z26.d, z3.d
336 ; CHECK-NEXT: mov z25.d, z2.d
337 ; CHECK-NEXT: mov z24.d, z1.d
338 ; CHECK-NEXT: fmax { z24.h - z27.h }, { z24.h - z27.h }, z5.h
339 ; CHECK-NEXT: mov z0.d, z24.d
340 ; CHECK-NEXT: mov z1.d, z25.d
341 ; CHECK-NEXT: mov z2.d, z26.d
342 ; CHECK-NEXT: mov z3.d, z27.d
344 %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
345 @llvm.aarch64.sve.fmax.single.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm)
346 ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
349 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_max_single_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm) {
350 ; CHECK-LABEL: multi_vec_max_single_x4_f32:
352 ; CHECK-NEXT: mov z27.d, z4.d
353 ; CHECK-NEXT: mov z26.d, z3.d
354 ; CHECK-NEXT: mov z25.d, z2.d
355 ; CHECK-NEXT: mov z24.d, z1.d
356 ; CHECK-NEXT: fmax { z24.s - z27.s }, { z24.s - z27.s }, z5.s
357 ; CHECK-NEXT: mov z0.d, z24.d
358 ; CHECK-NEXT: mov z1.d, z25.d
359 ; CHECK-NEXT: mov z2.d, z26.d
360 ; CHECK-NEXT: mov z3.d, z27.d
362 %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
363 @llvm.aarch64.sve.fmax.single.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm)
364 ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
367 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_single_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm) {
368 ; CHECK-LABEL: multi_vec_max_single_x4_f64:
370 ; CHECK-NEXT: mov z27.d, z4.d
371 ; CHECK-NEXT: mov z26.d, z3.d
372 ; CHECK-NEXT: mov z25.d, z2.d
373 ; CHECK-NEXT: mov z24.d, z1.d
374 ; CHECK-NEXT: fmax { z24.d - z27.d }, { z24.d - z27.d }, z5.d
375 ; CHECK-NEXT: mov z0.d, z24.d
376 ; CHECK-NEXT: mov z1.d, z25.d
377 ; CHECK-NEXT: mov z2.d, z26.d
378 ; CHECK-NEXT: mov z3.d, z27.d
380 %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
381 @llvm.aarch64.sve.fmax.single.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm)
382 ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
387 define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_multi_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) {
388 ; CHECK-LABEL: multi_vec_max_multi_x2_s8:
390 ; CHECK-NEXT: mov z7.d, z4.d
391 ; CHECK-NEXT: mov z5.d, z2.d
392 ; CHECK-NEXT: mov z6.d, z3.d
393 ; CHECK-NEXT: mov z4.d, z1.d
394 ; CHECK-NEXT: smax { z4.b, z5.b }, { z4.b, z5.b }, { z6.b, z7.b }
395 ; CHECK-NEXT: mov z0.d, z4.d
396 ; CHECK-NEXT: mov z1.d, z5.d
398 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smax.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
399 ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
402 define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_multi_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
403 ; CHECK-LABEL: multi_vec_max_multi_x2_s16:
405 ; CHECK-NEXT: mov z7.d, z4.d
406 ; CHECK-NEXT: mov z5.d, z2.d
407 ; CHECK-NEXT: mov z6.d, z3.d
408 ; CHECK-NEXT: mov z4.d, z1.d
409 ; CHECK-NEXT: smax { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
410 ; CHECK-NEXT: mov z0.d, z4.d
411 ; CHECK-NEXT: mov z1.d, z5.d
413 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smax.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
414 ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
417 define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_multi_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) {
418 ; CHECK-LABEL: multi_vec_max_multi_x2_s32:
420 ; CHECK-NEXT: mov z7.d, z4.d
421 ; CHECK-NEXT: mov z5.d, z2.d
422 ; CHECK-NEXT: mov z6.d, z3.d
423 ; CHECK-NEXT: mov z4.d, z1.d
424 ; CHECK-NEXT: smax { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
425 ; CHECK-NEXT: mov z0.d, z4.d
426 ; CHECK-NEXT: mov z1.d, z5.d
428 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smax.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
429 ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
432 define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_multi_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
433 ; CHECK-LABEL: multi_vec_max_multi_x2_s64:
435 ; CHECK-NEXT: mov z7.d, z4.d
436 ; CHECK-NEXT: mov z5.d, z2.d
437 ; CHECK-NEXT: mov z6.d, z3.d
438 ; CHECK-NEXT: mov z4.d, z1.d
439 ; CHECK-NEXT: smax { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
440 ; CHECK-NEXT: mov z0.d, z4.d
441 ; CHECK-NEXT: mov z1.d, z5.d
443 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smax.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
444 ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
449 define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_multi_x2_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) {
450 ; CHECK-LABEL: multi_vec_max_multi_x2_u8:
452 ; CHECK-NEXT: mov z7.d, z4.d
453 ; CHECK-NEXT: mov z5.d, z2.d
454 ; CHECK-NEXT: mov z6.d, z3.d
455 ; CHECK-NEXT: mov z4.d, z1.d
456 ; CHECK-NEXT: umax { z4.b, z5.b }, { z4.b, z5.b }, { z6.b, z7.b }
457 ; CHECK-NEXT: mov z0.d, z4.d
458 ; CHECK-NEXT: mov z1.d, z5.d
460 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umax.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
461 ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
464 define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_multi_x2_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
465 ; CHECK-LABEL: multi_vec_max_multi_x2_u16:
467 ; CHECK-NEXT: mov z7.d, z4.d
468 ; CHECK-NEXT: mov z5.d, z2.d
469 ; CHECK-NEXT: mov z6.d, z3.d
470 ; CHECK-NEXT: mov z4.d, z1.d
471 ; CHECK-NEXT: umax { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
472 ; CHECK-NEXT: mov z0.d, z4.d
473 ; CHECK-NEXT: mov z1.d, z5.d
475 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umax.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
476 ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
479 define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_multi_x2_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) {
480 ; CHECK-LABEL: multi_vec_max_multi_x2_u32:
482 ; CHECK-NEXT: mov z7.d, z4.d
483 ; CHECK-NEXT: mov z5.d, z2.d
484 ; CHECK-NEXT: mov z6.d, z3.d
485 ; CHECK-NEXT: mov z4.d, z1.d
486 ; CHECK-NEXT: umax { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
487 ; CHECK-NEXT: mov z0.d, z4.d
488 ; CHECK-NEXT: mov z1.d, z5.d
490 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umax.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
491 ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
494 define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_multi_x2_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
495 ; CHECK-LABEL: multi_vec_max_multi_x2_u64:
497 ; CHECK-NEXT: mov z7.d, z4.d
498 ; CHECK-NEXT: mov z5.d, z2.d
499 ; CHECK-NEXT: mov z6.d, z3.d
500 ; CHECK-NEXT: mov z4.d, z1.d
501 ; CHECK-NEXT: umax { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
502 ; CHECK-NEXT: mov z0.d, z4.d
503 ; CHECK-NEXT: mov z1.d, z5.d
505 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umax.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
506 ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
511 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_max_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
512 ; CHECK-LABEL: multi_vec_max_x2_bf16:
514 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
515 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
516 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
517 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
518 ; CHECK-NEXT: bfmax { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
520 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmax.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
521 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
526 define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_max_multi_x2_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) {
527 ; CHECK-LABEL: multi_vec_max_multi_x2_f16:
529 ; CHECK-NEXT: mov z7.d, z4.d
530 ; CHECK-NEXT: mov z5.d, z2.d
531 ; CHECK-NEXT: mov z6.d, z3.d
532 ; CHECK-NEXT: mov z4.d, z1.d
533 ; CHECK-NEXT: fmax { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
534 ; CHECK-NEXT: mov z0.d, z4.d
535 ; CHECK-NEXT: mov z1.d, z5.d
537 %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmax.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2)
538 ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
541 define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_max_multi_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) {
542 ; CHECK-LABEL: multi_vec_max_multi_x2_f32:
544 ; CHECK-NEXT: mov z7.d, z4.d
545 ; CHECK-NEXT: mov z5.d, z2.d
546 ; CHECK-NEXT: mov z6.d, z3.d
547 ; CHECK-NEXT: mov z4.d, z1.d
548 ; CHECK-NEXT: fmax { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
549 ; CHECK-NEXT: mov z0.d, z4.d
550 ; CHECK-NEXT: mov z1.d, z5.d
552 %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmax.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2)
553 ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
556 define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_multi_x2_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) {
557 ; CHECK-LABEL: multi_vec_max_multi_x2_f64:
559 ; CHECK-NEXT: mov z7.d, z4.d
560 ; CHECK-NEXT: mov z5.d, z2.d
561 ; CHECK-NEXT: mov z6.d, z3.d
562 ; CHECK-NEXT: mov z4.d, z1.d
563 ; CHECK-NEXT: fmax { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
564 ; CHECK-NEXT: mov z0.d, z4.d
565 ; CHECK-NEXT: mov z1.d, z5.d
567 %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmax.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2)
568 ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
573 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_multi_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
574 ; CHECK-LABEL: multi_vec_max_multi_x4_s8:
576 ; CHECK-NEXT: mov z30.d, z7.d
577 ; CHECK-NEXT: mov z27.d, z4.d
578 ; CHECK-NEXT: ptrue p0.b
579 ; CHECK-NEXT: mov z29.d, z6.d
580 ; CHECK-NEXT: mov z26.d, z3.d
581 ; CHECK-NEXT: mov z28.d, z5.d
582 ; CHECK-NEXT: mov z25.d, z2.d
583 ; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0]
584 ; CHECK-NEXT: mov z24.d, z1.d
585 ; CHECK-NEXT: smax { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
586 ; CHECK-NEXT: mov z0.d, z24.d
587 ; CHECK-NEXT: mov z1.d, z25.d
588 ; CHECK-NEXT: mov z2.d, z26.d
589 ; CHECK-NEXT: mov z3.d, z27.d
591 <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
592 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
593 @llvm.aarch64.sve.smax.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
594 <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4)
595 ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
598 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_multi_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
599 ; CHECK-LABEL: multi_vec_max_multi_x4_s16:
601 ; CHECK-NEXT: mov z30.d, z7.d
602 ; CHECK-NEXT: mov z27.d, z4.d
603 ; CHECK-NEXT: ptrue p0.h
604 ; CHECK-NEXT: mov z29.d, z6.d
605 ; CHECK-NEXT: mov z26.d, z3.d
606 ; CHECK-NEXT: mov z28.d, z5.d
607 ; CHECK-NEXT: mov z25.d, z2.d
608 ; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
609 ; CHECK-NEXT: mov z24.d, z1.d
610 ; CHECK-NEXT: smax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
611 ; CHECK-NEXT: mov z0.d, z24.d
612 ; CHECK-NEXT: mov z1.d, z25.d
613 ; CHECK-NEXT: mov z2.d, z26.d
614 ; CHECK-NEXT: mov z3.d, z27.d
616 <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
617 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
618 @llvm.aarch64.sve.smax.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
619 <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
620 ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
623 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_multi_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
624 ; CHECK-LABEL: multi_vec_max_multi_x4_s32:
626 ; CHECK-NEXT: mov z30.d, z7.d
627 ; CHECK-NEXT: mov z27.d, z4.d
628 ; CHECK-NEXT: ptrue p0.s
629 ; CHECK-NEXT: mov z29.d, z6.d
630 ; CHECK-NEXT: mov z26.d, z3.d
631 ; CHECK-NEXT: mov z28.d, z5.d
632 ; CHECK-NEXT: mov z25.d, z2.d
633 ; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
634 ; CHECK-NEXT: mov z24.d, z1.d
635 ; CHECK-NEXT: smax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
636 ; CHECK-NEXT: mov z0.d, z24.d
637 ; CHECK-NEXT: mov z1.d, z25.d
638 ; CHECK-NEXT: mov z2.d, z26.d
639 ; CHECK-NEXT: mov z3.d, z27.d
641 <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
642 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
643 @llvm.aarch64.sve.smax.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
644 <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
645 ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
648 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_multi_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
649 ; CHECK-LABEL: multi_vec_max_multi_x4_s64:
651 ; CHECK-NEXT: mov z30.d, z7.d
652 ; CHECK-NEXT: mov z27.d, z4.d
653 ; CHECK-NEXT: ptrue p0.d
654 ; CHECK-NEXT: mov z29.d, z6.d
655 ; CHECK-NEXT: mov z26.d, z3.d
656 ; CHECK-NEXT: mov z28.d, z5.d
657 ; CHECK-NEXT: mov z25.d, z2.d
658 ; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
659 ; CHECK-NEXT: mov z24.d, z1.d
660 ; CHECK-NEXT: smax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
661 ; CHECK-NEXT: mov z0.d, z24.d
662 ; CHECK-NEXT: mov z1.d, z25.d
663 ; CHECK-NEXT: mov z2.d, z26.d
664 ; CHECK-NEXT: mov z3.d, z27.d
666 <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
667 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
668 @llvm.aarch64.sve.smax.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
669 <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)
670 ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
675 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_multi_x4_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
676 ; CHECK-LABEL: multi_vec_max_multi_x4_u8:
678 ; CHECK-NEXT: mov z30.d, z7.d
679 ; CHECK-NEXT: mov z27.d, z4.d
680 ; CHECK-NEXT: ptrue p0.b
681 ; CHECK-NEXT: mov z29.d, z6.d
682 ; CHECK-NEXT: mov z26.d, z3.d
683 ; CHECK-NEXT: mov z28.d, z5.d
684 ; CHECK-NEXT: mov z25.d, z2.d
685 ; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0]
686 ; CHECK-NEXT: mov z24.d, z1.d
687 ; CHECK-NEXT: umax { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
688 ; CHECK-NEXT: mov z0.d, z24.d
689 ; CHECK-NEXT: mov z1.d, z25.d
690 ; CHECK-NEXT: mov z2.d, z26.d
691 ; CHECK-NEXT: mov z3.d, z27.d
693 <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
694 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
695 @llvm.aarch64.sve.umax.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
696 <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4)
697 ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
700 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_multi_x4_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
701 ; CHECK-LABEL: multi_vec_max_multi_x4_u16:
703 ; CHECK-NEXT: mov z30.d, z7.d
704 ; CHECK-NEXT: mov z27.d, z4.d
705 ; CHECK-NEXT: ptrue p0.h
706 ; CHECK-NEXT: mov z29.d, z6.d
707 ; CHECK-NEXT: mov z26.d, z3.d
708 ; CHECK-NEXT: mov z28.d, z5.d
709 ; CHECK-NEXT: mov z25.d, z2.d
710 ; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
711 ; CHECK-NEXT: mov z24.d, z1.d
712 ; CHECK-NEXT: umax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
713 ; CHECK-NEXT: mov z0.d, z24.d
714 ; CHECK-NEXT: mov z1.d, z25.d
715 ; CHECK-NEXT: mov z2.d, z26.d
716 ; CHECK-NEXT: mov z3.d, z27.d
718 <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
719 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
720 @llvm.aarch64.sve.umax.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
721 <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
722 ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
725 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_multi_x4_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
726 ; CHECK-LABEL: multi_vec_max_multi_x4_u32:
728 ; CHECK-NEXT: mov z30.d, z7.d
729 ; CHECK-NEXT: mov z27.d, z4.d
730 ; CHECK-NEXT: ptrue p0.s
731 ; CHECK-NEXT: mov z29.d, z6.d
732 ; CHECK-NEXT: mov z26.d, z3.d
733 ; CHECK-NEXT: mov z28.d, z5.d
734 ; CHECK-NEXT: mov z25.d, z2.d
735 ; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
736 ; CHECK-NEXT: mov z24.d, z1.d
737 ; CHECK-NEXT: umax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
738 ; CHECK-NEXT: mov z0.d, z24.d
739 ; CHECK-NEXT: mov z1.d, z25.d
740 ; CHECK-NEXT: mov z2.d, z26.d
741 ; CHECK-NEXT: mov z3.d, z27.d
743 <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
744 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
745 @llvm.aarch64.sve.umax.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
746 <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
747 ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
750 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_multi_x4_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
751 ; CHECK-LABEL: multi_vec_max_multi_x4_u64:
753 ; CHECK-NEXT: mov z30.d, z7.d
754 ; CHECK-NEXT: mov z27.d, z4.d
755 ; CHECK-NEXT: ptrue p0.d
756 ; CHECK-NEXT: mov z29.d, z6.d
757 ; CHECK-NEXT: mov z26.d, z3.d
758 ; CHECK-NEXT: mov z28.d, z5.d
759 ; CHECK-NEXT: mov z25.d, z2.d
760 ; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
761 ; CHECK-NEXT: mov z24.d, z1.d
762 ; CHECK-NEXT: umax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
763 ; CHECK-NEXT: mov z0.d, z24.d
764 ; CHECK-NEXT: mov z1.d, z25.d
765 ; CHECK-NEXT: mov z2.d, z26.d
766 ; CHECK-NEXT: mov z3.d, z27.d
768 <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
769 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
770 @llvm.aarch64.sve.umax.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
771 <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)
772 ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
777 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_max_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
778 ; CHECK-LABEL: multi_vec_max_x4_bf16:
780 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
781 ; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
782 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
783 ; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
784 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
785 ; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
786 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
787 ; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
788 ; CHECK-NEXT: bfmax { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
790 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmax.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
791 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
796 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_max_multi_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
797 ; CHECK-LABEL: multi_vec_max_multi_x4_f16:
799 ; CHECK-NEXT: mov z30.d, z7.d
800 ; CHECK-NEXT: mov z27.d, z4.d
801 ; CHECK-NEXT: ptrue p0.h
802 ; CHECK-NEXT: mov z29.d, z6.d
803 ; CHECK-NEXT: mov z26.d, z3.d
804 ; CHECK-NEXT: mov z28.d, z5.d
805 ; CHECK-NEXT: mov z25.d, z2.d
806 ; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
807 ; CHECK-NEXT: mov z24.d, z1.d
808 ; CHECK-NEXT: fmax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
809 ; CHECK-NEXT: mov z0.d, z24.d
810 ; CHECK-NEXT: mov z1.d, z25.d
811 ; CHECK-NEXT: mov z2.d, z26.d
812 ; CHECK-NEXT: mov z3.d, z27.d
814 <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
815 %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
816 @llvm.aarch64.sve.fmax.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
817 <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4)
818 ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
821 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_max_multi_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
822 ; CHECK-LABEL: multi_vec_max_multi_x4_f32:
824 ; CHECK-NEXT: mov z30.d, z7.d
825 ; CHECK-NEXT: mov z27.d, z4.d
826 ; CHECK-NEXT: ptrue p0.s
827 ; CHECK-NEXT: mov z29.d, z6.d
828 ; CHECK-NEXT: mov z26.d, z3.d
829 ; CHECK-NEXT: mov z28.d, z5.d
830 ; CHECK-NEXT: mov z25.d, z2.d
831 ; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
832 ; CHECK-NEXT: mov z24.d, z1.d
833 ; CHECK-NEXT: fmax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
834 ; CHECK-NEXT: mov z0.d, z24.d
835 ; CHECK-NEXT: mov z1.d, z25.d
836 ; CHECK-NEXT: mov z2.d, z26.d
837 ; CHECK-NEXT: mov z3.d, z27.d
839 <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
840 %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
841 @llvm.aarch64.sve.fmax.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
842 <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4)
843 ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
846 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_multi_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
847 ; CHECK-LABEL: multi_vec_max_multi_x4_f64:
849 ; CHECK-NEXT: mov z30.d, z7.d
850 ; CHECK-NEXT: mov z27.d, z4.d
851 ; CHECK-NEXT: ptrue p0.d
852 ; CHECK-NEXT: mov z29.d, z6.d
853 ; CHECK-NEXT: mov z26.d, z3.d
854 ; CHECK-NEXT: mov z28.d, z5.d
855 ; CHECK-NEXT: mov z25.d, z2.d
856 ; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
857 ; CHECK-NEXT: mov z24.d, z1.d
858 ; CHECK-NEXT: fmax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
859 ; CHECK-NEXT: mov z0.d, z24.d
860 ; CHECK-NEXT: mov z1.d, z25.d
861 ; CHECK-NEXT: mov z2.d, z26.d
862 ; CHECK-NEXT: mov z3.d, z27.d
864 <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
865 %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
866 @llvm.aarch64.sve.fmax.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
867 <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4)
868 ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
871 ; BFMAXNM (Single, x2)
873 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_maxnm_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
874 ; CHECK-LABEL: multi_vec_maxnm_single_x2_bf16:
876 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
877 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
878 ; CHECK-NEXT: bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, z2.h
880 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
881 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
884 ; FMAXNM (Single, x2)
886 define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_maxnm_single_x2_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm) {
887 ; CHECK-LABEL: multi_vec_maxnm_single_x2_f16:
889 ; CHECK-NEXT: mov z5.d, z2.d
890 ; CHECK-NEXT: mov z4.d, z1.d
891 ; CHECK-NEXT: fmaxnm { z4.h, z5.h }, { z4.h, z5.h }, z3.h
892 ; CHECK-NEXT: mov z0.d, z4.d
893 ; CHECK-NEXT: mov z1.d, z5.d
895 %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm)
896 ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
899 define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_maxnm_single_x2_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm) {
900 ; CHECK-LABEL: multi_vec_maxnm_single_x2_f32:
902 ; CHECK-NEXT: mov z5.d, z2.d
903 ; CHECK-NEXT: mov z4.d, z1.d
904 ; CHECK-NEXT: fmaxnm { z4.s, z5.s }, { z4.s, z5.s }, z3.s
905 ; CHECK-NEXT: mov z0.d, z4.d
906 ; CHECK-NEXT: mov z1.d, z5.d
908 %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm)
909 ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
912 define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_maxnm_single_x2_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm) {
913 ; CHECK-LABEL: multi_vec_maxnm_single_x2_f64:
915 ; CHECK-NEXT: mov z5.d, z2.d
916 ; CHECK-NEXT: mov z4.d, z1.d
917 ; CHECK-NEXT: fmaxnm { z4.d, z5.d }, { z4.d, z5.d }, z3.d
918 ; CHECK-NEXT: mov z0.d, z4.d
919 ; CHECK-NEXT: mov z1.d, z5.d
921 %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm)
922 ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
925 ; BFMAXNM (Single, x4)
927 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_maxnm_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
928 ; CHECK-LABEL: multi_vec_maxnm_single_x4_bf16:
930 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
931 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
932 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
933 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
934 ; CHECK-NEXT: bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, z4.h
936 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
937 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
940 ; FMAXNM (Single, x4)
942 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_maxnm_single_x4_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm) {
943 ; CHECK-LABEL: multi_vec_maxnm_single_x4_f16:
945 ; CHECK-NEXT: mov z27.d, z4.d
946 ; CHECK-NEXT: mov z26.d, z3.d
947 ; CHECK-NEXT: mov z25.d, z2.d
948 ; CHECK-NEXT: mov z24.d, z1.d
949 ; CHECK-NEXT: fmaxnm { z24.h - z27.h }, { z24.h - z27.h }, z5.h
950 ; CHECK-NEXT: mov z0.d, z24.d
951 ; CHECK-NEXT: mov z1.d, z25.d
952 ; CHECK-NEXT: mov z2.d, z26.d
953 ; CHECK-NEXT: mov z3.d, z27.d
955 %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
956 @llvm.aarch64.sve.fmaxnm.single.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm)
957 ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
960 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_maxnm_single_x4_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm) {
961 ; CHECK-LABEL: multi_vec_maxnm_single_x4_f32:
963 ; CHECK-NEXT: mov z27.d, z4.d
964 ; CHECK-NEXT: mov z26.d, z3.d
965 ; CHECK-NEXT: mov z25.d, z2.d
966 ; CHECK-NEXT: mov z24.d, z1.d
967 ; CHECK-NEXT: fmaxnm { z24.s - z27.s }, { z24.s - z27.s }, z5.s
968 ; CHECK-NEXT: mov z0.d, z24.d
969 ; CHECK-NEXT: mov z1.d, z25.d
970 ; CHECK-NEXT: mov z2.d, z26.d
971 ; CHECK-NEXT: mov z3.d, z27.d
973 %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
974 @llvm.aarch64.sve.fmaxnm.single.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm)
975 ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
978 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_maxnm_single_x4_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm) {
979 ; CHECK-LABEL: multi_vec_maxnm_single_x4_f64:
981 ; CHECK-NEXT: mov z27.d, z4.d
982 ; CHECK-NEXT: mov z26.d, z3.d
983 ; CHECK-NEXT: mov z25.d, z2.d
984 ; CHECK-NEXT: mov z24.d, z1.d
985 ; CHECK-NEXT: fmaxnm { z24.d - z27.d }, { z24.d - z27.d }, z5.d
986 ; CHECK-NEXT: mov z0.d, z24.d
987 ; CHECK-NEXT: mov z1.d, z25.d
988 ; CHECK-NEXT: mov z2.d, z26.d
989 ; CHECK-NEXT: mov z3.d, z27.d
991 %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
992 @llvm.aarch64.sve.fmaxnm.single.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm)
993 ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
996 ; BFMAXNM (Multi, x2)
998 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_maxnm_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
999 ; CHECK-LABEL: multi_vec_maxnm_x2_bf16:
1001 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
1002 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
1003 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
1004 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
1005 ; CHECK-NEXT: bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
1007 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
1008 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
1011 ; FMAXNM (Multi, x2)
1013 define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_maxnm_x2_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) {
1014 ; CHECK-LABEL: multi_vec_maxnm_x2_f16:
1016 ; CHECK-NEXT: mov z7.d, z4.d
1017 ; CHECK-NEXT: mov z5.d, z2.d
1018 ; CHECK-NEXT: mov z6.d, z3.d
1019 ; CHECK-NEXT: mov z4.d, z1.d
1020 ; CHECK-NEXT: fmaxnm { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
1021 ; CHECK-NEXT: mov z0.d, z4.d
1022 ; CHECK-NEXT: mov z1.d, z5.d
1024 %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmaxnm.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2)
1025 ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_maxnm_x2_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) {
; CHECK-LABEL: multi_vec_maxnm_x2_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z7.d, z4.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: fmaxnm { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
%res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmaxnm.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2)
ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_maxnm_x2_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) {
; CHECK-LABEL: multi_vec_maxnm_x2_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z7.d, z4.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: fmaxnm { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
%res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2)
ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
}

; BFMAXNM (Multi, x4)

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_maxnm_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
; CHECK-LABEL: multi_vec_maxnm_x4_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT: bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}

; FMAXNM (Multi, x4)

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_maxnm_x4_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
; CHECK-LABEL: multi_vec_maxnm_x4_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z30.d, z7.d
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z29.d, z6.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z28.d, z5.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: fmaxnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
%res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@llvm.aarch64.sve.fmaxnm.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
<vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4)
ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_maxnm_x4_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
; CHECK-LABEL: multi_vec_maxnm_x4_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z30.d, z7.d
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z29.d, z6.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z28.d, z5.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: fmaxnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
%res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
@llvm.aarch64.sve.fmaxnm.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
<vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4)
ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_maxnm_x4_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
; CHECK-LABEL: multi_vec_maxnm_x4_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z30.d, z7.d
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z29.d, z6.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z28.d, z5.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: fmaxnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
%res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
@llvm.aarch64.sve.fmaxnm.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
<vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4)
ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smax.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smax.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smax.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smax.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umax.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umax.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umax.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umax.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmax.single.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmax.single.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmax.single.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smax.single.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smax.single.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smax.single.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smax.single.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umax.single.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umax.single.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umax.single.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umax.single.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@llvm.aarch64.sve.fmax.single.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
@llvm.aarch64.sve.fmax.single.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
@llvm.aarch64.sve.fmax.single.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smax.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smax.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smax.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smax.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umax.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umax.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umax.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umax.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmax.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmax.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmax.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@llvm.aarch64.sve.smax.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@llvm.aarch64.sve.smax.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@llvm.aarch64.sve.smax.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@llvm.aarch64.sve.smax.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@llvm.aarch64.sve.umax.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@llvm.aarch64.sve.umax.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@llvm.aarch64.sve.umax.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@llvm.aarch64.sve.umax.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@llvm.aarch64.sve.fmax.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
@llvm.aarch64.sve.fmax.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
@llvm.aarch64.sve.fmax.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@llvm.aarch64.sve.fmaxnm.single.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
@llvm.aarch64.sve.fmaxnm.single.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
@llvm.aarch64.sve.fmaxnm.single.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmaxnm.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmaxnm.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@llvm.aarch64.sve.fmaxnm.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
@llvm.aarch64.sve.fmaxnm.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
@llvm.aarch64.sve.fmaxnm.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)