1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+sve-b16b16 -force-streaming -verify-machineinstrs < %s | FileCheck %s
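; This file tests lowering of the SME2 multi-vector minimum intrinsics
; (llvm.aarch64.sve.{s,u,f}min[.single].x{2,4} and llvm.aarch64.sve.fminnm[.single].x{2,4})
; to the SMIN/UMIN/BFMIN/FMIN/BFMINNM/FMINNM multi-vector instructions in streaming mode,
; with +sve-b16b16 enabling the bfloat16 forms.

; SMIN (Single, x2)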
6 define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_single_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) {
7 ; CHECK-LABEL: multi_vec_min_single_x2_s8:
9 ; CHECK-NEXT: mov z5.d, z2.d
10 ; CHECK-NEXT: mov z4.d, z1.d
11 ; CHECK-NEXT: smin { z4.b, z5.b }, { z4.b, z5.b }, z3.b
12 ; CHECK-NEXT: mov z0.d, z4.d
13 ; CHECK-NEXT: mov z1.d, z5.d
15 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm)
16 ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
19 define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_single_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) {
20 ; CHECK-LABEL: multi_vec_min_single_x2_s16:
22 ; CHECK-NEXT: mov z5.d, z2.d
23 ; CHECK-NEXT: mov z4.d, z1.d
24 ; CHECK-NEXT: smin { z4.h, z5.h }, { z4.h, z5.h }, z3.h
25 ; CHECK-NEXT: mov z0.d, z4.d
26 ; CHECK-NEXT: mov z1.d, z5.d
28 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smin.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm)
29 ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
32 define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_single_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) {
33 ; CHECK-LABEL: multi_vec_min_single_x2_s32:
35 ; CHECK-NEXT: mov z5.d, z2.d
36 ; CHECK-NEXT: mov z4.d, z1.d
37 ; CHECK-NEXT: smin { z4.s, z5.s }, { z4.s, z5.s }, z3.s
38 ; CHECK-NEXT: mov z0.d, z4.d
39 ; CHECK-NEXT: mov z1.d, z5.d
41 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smin.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm)
42 ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
45 define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_single_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) {
46 ; CHECK-LABEL: multi_vec_min_single_x2_s64:
48 ; CHECK-NEXT: mov z5.d, z2.d
49 ; CHECK-NEXT: mov z4.d, z1.d
50 ; CHECK-NEXT: smin { z4.d, z5.d }, { z4.d, z5.d }, z3.d
51 ; CHECK-NEXT: mov z0.d, z4.d
52 ; CHECK-NEXT: mov z1.d, z5.d
54 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smin.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm)
55 ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
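; UMIN (Single, x2)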
60 define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_single_x2_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) {
61 ; CHECK-LABEL: multi_vec_min_single_x2_u8:
63 ; CHECK-NEXT: mov z5.d, z2.d
64 ; CHECK-NEXT: mov z4.d, z1.d
65 ; CHECK-NEXT: umin { z4.b, z5.b }, { z4.b, z5.b }, z3.b
66 ; CHECK-NEXT: mov z0.d, z4.d
67 ; CHECK-NEXT: mov z1.d, z5.d
69 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm)
70 ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
73 define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_single_x2_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) {
74 ; CHECK-LABEL: multi_vec_min_single_x2_u16:
76 ; CHECK-NEXT: mov z5.d, z2.d
77 ; CHECK-NEXT: mov z4.d, z1.d
78 ; CHECK-NEXT: umin { z4.h, z5.h }, { z4.h, z5.h }, z3.h
79 ; CHECK-NEXT: mov z0.d, z4.d
80 ; CHECK-NEXT: mov z1.d, z5.d
82 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umin.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm)
83 ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
86 define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_single_x2_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) {
87 ; CHECK-LABEL: multi_vec_min_single_x2_u32:
89 ; CHECK-NEXT: mov z5.d, z2.d
90 ; CHECK-NEXT: mov z4.d, z1.d
91 ; CHECK-NEXT: umin { z4.s, z5.s }, { z4.s, z5.s }, z3.s
92 ; CHECK-NEXT: mov z0.d, z4.d
93 ; CHECK-NEXT: mov z1.d, z5.d
95 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm)
96 ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
99 define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_single_x2_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) {
100 ; CHECK-LABEL: multi_vec_min_single_x2_u64:
102 ; CHECK-NEXT: mov z5.d, z2.d
103 ; CHECK-NEXT: mov z4.d, z1.d
104 ; CHECK-NEXT: umin { z4.d, z5.d }, { z4.d, z5.d }, z3.d
105 ; CHECK-NEXT: mov z0.d, z4.d
106 ; CHECK-NEXT: mov z1.d, z5.d
108 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umin.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm)
109 ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
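; BFMIN (Single, x2)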
114 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_min_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
115 ; CHECK-LABEL: multi_vec_min_single_x2_bf16:
117 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
118 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
119 ; CHECK-NEXT: bfmin { z0.h, z1.h }, { z0.h, z1.h }, z2.h
121 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmin.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
122 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
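; FMIN (Single, x2)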
127 define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_min_single_x2_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm) {
128 ; CHECK-LABEL: multi_vec_min_single_x2_f16:
130 ; CHECK-NEXT: mov z5.d, z2.d
131 ; CHECK-NEXT: mov z4.d, z1.d
132 ; CHECK-NEXT: fmin { z4.h, z5.h }, { z4.h, z5.h }, z3.h
133 ; CHECK-NEXT: mov z0.d, z4.d
134 ; CHECK-NEXT: mov z1.d, z5.d
136 %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmin.single.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm)
137 ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
140 define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_min_single_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm) {
141 ; CHECK-LABEL: multi_vec_min_single_x2_f32:
143 ; CHECK-NEXT: mov z5.d, z2.d
144 ; CHECK-NEXT: mov z4.d, z1.d
145 ; CHECK-NEXT: fmin { z4.s, z5.s }, { z4.s, z5.s }, z3.s
146 ; CHECK-NEXT: mov z0.d, z4.d
147 ; CHECK-NEXT: mov z1.d, z5.d
149 %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmin.single.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm)
150 ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
153 define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_single_x2_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm) {
154 ; CHECK-LABEL: multi_vec_min_single_x2_f64:
156 ; CHECK-NEXT: mov z5.d, z2.d
157 ; CHECK-NEXT: mov z4.d, z1.d
158 ; CHECK-NEXT: fmin { z4.d, z5.d }, { z4.d, z5.d }, z3.d
159 ; CHECK-NEXT: mov z0.d, z4.d
160 ; CHECK-NEXT: mov z1.d, z5.d
162 %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmin.single.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm)
163 ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
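; SMIN (Single, x4)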
168 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_single_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm) {
169 ; CHECK-LABEL: multi_vec_min_single_x4_s8:
171 ; CHECK-NEXT: mov z27.d, z4.d
172 ; CHECK-NEXT: mov z26.d, z3.d
173 ; CHECK-NEXT: mov z25.d, z2.d
174 ; CHECK-NEXT: mov z24.d, z1.d
175 ; CHECK-NEXT: smin { z24.b - z27.b }, { z24.b - z27.b }, z5.b
176 ; CHECK-NEXT: mov z0.d, z24.d
177 ; CHECK-NEXT: mov z1.d, z25.d
178 ; CHECK-NEXT: mov z2.d, z26.d
179 ; CHECK-NEXT: mov z3.d, z27.d
181 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
182 @llvm.aarch64.sve.smin.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm)
183 ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
186 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_single_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) {
187 ; CHECK-LABEL: multi_vec_min_single_x4_s16:
189 ; CHECK-NEXT: mov z27.d, z4.d
190 ; CHECK-NEXT: mov z26.d, z3.d
191 ; CHECK-NEXT: mov z25.d, z2.d
192 ; CHECK-NEXT: mov z24.d, z1.d
193 ; CHECK-NEXT: smin { z24.h - z27.h }, { z24.h - z27.h }, z5.h
194 ; CHECK-NEXT: mov z0.d, z24.d
195 ; CHECK-NEXT: mov z1.d, z25.d
196 ; CHECK-NEXT: mov z2.d, z26.d
197 ; CHECK-NEXT: mov z3.d, z27.d
199 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
200 @llvm.aarch64.sve.smin.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm)
201 ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
204 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_single_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) {
205 ; CHECK-LABEL: multi_vec_min_single_x4_s32:
207 ; CHECK-NEXT: mov z27.d, z4.d
208 ; CHECK-NEXT: mov z26.d, z3.d
209 ; CHECK-NEXT: mov z25.d, z2.d
210 ; CHECK-NEXT: mov z24.d, z1.d
211 ; CHECK-NEXT: smin { z24.s - z27.s }, { z24.s - z27.s }, z5.s
212 ; CHECK-NEXT: mov z0.d, z24.d
213 ; CHECK-NEXT: mov z1.d, z25.d
214 ; CHECK-NEXT: mov z2.d, z26.d
215 ; CHECK-NEXT: mov z3.d, z27.d
217 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
218 @llvm.aarch64.sve.smin.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm)
219 ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
222 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_single_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) {
223 ; CHECK-LABEL: multi_vec_min_single_x4_s64:
225 ; CHECK-NEXT: mov z27.d, z4.d
226 ; CHECK-NEXT: mov z26.d, z3.d
227 ; CHECK-NEXT: mov z25.d, z2.d
228 ; CHECK-NEXT: mov z24.d, z1.d
229 ; CHECK-NEXT: smin { z24.d - z27.d }, { z24.d - z27.d }, z5.d
230 ; CHECK-NEXT: mov z0.d, z24.d
231 ; CHECK-NEXT: mov z1.d, z25.d
232 ; CHECK-NEXT: mov z2.d, z26.d
233 ; CHECK-NEXT: mov z3.d, z27.d
235 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
236 @llvm.aarch64.sve.smin.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm)
237 ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
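; UMIN (Single, x4)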
242 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_single_x4_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm) {
243 ; CHECK-LABEL: multi_vec_min_single_x4_u8:
245 ; CHECK-NEXT: mov z27.d, z4.d
246 ; CHECK-NEXT: mov z26.d, z3.d
247 ; CHECK-NEXT: mov z25.d, z2.d
248 ; CHECK-NEXT: mov z24.d, z1.d
249 ; CHECK-NEXT: umin { z24.b - z27.b }, { z24.b - z27.b }, z5.b
250 ; CHECK-NEXT: mov z0.d, z24.d
251 ; CHECK-NEXT: mov z1.d, z25.d
252 ; CHECK-NEXT: mov z2.d, z26.d
253 ; CHECK-NEXT: mov z3.d, z27.d
255 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
256 @llvm.aarch64.sve.umin.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm)
257 ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
260 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_single_x4_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) {
261 ; CHECK-LABEL: multi_vec_min_single_x4_u16:
263 ; CHECK-NEXT: mov z27.d, z4.d
264 ; CHECK-NEXT: mov z26.d, z3.d
265 ; CHECK-NEXT: mov z25.d, z2.d
266 ; CHECK-NEXT: mov z24.d, z1.d
267 ; CHECK-NEXT: umin { z24.h - z27.h }, { z24.h - z27.h }, z5.h
268 ; CHECK-NEXT: mov z0.d, z24.d
269 ; CHECK-NEXT: mov z1.d, z25.d
270 ; CHECK-NEXT: mov z2.d, z26.d
271 ; CHECK-NEXT: mov z3.d, z27.d
273 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
274 @llvm.aarch64.sve.umin.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm)
275 ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
278 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_single_x4_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) {
279 ; CHECK-LABEL: multi_vec_min_single_x4_u32:
281 ; CHECK-NEXT: mov z27.d, z4.d
282 ; CHECK-NEXT: mov z26.d, z3.d
283 ; CHECK-NEXT: mov z25.d, z2.d
284 ; CHECK-NEXT: mov z24.d, z1.d
285 ; CHECK-NEXT: umin { z24.s - z27.s }, { z24.s - z27.s }, z5.s
286 ; CHECK-NEXT: mov z0.d, z24.d
287 ; CHECK-NEXT: mov z1.d, z25.d
288 ; CHECK-NEXT: mov z2.d, z26.d
289 ; CHECK-NEXT: mov z3.d, z27.d
291 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
292 @llvm.aarch64.sve.umin.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm)
293 ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
296 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_single_x4_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) {
297 ; CHECK-LABEL: multi_vec_min_single_x4_u64:
299 ; CHECK-NEXT: mov z27.d, z4.d
300 ; CHECK-NEXT: mov z26.d, z3.d
301 ; CHECK-NEXT: mov z25.d, z2.d
302 ; CHECK-NEXT: mov z24.d, z1.d
303 ; CHECK-NEXT: umin { z24.d - z27.d }, { z24.d - z27.d }, z5.d
304 ; CHECK-NEXT: mov z0.d, z24.d
305 ; CHECK-NEXT: mov z1.d, z25.d
306 ; CHECK-NEXT: mov z2.d, z26.d
307 ; CHECK-NEXT: mov z3.d, z27.d
309 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
310 @llvm.aarch64.sve.umin.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm)
311 ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
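; BFMIN (Single, x4)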
316 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_min_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
317 ; CHECK-LABEL: multi_vec_min_single_x4_bf16:
319 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
320 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
321 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
322 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
323 ; CHECK-NEXT: bfmin { z0.h - z3.h }, { z0.h - z3.h }, z4.h
325 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmin.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
326 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
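; FMIN (Single, x4)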
331 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_min_single_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm) {
332 ; CHECK-LABEL: multi_vec_min_single_x4_f16:
334 ; CHECK-NEXT: mov z27.d, z4.d
335 ; CHECK-NEXT: mov z26.d, z3.d
336 ; CHECK-NEXT: mov z25.d, z2.d
337 ; CHECK-NEXT: mov z24.d, z1.d
338 ; CHECK-NEXT: fmin { z24.h - z27.h }, { z24.h - z27.h }, z5.h
339 ; CHECK-NEXT: mov z0.d, z24.d
340 ; CHECK-NEXT: mov z1.d, z25.d
341 ; CHECK-NEXT: mov z2.d, z26.d
342 ; CHECK-NEXT: mov z3.d, z27.d
344 %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
345 @llvm.aarch64.sve.fmin.single.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm)
346 ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
349 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_min_single_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm) {
350 ; CHECK-LABEL: multi_vec_min_single_x4_f32:
352 ; CHECK-NEXT: mov z27.d, z4.d
353 ; CHECK-NEXT: mov z26.d, z3.d
354 ; CHECK-NEXT: mov z25.d, z2.d
355 ; CHECK-NEXT: mov z24.d, z1.d
356 ; CHECK-NEXT: fmin { z24.s - z27.s }, { z24.s - z27.s }, z5.s
357 ; CHECK-NEXT: mov z0.d, z24.d
358 ; CHECK-NEXT: mov z1.d, z25.d
359 ; CHECK-NEXT: mov z2.d, z26.d
360 ; CHECK-NEXT: mov z3.d, z27.d
362 %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
363 @llvm.aarch64.sve.fmin.single.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm)
364 ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
367 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_single_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm) {
368 ; CHECK-LABEL: multi_vec_min_single_x4_f64:
370 ; CHECK-NEXT: mov z27.d, z4.d
371 ; CHECK-NEXT: mov z26.d, z3.d
372 ; CHECK-NEXT: mov z25.d, z2.d
373 ; CHECK-NEXT: mov z24.d, z1.d
374 ; CHECK-NEXT: fmin { z24.d - z27.d }, { z24.d - z27.d }, z5.d
375 ; CHECK-NEXT: mov z0.d, z24.d
376 ; CHECK-NEXT: mov z1.d, z25.d
377 ; CHECK-NEXT: mov z2.d, z26.d
378 ; CHECK-NEXT: mov z3.d, z27.d
380 %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
381 @llvm.aarch64.sve.fmin.single.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm)
382 ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
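; SMIN (Multi, x2)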
387 define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_multi_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) {
388 ; CHECK-LABEL: multi_vec_min_multi_x2_s8:
390 ; CHECK-NEXT: mov z7.d, z4.d
391 ; CHECK-NEXT: mov z5.d, z2.d
392 ; CHECK-NEXT: mov z6.d, z3.d
393 ; CHECK-NEXT: mov z4.d, z1.d
394 ; CHECK-NEXT: smin { z4.b, z5.b }, { z4.b, z5.b }, { z6.b, z7.b }
395 ; CHECK-NEXT: mov z0.d, z4.d
396 ; CHECK-NEXT: mov z1.d, z5.d
398 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
399 ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
402 define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_multi_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
403 ; CHECK-LABEL: multi_vec_min_multi_x2_s16:
405 ; CHECK-NEXT: mov z7.d, z4.d
406 ; CHECK-NEXT: mov z5.d, z2.d
407 ; CHECK-NEXT: mov z6.d, z3.d
408 ; CHECK-NEXT: mov z4.d, z1.d
409 ; CHECK-NEXT: smin { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
410 ; CHECK-NEXT: mov z0.d, z4.d
411 ; CHECK-NEXT: mov z1.d, z5.d
413 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smin.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
414 ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
417 define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_multi_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) {
418 ; CHECK-LABEL: multi_vec_min_multi_x2_s32:
420 ; CHECK-NEXT: mov z7.d, z4.d
421 ; CHECK-NEXT: mov z5.d, z2.d
422 ; CHECK-NEXT: mov z6.d, z3.d
423 ; CHECK-NEXT: mov z4.d, z1.d
424 ; CHECK-NEXT: smin { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
425 ; CHECK-NEXT: mov z0.d, z4.d
426 ; CHECK-NEXT: mov z1.d, z5.d
428 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smin.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
429 ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
432 define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
433 ; CHECK-LABEL: multi_vec_min_multi_x2_s64:
435 ; CHECK-NEXT: mov z7.d, z4.d
436 ; CHECK-NEXT: mov z5.d, z2.d
437 ; CHECK-NEXT: mov z6.d, z3.d
438 ; CHECK-NEXT: mov z4.d, z1.d
439 ; CHECK-NEXT: smin { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
440 ; CHECK-NEXT: mov z0.d, z4.d
441 ; CHECK-NEXT: mov z1.d, z5.d
443 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smin.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
444 ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
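; UMIN (Multi, x2)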
449 define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_multi_x2_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) {
450 ; CHECK-LABEL: multi_vec_min_multi_x2_u8:
452 ; CHECK-NEXT: mov z7.d, z4.d
453 ; CHECK-NEXT: mov z5.d, z2.d
454 ; CHECK-NEXT: mov z6.d, z3.d
455 ; CHECK-NEXT: mov z4.d, z1.d
456 ; CHECK-NEXT: umin { z4.b, z5.b }, { z4.b, z5.b }, { z6.b, z7.b }
457 ; CHECK-NEXT: mov z0.d, z4.d
458 ; CHECK-NEXT: mov z1.d, z5.d
460 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
461 ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
464 define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_multi_x2_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
465 ; CHECK-LABEL: multi_vec_min_multi_x2_u16:
467 ; CHECK-NEXT: mov z7.d, z4.d
468 ; CHECK-NEXT: mov z5.d, z2.d
469 ; CHECK-NEXT: mov z6.d, z3.d
470 ; CHECK-NEXT: mov z4.d, z1.d
471 ; CHECK-NEXT: umin { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
472 ; CHECK-NEXT: mov z0.d, z4.d
473 ; CHECK-NEXT: mov z1.d, z5.d
475 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umin.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
476 ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
479 define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_multi_x2_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) {
480 ; CHECK-LABEL: multi_vec_min_multi_x2_u32:
482 ; CHECK-NEXT: mov z7.d, z4.d
483 ; CHECK-NEXT: mov z5.d, z2.d
484 ; CHECK-NEXT: mov z6.d, z3.d
485 ; CHECK-NEXT: mov z4.d, z1.d
486 ; CHECK-NEXT: umin { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
487 ; CHECK-NEXT: mov z0.d, z4.d
488 ; CHECK-NEXT: mov z1.d, z5.d
490 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
491 ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
494 define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x2_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
495 ; CHECK-LABEL: multi_vec_min_multi_x2_u64:
497 ; CHECK-NEXT: mov z7.d, z4.d
498 ; CHECK-NEXT: mov z5.d, z2.d
499 ; CHECK-NEXT: mov z6.d, z3.d
500 ; CHECK-NEXT: mov z4.d, z1.d
501 ; CHECK-NEXT: umin { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
502 ; CHECK-NEXT: mov z0.d, z4.d
503 ; CHECK-NEXT: mov z1.d, z5.d
505 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umin.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
506 ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
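; BFMIN (Multi, x2)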
511 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_min_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
512 ; CHECK-LABEL: multi_vec_min_x2_bf16:
514 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
515 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
516 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
517 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
518 ; CHECK-NEXT: bfmin { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
520 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmin.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
521 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
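; FMIN (Multi, x2)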
526 define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_min_multi_x2_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) {
527 ; CHECK-LABEL: multi_vec_min_multi_x2_f16:
529 ; CHECK-NEXT: mov z7.d, z4.d
530 ; CHECK-NEXT: mov z5.d, z2.d
531 ; CHECK-NEXT: mov z6.d, z3.d
532 ; CHECK-NEXT: mov z4.d, z1.d
533 ; CHECK-NEXT: fmin { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
534 ; CHECK-NEXT: mov z0.d, z4.d
535 ; CHECK-NEXT: mov z1.d, z5.d
537 %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmin.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2)
538 ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
541 define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_min_multi_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) {
542 ; CHECK-LABEL: multi_vec_min_multi_x2_f32:
544 ; CHECK-NEXT: mov z7.d, z4.d
545 ; CHECK-NEXT: mov z5.d, z2.d
546 ; CHECK-NEXT: mov z6.d, z3.d
547 ; CHECK-NEXT: mov z4.d, z1.d
548 ; CHECK-NEXT: fmin { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
549 ; CHECK-NEXT: mov z0.d, z4.d
550 ; CHECK-NEXT: mov z1.d, z5.d
552 %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmin.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2)
553 ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
556 define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_multi_x2_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) {
557 ; CHECK-LABEL: multi_vec_min_multi_x2_f64:
559 ; CHECK-NEXT: mov z7.d, z4.d
560 ; CHECK-NEXT: mov z5.d, z2.d
561 ; CHECK-NEXT: mov z6.d, z3.d
562 ; CHECK-NEXT: mov z4.d, z1.d
563 ; CHECK-NEXT: fmin { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
564 ; CHECK-NEXT: mov z0.d, z4.d
565 ; CHECK-NEXT: mov z1.d, z5.d
567 %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmin.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2)
568 ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
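; SMIN (Multi, x4)
; Note: the x4 "multi" tests below take nine scalable-vector arguments; only the
; first eight fit in z0-z7, so the last operand is passed indirectly and is
; reloaded from the pointer in x0 (the ld1b/ld1h/ld1w/ld1d in the CHECK lines).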
573 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_multi_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
574 ; CHECK-LABEL: multi_vec_min_multi_x4_s8:
576 ; CHECK-NEXT: mov z30.d, z7.d
577 ; CHECK-NEXT: mov z27.d, z4.d
578 ; CHECK-NEXT: ptrue p0.b
579 ; CHECK-NEXT: mov z29.d, z6.d
580 ; CHECK-NEXT: mov z26.d, z3.d
581 ; CHECK-NEXT: mov z28.d, z5.d
582 ; CHECK-NEXT: mov z25.d, z2.d
583 ; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0]
584 ; CHECK-NEXT: mov z24.d, z1.d
585 ; CHECK-NEXT: smin { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
586 ; CHECK-NEXT: mov z0.d, z24.d
587 ; CHECK-NEXT: mov z1.d, z25.d
588 ; CHECK-NEXT: mov z2.d, z26.d
589 ; CHECK-NEXT: mov z3.d, z27.d
591 <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
592 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
593 @llvm.aarch64.sve.smin.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
594 <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4)
595 ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
598 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_multi_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
599 ; CHECK-LABEL: multi_vec_min_multi_x4_s16:
601 ; CHECK-NEXT: mov z30.d, z7.d
602 ; CHECK-NEXT: mov z27.d, z4.d
603 ; CHECK-NEXT: ptrue p0.h
604 ; CHECK-NEXT: mov z29.d, z6.d
605 ; CHECK-NEXT: mov z26.d, z3.d
606 ; CHECK-NEXT: mov z28.d, z5.d
607 ; CHECK-NEXT: mov z25.d, z2.d
608 ; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
609 ; CHECK-NEXT: mov z24.d, z1.d
610 ; CHECK-NEXT: smin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
611 ; CHECK-NEXT: mov z0.d, z24.d
612 ; CHECK-NEXT: mov z1.d, z25.d
613 ; CHECK-NEXT: mov z2.d, z26.d
614 ; CHECK-NEXT: mov z3.d, z27.d
616 <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
617 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
618 @llvm.aarch64.sve.smin.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
619 <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
620 ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
623 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_multi_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
624 ; CHECK-LABEL: multi_vec_min_multi_x4_s32:
626 ; CHECK-NEXT: mov z30.d, z7.d
627 ; CHECK-NEXT: mov z27.d, z4.d
628 ; CHECK-NEXT: ptrue p0.s
629 ; CHECK-NEXT: mov z29.d, z6.d
630 ; CHECK-NEXT: mov z26.d, z3.d
631 ; CHECK-NEXT: mov z28.d, z5.d
632 ; CHECK-NEXT: mov z25.d, z2.d
633 ; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
634 ; CHECK-NEXT: mov z24.d, z1.d
635 ; CHECK-NEXT: smin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
636 ; CHECK-NEXT: mov z0.d, z24.d
637 ; CHECK-NEXT: mov z1.d, z25.d
638 ; CHECK-NEXT: mov z2.d, z26.d
639 ; CHECK-NEXT: mov z3.d, z27.d
641 <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
642 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
643 @llvm.aarch64.sve.smin.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
644 <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
645 ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
648 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
649 ; CHECK-LABEL: multi_vec_min_multi_x4_s64:
651 ; CHECK-NEXT: mov z30.d, z7.d
652 ; CHECK-NEXT: mov z27.d, z4.d
653 ; CHECK-NEXT: ptrue p0.d
654 ; CHECK-NEXT: mov z29.d, z6.d
655 ; CHECK-NEXT: mov z26.d, z3.d
656 ; CHECK-NEXT: mov z28.d, z5.d
657 ; CHECK-NEXT: mov z25.d, z2.d
658 ; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
659 ; CHECK-NEXT: mov z24.d, z1.d
660 ; CHECK-NEXT: smin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
661 ; CHECK-NEXT: mov z0.d, z24.d
662 ; CHECK-NEXT: mov z1.d, z25.d
663 ; CHECK-NEXT: mov z2.d, z26.d
664 ; CHECK-NEXT: mov z3.d, z27.d
666 <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
667 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
668 @llvm.aarch64.sve.smin.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
669 <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)
670 ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
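; UMIN (Multi, x4)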
675 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_multi_x4_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
676 ; CHECK-LABEL: multi_vec_min_multi_x4_u8:
678 ; CHECK-NEXT: mov z30.d, z7.d
679 ; CHECK-NEXT: mov z27.d, z4.d
680 ; CHECK-NEXT: ptrue p0.b
681 ; CHECK-NEXT: mov z29.d, z6.d
682 ; CHECK-NEXT: mov z26.d, z3.d
683 ; CHECK-NEXT: mov z28.d, z5.d
684 ; CHECK-NEXT: mov z25.d, z2.d
685 ; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0]
686 ; CHECK-NEXT: mov z24.d, z1.d
687 ; CHECK-NEXT: umin { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
688 ; CHECK-NEXT: mov z0.d, z24.d
689 ; CHECK-NEXT: mov z1.d, z25.d
690 ; CHECK-NEXT: mov z2.d, z26.d
691 ; CHECK-NEXT: mov z3.d, z27.d
693 <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
694 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
695 @llvm.aarch64.sve.umin.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
696 <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4)
697 ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
700 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_multi_x4_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
701 ; CHECK-LABEL: multi_vec_min_multi_x4_u16:
703 ; CHECK-NEXT: mov z30.d, z7.d
704 ; CHECK-NEXT: mov z27.d, z4.d
705 ; CHECK-NEXT: ptrue p0.h
706 ; CHECK-NEXT: mov z29.d, z6.d
707 ; CHECK-NEXT: mov z26.d, z3.d
708 ; CHECK-NEXT: mov z28.d, z5.d
709 ; CHECK-NEXT: mov z25.d, z2.d
710 ; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
711 ; CHECK-NEXT: mov z24.d, z1.d
712 ; CHECK-NEXT: umin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
713 ; CHECK-NEXT: mov z0.d, z24.d
714 ; CHECK-NEXT: mov z1.d, z25.d
715 ; CHECK-NEXT: mov z2.d, z26.d
716 ; CHECK-NEXT: mov z3.d, z27.d
718 <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
719 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
720 @llvm.aarch64.sve.umin.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
721 <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
722 ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
725 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_multi_x4_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
726 ; CHECK-LABEL: multi_vec_min_multi_x4_u32:
728 ; CHECK-NEXT: mov z30.d, z7.d
729 ; CHECK-NEXT: mov z27.d, z4.d
730 ; CHECK-NEXT: ptrue p0.s
731 ; CHECK-NEXT: mov z29.d, z6.d
732 ; CHECK-NEXT: mov z26.d, z3.d
733 ; CHECK-NEXT: mov z28.d, z5.d
734 ; CHECK-NEXT: mov z25.d, z2.d
735 ; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
736 ; CHECK-NEXT: mov z24.d, z1.d
737 ; CHECK-NEXT: umin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
738 ; CHECK-NEXT: mov z0.d, z24.d
739 ; CHECK-NEXT: mov z1.d, z25.d
740 ; CHECK-NEXT: mov z2.d, z26.d
741 ; CHECK-NEXT: mov z3.d, z27.d
743 <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
744 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
745 @llvm.aarch64.sve.umin.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
746 <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
747 ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
750 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x4_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
751 ; CHECK-LABEL: multi_vec_min_multi_x4_u64:
753 ; CHECK-NEXT: mov z30.d, z7.d
754 ; CHECK-NEXT: mov z27.d, z4.d
755 ; CHECK-NEXT: ptrue p0.d
756 ; CHECK-NEXT: mov z29.d, z6.d
757 ; CHECK-NEXT: mov z26.d, z3.d
758 ; CHECK-NEXT: mov z28.d, z5.d
759 ; CHECK-NEXT: mov z25.d, z2.d
760 ; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
761 ; CHECK-NEXT: mov z24.d, z1.d
762 ; CHECK-NEXT: umin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
763 ; CHECK-NEXT: mov z0.d, z24.d
764 ; CHECK-NEXT: mov z1.d, z25.d
765 ; CHECK-NEXT: mov z2.d, z26.d
766 ; CHECK-NEXT: mov z3.d, z27.d
768 <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
769 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
770 @llvm.aarch64.sve.umin.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
771 <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)
772 ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
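; BFMIN (Multi, x4)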
778 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_min_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
779 ; CHECK-LABEL: multi_vec_min_x4_bf16:
781 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
782 ; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
783 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
784 ; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
785 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
786 ; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
787 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
788 ; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
789 ; CHECK-NEXT: bfmin { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
791 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmin.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
792 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
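; FMIN (Multi, x4)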
797 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_min_multi_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
798 ; CHECK-LABEL: multi_vec_min_multi_x4_f16:
800 ; CHECK-NEXT: mov z30.d, z7.d
801 ; CHECK-NEXT: mov z27.d, z4.d
802 ; CHECK-NEXT: ptrue p0.h
803 ; CHECK-NEXT: mov z29.d, z6.d
804 ; CHECK-NEXT: mov z26.d, z3.d
805 ; CHECK-NEXT: mov z28.d, z5.d
806 ; CHECK-NEXT: mov z25.d, z2.d
807 ; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
808 ; CHECK-NEXT: mov z24.d, z1.d
809 ; CHECK-NEXT: fmin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
810 ; CHECK-NEXT: mov z0.d, z24.d
811 ; CHECK-NEXT: mov z1.d, z25.d
812 ; CHECK-NEXT: mov z2.d, z26.d
813 ; CHECK-NEXT: mov z3.d, z27.d
815 <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
816 %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
817 @llvm.aarch64.sve.fmin.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
818 <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4)
819 ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
822 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_min_multi_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
823 ; CHECK-LABEL: multi_vec_min_multi_x4_f32:
825 ; CHECK-NEXT: mov z30.d, z7.d
826 ; CHECK-NEXT: mov z27.d, z4.d
827 ; CHECK-NEXT: ptrue p0.s
828 ; CHECK-NEXT: mov z29.d, z6.d
829 ; CHECK-NEXT: mov z26.d, z3.d
830 ; CHECK-NEXT: mov z28.d, z5.d
831 ; CHECK-NEXT: mov z25.d, z2.d
832 ; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
833 ; CHECK-NEXT: mov z24.d, z1.d
834 ; CHECK-NEXT: fmin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
835 ; CHECK-NEXT: mov z0.d, z24.d
836 ; CHECK-NEXT: mov z1.d, z25.d
837 ; CHECK-NEXT: mov z2.d, z26.d
838 ; CHECK-NEXT: mov z3.d, z27.d
840 <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
841 %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
842 @llvm.aarch64.sve.fmin.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
843 <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4)
844 ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
847 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_multi_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
848 ; CHECK-LABEL: multi_vec_min_multi_x4_f64:
850 ; CHECK-NEXT: mov z30.d, z7.d
851 ; CHECK-NEXT: mov z27.d, z4.d
852 ; CHECK-NEXT: ptrue p0.d
853 ; CHECK-NEXT: mov z29.d, z6.d
854 ; CHECK-NEXT: mov z26.d, z3.d
855 ; CHECK-NEXT: mov z28.d, z5.d
856 ; CHECK-NEXT: mov z25.d, z2.d
857 ; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
858 ; CHECK-NEXT: mov z24.d, z1.d
859 ; CHECK-NEXT: fmin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
860 ; CHECK-NEXT: mov z0.d, z24.d
861 ; CHECK-NEXT: mov z1.d, z25.d
862 ; CHECK-NEXT: mov z2.d, z26.d
863 ; CHECK-NEXT: mov z3.d, z27.d
865 <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
866 %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
867 @llvm.aarch64.sve.fmin.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
868 <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4)
869 ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
872 ; BFMINNM (Single, x2)
874 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_minnm_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
875 ; CHECK-LABEL: multi_vec_minnm_single_x2_bf16:
877 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
878 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
879 ; CHECK-NEXT: bfminnm { z0.h, z1.h }, { z0.h, z1.h }, z2.h
881 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
882 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
885 ; FMINNM (Single, x2)
887 define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_minnm_single_x2_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm) {
888 ; CHECK-LABEL: multi_vec_minnm_single_x2_f16:
890 ; CHECK-NEXT: mov z5.d, z2.d
891 ; CHECK-NEXT: mov z4.d, z1.d
892 ; CHECK-NEXT: fminnm { z4.h, z5.h }, { z4.h, z5.h }, z3.h
893 ; CHECK-NEXT: mov z0.d, z4.d
894 ; CHECK-NEXT: mov z1.d, z5.d
896 %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.single.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm)
897 ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
900 define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_minnm_single_x2_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm) {
901 ; CHECK-LABEL: multi_vec_minnm_single_x2_f32:
903 ; CHECK-NEXT: mov z5.d, z2.d
904 ; CHECK-NEXT: mov z4.d, z1.d
905 ; CHECK-NEXT: fminnm { z4.s, z5.s }, { z4.s, z5.s }, z3.s
906 ; CHECK-NEXT: mov z0.d, z4.d
907 ; CHECK-NEXT: mov z1.d, z5.d
909 %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fminnm.single.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm)
910 ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
913 define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_minnm_single_x2_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm) {
914 ; CHECK-LABEL: multi_vec_minnm_single_x2_f64:
916 ; CHECK-NEXT: mov z5.d, z2.d
917 ; CHECK-NEXT: mov z4.d, z1.d
918 ; CHECK-NEXT: fminnm { z4.d, z5.d }, { z4.d, z5.d }, z3.d
919 ; CHECK-NEXT: mov z0.d, z4.d
920 ; CHECK-NEXT: mov z1.d, z5.d
922 %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fminnm.single.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm)
923 ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
926 ; BFMINNM (Single, x4)
928 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_minnm_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
929 ; CHECK-LABEL: multi_vec_minnm_single_x4_bf16:
931 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
932 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
933 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
934 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
935 ; CHECK-NEXT: bfminnm { z0.h - z3.h }, { z0.h - z3.h }, z4.h
937 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
938 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
941 ; FMINNM (Single, x4)
943 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_minnm_single_x4_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm) {
944 ; CHECK-LABEL: multi_vec_minnm_single_x4_f16:
946 ; CHECK-NEXT: mov z27.d, z4.d
947 ; CHECK-NEXT: mov z26.d, z3.d
948 ; CHECK-NEXT: mov z25.d, z2.d
949 ; CHECK-NEXT: mov z24.d, z1.d
950 ; CHECK-NEXT: fminnm { z24.h - z27.h }, { z24.h - z27.h }, z5.h
951 ; CHECK-NEXT: mov z0.d, z24.d
952 ; CHECK-NEXT: mov z1.d, z25.d
953 ; CHECK-NEXT: mov z2.d, z26.d
954 ; CHECK-NEXT: mov z3.d, z27.d
956 %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
957 @llvm.aarch64.sve.fminnm.single.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm)
958 ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
961 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_minnm_single_x4_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm) {
962 ; CHECK-LABEL: multi_vec_minnm_single_x4_f32:
964 ; CHECK-NEXT: mov z27.d, z4.d
965 ; CHECK-NEXT: mov z26.d, z3.d
966 ; CHECK-NEXT: mov z25.d, z2.d
967 ; CHECK-NEXT: mov z24.d, z1.d
968 ; CHECK-NEXT: fminnm { z24.s - z27.s }, { z24.s - z27.s }, z5.s
969 ; CHECK-NEXT: mov z0.d, z24.d
970 ; CHECK-NEXT: mov z1.d, z25.d
971 ; CHECK-NEXT: mov z2.d, z26.d
972 ; CHECK-NEXT: mov z3.d, z27.d
974 %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
975 @llvm.aarch64.sve.fminnm.single.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm)
976 ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
979 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_minnm_single_x4_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm) {
980 ; CHECK-LABEL: multi_vec_minnm_single_x4_f64:
982 ; CHECK-NEXT: mov z27.d, z4.d
983 ; CHECK-NEXT: mov z26.d, z3.d
984 ; CHECK-NEXT: mov z25.d, z2.d
985 ; CHECK-NEXT: mov z24.d, z1.d
986 ; CHECK-NEXT: fminnm { z24.d - z27.d }, { z24.d - z27.d }, z5.d
987 ; CHECK-NEXT: mov z0.d, z24.d
988 ; CHECK-NEXT: mov z1.d, z25.d
989 ; CHECK-NEXT: mov z2.d, z26.d
990 ; CHECK-NEXT: mov z3.d, z27.d
992 %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
993 @llvm.aarch64.sve.fminnm.single.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm)
994 ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
997 ; BFMINNM (Multi, x2)
999 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_minnm_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
1000 ; CHECK-LABEL: multi_vec_minnm_x2_bf16:
1002 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
1003 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
1004 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
1005 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
1006 ; CHECK-NEXT: bfminnm { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
1008 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
1009 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
1012 ; FMINNM (Multi, x2)
1014 define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_minnm_x2_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) {
1015 ; CHECK-LABEL: multi_vec_minnm_x2_f16:
1017 ; CHECK-NEXT: mov z7.d, z4.d
1018 ; CHECK-NEXT: mov z5.d, z2.d
1019 ; CHECK-NEXT: mov z6.d, z3.d
1020 ; CHECK-NEXT: mov z4.d, z1.d
1021 ; CHECK-NEXT: fminnm { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
1022 ; CHECK-NEXT: mov z0.d, z4.d
1023 ; CHECK-NEXT: mov z1.d, z5.d
1025 %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2)
1026 ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_minnm_x2_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) {
; CHECK-LABEL: multi_vec_minnm_x2_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z7.d, z4.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: fminnm { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
%res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fminnm.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2)
ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_minnm_x2_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) {
; CHECK-LABEL: multi_vec_minnm_x2_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z7.d, z4.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: fminnm { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
%res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fminnm.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2)
ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
}

; BFMINNM (Multi, x4)

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_minnm_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
; CHECK-LABEL: multi_vec_minnm_x4_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT: bfminnm { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}

; FMINNM (Multi, x4)
; In the f16/f32/f64 x4 tests below, the ninth scalable-vector argument (%zm4) does not fit in the
; z0-z7 argument registers, so it is passed indirectly and reloaded from [x0].

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_minnm_x4_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
; CHECK-LABEL: multi_vec_minnm_x4_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z30.d, z7.d
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z29.d, z6.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z28.d, z5.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: fminnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
%res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@llvm.aarch64.sve.fminnm.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
<vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4)
ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_minnm_x4_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
; CHECK-LABEL: multi_vec_minnm_x4_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z30.d, z7.d
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z29.d, z6.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z28.d, z5.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: fminnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
%res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
@llvm.aarch64.sve.fminnm.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
<vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4)
ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_minnm_x4_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
; CHECK-LABEL: multi_vec_minnm_x4_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z30.d, z7.d
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z29.d, z6.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z28.d, z5.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: fminnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
%res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
@llvm.aarch64.sve.fminnm.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
<vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4)
ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smin.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smin.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smin.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umin.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umin.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmin.single.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmin.single.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmin.single.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.single.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smin.single.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smin.single.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smin.single.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.single.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umin.single.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.single.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umin.single.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@llvm.aarch64.sve.fmin.single.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
@llvm.aarch64.sve.fmin.single.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
@llvm.aarch64.sve.fmin.single.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smin.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smin.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smin.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umin.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umin.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmin.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmin.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmin.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@llvm.aarch64.sve.smin.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@llvm.aarch64.sve.smin.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@llvm.aarch64.sve.smin.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@llvm.aarch64.sve.smin.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@llvm.aarch64.sve.umin.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@llvm.aarch64.sve.umin.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@llvm.aarch64.sve.umin.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@llvm.aarch64.sve.umin.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@llvm.aarch64.sve.fmin.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
@llvm.aarch64.sve.fmin.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
@llvm.aarch64.sve.fmin.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.single.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fminnm.single.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fminnm.single.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@llvm.aarch64.sve.fminnm.single.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
@llvm.aarch64.sve.fminnm.single.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
@llvm.aarch64.sve.fminnm.single.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fminnm.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fminnm.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@llvm.aarch64.sve.fminnm.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
@llvm.aarch64.sve.fminnm.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
@llvm.aarch64.sve.fminnm.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)