1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
3 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
; Signed per-lane minimum of two <16 x i8> vectors, written as an
; icmp slt + select pair. The shared assertions expect both llc runs to
; emit vmin.s8 q0, q0, q1 for it.
5 define arm_aapcs_vfpcc <16 x i8> @smin_v16i8(<16 x i8> %s1, <16 x i8> %s2) {
6 ; CHECK-LABEL: smin_v16i8:
7 ; CHECK: @ %bb.0: @ %entry
8 ; CHECK-NEXT: vmin.s8 q0, q0, q1
; %0 is true in the lanes where %s1 < %s2 (signed); those lanes keep %s1,
; the remaining lanes take %s2.
11 %0 = icmp slt <16 x i8> %s1, %s2
12 %1 = select <16 x i1> %0, <16 x i8> %s1, <16 x i8> %s2
; Signed per-lane minimum of two <8 x i16> vectors, written as an
; icmp slt + select pair. The shared assertions expect both llc runs to
; emit vmin.s16 q0, q0, q1 for it.
16 define arm_aapcs_vfpcc <8 x i16> @smin_v8i16(<8 x i16> %s1, <8 x i16> %s2) {
17 ; CHECK-LABEL: smin_v8i16:
18 ; CHECK: @ %bb.0: @ %entry
19 ; CHECK-NEXT: vmin.s16 q0, q0, q1
; %0 is true in the lanes where %s1 < %s2 (signed); those lanes keep %s1,
; the remaining lanes take %s2.
22 %0 = icmp slt <8 x i16> %s1, %s2
23 %1 = select <8 x i1> %0, <8 x i16> %s1, <8 x i16> %s2
; Signed per-lane minimum of two <4 x i32> vectors, written as an
; icmp slt + select pair. The shared assertions expect both llc runs to
; emit vmin.s32 q0, q0, q1 for it.
27 define arm_aapcs_vfpcc <4 x i32> @smin_v4i32(<4 x i32> %s1, <4 x i32> %s2) {
28 ; CHECK-LABEL: smin_v4i32:
29 ; CHECK: @ %bb.0: @ %entry
30 ; CHECK-NEXT: vmin.s32 q0, q0, q1
; %0 is true in the lanes where %s1 < %s2 (signed); those lanes keep %s1,
; the remaining lanes take %s2.
33 %0 = icmp slt <4 x i32> %s1, %s2
34 %1 = select <4 x i1> %0, <4 x i32> %s1, <4 x i32> %s2
; Signed per-lane minimum of two <2 x i64> vectors (icmp slt + select).
; Unlike the 8/16/32-bit variants, the expected output contains no vmin.
; Instead each 64-bit lane is moved to core registers and compared with a
; subs/sbcs pair as %s1 - %s2, csetm lt materialises the lane result, and
; bfi packs it into bits [7:0] (low lane) or [15:8] (high lane) of r1.
; That value is written to p0 with vmsr and vpsel picks between q0 and q1.
; The assertions are shared by both llc runs and are autogenerated, so
; regenerate them with the script named at the top of the file instead of
; editing them by hand.
38 define arm_aapcs_vfpcc <2 x i64> @smin_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
39 ; CHECK-LABEL: smin_v2i64:
40 ; CHECK: @ %bb.0: @ %entry
41 ; CHECK-NEXT: .save {r4, r5, r7, lr}
42 ; CHECK-NEXT: push {r4, r5, r7, lr}
43 ; CHECK-NEXT: vmov r0, r1, d2
44 ; CHECK-NEXT: vmov r2, r3, d0
45 ; CHECK-NEXT: vmov r12, lr, d3
46 ; CHECK-NEXT: vmov r4, r5, d1
47 ; CHECK-NEXT: subs r0, r2, r0
48 ; CHECK-NEXT: sbcs.w r0, r3, r1
49 ; CHECK-NEXT: mov.w r1, #0
50 ; CHECK-NEXT: csetm r0, lt
51 ; CHECK-NEXT: bfi r1, r0, #0, #8
52 ; CHECK-NEXT: subs.w r0, r4, r12
53 ; CHECK-NEXT: sbcs.w r0, r5, lr
54 ; CHECK-NEXT: csetm r0, lt
55 ; CHECK-NEXT: bfi r1, r0, #8, #8
56 ; CHECK-NEXT: vmsr p0, r1
57 ; CHECK-NEXT: vpsel q0, q0, q1
58 ; CHECK-NEXT: pop {r4, r5, r7, pc}
; %0 is true in the lanes where %s1 < %s2 (signed); those lanes keep %s1,
; the remaining lanes take %s2.
60 %0 = icmp slt <2 x i64> %s1, %s2
61 %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
; Unsigned per-lane minimum of two <16 x i8> vectors, written as an
; icmp ult + select pair. The shared assertions expect both llc runs to
; emit vmin.u8 q0, q0, q1 for it.
65 define arm_aapcs_vfpcc <16 x i8> @umin_v16i8(<16 x i8> %s1, <16 x i8> %s2) {
66 ; CHECK-LABEL: umin_v16i8:
67 ; CHECK: @ %bb.0: @ %entry
68 ; CHECK-NEXT: vmin.u8 q0, q0, q1
; %0 is true in the lanes where %s1 < %s2 (unsigned); those lanes keep
; %s1, the remaining lanes take %s2.
71 %0 = icmp ult <16 x i8> %s1, %s2
72 %1 = select <16 x i1> %0, <16 x i8> %s1, <16 x i8> %s2
; Unsigned per-lane minimum of two <8 x i16> vectors, written as an
; icmp ult + select pair. The shared assertions expect both llc runs to
; emit vmin.u16 q0, q0, q1 for it.
76 define arm_aapcs_vfpcc <8 x i16> @umin_v8i16(<8 x i16> %s1, <8 x i16> %s2) {
77 ; CHECK-LABEL: umin_v8i16:
78 ; CHECK: @ %bb.0: @ %entry
79 ; CHECK-NEXT: vmin.u16 q0, q0, q1
; %0 is true in the lanes where %s1 < %s2 (unsigned); those lanes keep
; %s1, the remaining lanes take %s2.
82 %0 = icmp ult <8 x i16> %s1, %s2
83 %1 = select <8 x i1> %0, <8 x i16> %s1, <8 x i16> %s2
; Unsigned per-lane minimum of two <4 x i32> vectors, written as an
; icmp ult + select pair. The shared assertions expect both llc runs to
; emit vmin.u32 q0, q0, q1 for it.
87 define arm_aapcs_vfpcc <4 x i32> @umin_v4i32(<4 x i32> %s1, <4 x i32> %s2) {
88 ; CHECK-LABEL: umin_v4i32:
89 ; CHECK: @ %bb.0: @ %entry
90 ; CHECK-NEXT: vmin.u32 q0, q0, q1
; %0 is true in the lanes where %s1 < %s2 (unsigned); those lanes keep
; %s1, the remaining lanes take %s2.
93 %0 = icmp ult <4 x i32> %s1, %s2
94 %1 = select <4 x i1> %0, <4 x i32> %s1, <4 x i32> %s2
; Unsigned per-lane minimum of two <2 x i64> vectors (icmp ult + select).
; Unlike the 8/16/32-bit variants, the expected output contains no vmin.
; Instead each 64-bit lane is moved to core registers and compared with a
; subs/sbcs pair as %s1 - %s2, csetm lo materialises the lane result, and
; bfi packs it into bits [7:0] (low lane) or [15:8] (high lane) of r1.
; That value is written to p0 with vmsr and vpsel picks between q0 and q1.
; The only difference from the signed variant's expected sequence is the
; lo condition in place of lt.
98 define arm_aapcs_vfpcc <2 x i64> @umin_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
99 ; CHECK-LABEL: umin_v2i64:
100 ; CHECK: @ %bb.0: @ %entry
101 ; CHECK-NEXT: .save {r4, r5, r7, lr}
102 ; CHECK-NEXT: push {r4, r5, r7, lr}
103 ; CHECK-NEXT: vmov r0, r1, d2
104 ; CHECK-NEXT: vmov r2, r3, d0
105 ; CHECK-NEXT: vmov r12, lr, d3
106 ; CHECK-NEXT: vmov r4, r5, d1
107 ; CHECK-NEXT: subs r0, r2, r0
108 ; CHECK-NEXT: sbcs.w r0, r3, r1
109 ; CHECK-NEXT: mov.w r1, #0
110 ; CHECK-NEXT: csetm r0, lo
111 ; CHECK-NEXT: bfi r1, r0, #0, #8
112 ; CHECK-NEXT: subs.w r0, r4, r12
113 ; CHECK-NEXT: sbcs.w r0, r5, lr
114 ; CHECK-NEXT: csetm r0, lo
115 ; CHECK-NEXT: bfi r1, r0, #8, #8
116 ; CHECK-NEXT: vmsr p0, r1
117 ; CHECK-NEXT: vpsel q0, q0, q1
118 ; CHECK-NEXT: pop {r4, r5, r7, pc}
; %0 is true in the lanes where %s1 < %s2 (unsigned); those lanes keep
; %s1, the remaining lanes take %s2.
120 %0 = icmp ult <2 x i64> %s1, %s2
121 %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
; Signed per-lane maximum of two <16 x i8> vectors, written as an
; icmp sgt + select pair. The shared assertions expect both llc runs to
; emit vmax.s8 q0, q0, q1 for it.
126 define arm_aapcs_vfpcc <16 x i8> @smax_v16i8(<16 x i8> %s1, <16 x i8> %s2) {
127 ; CHECK-LABEL: smax_v16i8:
128 ; CHECK: @ %bb.0: @ %entry
129 ; CHECK-NEXT: vmax.s8 q0, q0, q1
; %0 is true in the lanes where %s1 > %s2 (signed); those lanes keep %s1,
; the remaining lanes take %s2.
132 %0 = icmp sgt <16 x i8> %s1, %s2
133 %1 = select <16 x i1> %0, <16 x i8> %s1, <16 x i8> %s2
; Signed per-lane maximum of two <8 x i16> vectors, written as an
; icmp sgt + select pair. The shared assertions expect both llc runs to
; emit vmax.s16 q0, q0, q1 for it.
137 define arm_aapcs_vfpcc <8 x i16> @smax_v8i16(<8 x i16> %s1, <8 x i16> %s2) {
138 ; CHECK-LABEL: smax_v8i16:
139 ; CHECK: @ %bb.0: @ %entry
140 ; CHECK-NEXT: vmax.s16 q0, q0, q1
; %0 is true in the lanes where %s1 > %s2 (signed); those lanes keep %s1,
; the remaining lanes take %s2.
143 %0 = icmp sgt <8 x i16> %s1, %s2
144 %1 = select <8 x i1> %0, <8 x i16> %s1, <8 x i16> %s2
; Signed per-lane maximum of two <4 x i32> vectors, written as an
; icmp sgt + select pair. The shared assertions expect both llc runs to
; emit vmax.s32 q0, q0, q1 for it.
148 define arm_aapcs_vfpcc <4 x i32> @smax_v4i32(<4 x i32> %s1, <4 x i32> %s2) {
149 ; CHECK-LABEL: smax_v4i32:
150 ; CHECK: @ %bb.0: @ %entry
151 ; CHECK-NEXT: vmax.s32 q0, q0, q1
; %0 is true in the lanes where %s1 > %s2 (signed); those lanes keep %s1,
; the remaining lanes take %s2.
154 %0 = icmp sgt <4 x i32> %s1, %s2
155 %1 = select <4 x i1> %0, <4 x i32> %s1, <4 x i32> %s2
; Signed per-lane maximum of two <2 x i64> vectors (icmp sgt + select).
; Unlike the 8/16/32-bit variants, the expected output contains no vmax.
; The greater-than test is expected as a less-than test with the operands
; swapped. %s1 (d0/d1) is loaded into the subtrahend registers and %s2
; (d2/d3) into the minuend registers, so subs/sbcs computes %s2 - %s1 and
; csetm lt yields the lane result. bfi packs the two lane results into
; bits [7:0] and [15:8] of r1, vmsr copies that to p0, and vpsel picks
; between q0 and q1.
159 define arm_aapcs_vfpcc <2 x i64> @smax_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
160 ; CHECK-LABEL: smax_v2i64:
161 ; CHECK: @ %bb.0: @ %entry
162 ; CHECK-NEXT: .save {r4, r5, r7, lr}
163 ; CHECK-NEXT: push {r4, r5, r7, lr}
164 ; CHECK-NEXT: vmov r0, r1, d0
165 ; CHECK-NEXT: vmov r2, r3, d2
166 ; CHECK-NEXT: vmov r12, lr, d1
167 ; CHECK-NEXT: vmov r4, r5, d3
168 ; CHECK-NEXT: subs r0, r2, r0
169 ; CHECK-NEXT: sbcs.w r0, r3, r1
170 ; CHECK-NEXT: mov.w r1, #0
171 ; CHECK-NEXT: csetm r0, lt
172 ; CHECK-NEXT: bfi r1, r0, #0, #8
173 ; CHECK-NEXT: subs.w r0, r4, r12
174 ; CHECK-NEXT: sbcs.w r0, r5, lr
175 ; CHECK-NEXT: csetm r0, lt
176 ; CHECK-NEXT: bfi r1, r0, #8, #8
177 ; CHECK-NEXT: vmsr p0, r1
178 ; CHECK-NEXT: vpsel q0, q0, q1
179 ; CHECK-NEXT: pop {r4, r5, r7, pc}
; %0 is true in the lanes where %s1 > %s2 (signed); those lanes keep %s1,
; the remaining lanes take %s2.
181 %0 = icmp sgt <2 x i64> %s1, %s2
182 %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
; Unsigned per-lane maximum of two <16 x i8> vectors, written as an
; icmp ugt + select pair. The shared assertions expect both llc runs to
; emit vmax.u8 q0, q0, q1 for it.
186 define arm_aapcs_vfpcc <16 x i8> @umax_v16i8(<16 x i8> %s1, <16 x i8> %s2) {
187 ; CHECK-LABEL: umax_v16i8:
188 ; CHECK: @ %bb.0: @ %entry
189 ; CHECK-NEXT: vmax.u8 q0, q0, q1
; %0 is true in the lanes where %s1 > %s2 (unsigned); those lanes keep
; %s1, the remaining lanes take %s2.
192 %0 = icmp ugt <16 x i8> %s1, %s2
193 %1 = select <16 x i1> %0, <16 x i8> %s1, <16 x i8> %s2
; Unsigned per-lane maximum of two <8 x i16> vectors, written as an
; icmp ugt + select pair. The shared assertions expect both llc runs to
; emit vmax.u16 q0, q0, q1 for it.
197 define arm_aapcs_vfpcc <8 x i16> @umax_v8i16(<8 x i16> %s1, <8 x i16> %s2) {
198 ; CHECK-LABEL: umax_v8i16:
199 ; CHECK: @ %bb.0: @ %entry
200 ; CHECK-NEXT: vmax.u16 q0, q0, q1
; %0 is true in the lanes where %s1 > %s2 (unsigned); those lanes keep
; %s1, the remaining lanes take %s2.
203 %0 = icmp ugt <8 x i16> %s1, %s2
204 %1 = select <8 x i1> %0, <8 x i16> %s1, <8 x i16> %s2
; Unsigned per-lane maximum of two <4 x i32> vectors, written as an
; icmp ugt + select pair. The shared assertions expect both llc runs to
; emit vmax.u32 q0, q0, q1 for it.
208 define arm_aapcs_vfpcc <4 x i32> @umax_v4i32(<4 x i32> %s1, <4 x i32> %s2) {
209 ; CHECK-LABEL: umax_v4i32:
210 ; CHECK: @ %bb.0: @ %entry
211 ; CHECK-NEXT: vmax.u32 q0, q0, q1
; %0 is true in the lanes where %s1 > %s2 (unsigned); those lanes keep
; %s1, the remaining lanes take %s2.
214 %0 = icmp ugt <4 x i32> %s1, %s2
215 %1 = select <4 x i1> %0, <4 x i32> %s1, <4 x i32> %s2
; Unsigned per-lane maximum of two <2 x i64> vectors (icmp ugt + select).
; Unlike the 8/16/32-bit variants, the expected output contains no vmax.
; The greater-than test is expected as a lower-than test with the operands
; swapped. %s1 (d0/d1) is loaded into the subtrahend registers and %s2
; (d2/d3) into the minuend registers, so subs/sbcs computes %s2 - %s1 and
; csetm lo yields the lane result. bfi packs the two lane results into
; bits [7:0] and [15:8] of r1, vmsr copies that to p0, and vpsel picks
; between q0 and q1.
219 define arm_aapcs_vfpcc <2 x i64> @umax_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
220 ; CHECK-LABEL: umax_v2i64:
221 ; CHECK: @ %bb.0: @ %entry
222 ; CHECK-NEXT: .save {r4, r5, r7, lr}
223 ; CHECK-NEXT: push {r4, r5, r7, lr}
224 ; CHECK-NEXT: vmov r0, r1, d0
225 ; CHECK-NEXT: vmov r2, r3, d2
226 ; CHECK-NEXT: vmov r12, lr, d1
227 ; CHECK-NEXT: vmov r4, r5, d3
228 ; CHECK-NEXT: subs r0, r2, r0
229 ; CHECK-NEXT: sbcs.w r0, r3, r1
230 ; CHECK-NEXT: mov.w r1, #0
231 ; CHECK-NEXT: csetm r0, lo
232 ; CHECK-NEXT: bfi r1, r0, #0, #8
233 ; CHECK-NEXT: subs.w r0, r4, r12
234 ; CHECK-NEXT: sbcs.w r0, r5, lr
235 ; CHECK-NEXT: csetm r0, lo
236 ; CHECK-NEXT: bfi r1, r0, #8, #8
237 ; CHECK-NEXT: vmsr p0, r1
238 ; CHECK-NEXT: vpsel q0, q0, q1
239 ; CHECK-NEXT: pop {r4, r5, r7, pc}
; %0 is true in the lanes where %s1 > %s2 (unsigned); those lanes keep
; %s1, the remaining lanes take %s2.
241 %0 = icmp ugt <2 x i64> %s1, %s2
242 %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
; Per-lane maximum of two <4 x float> vectors, written as a fast-flagged
; fcmp ogt + select pair. The two llc runs have different expectations.
;  * with -mattr=+mve,+fullfp16 the expected code is four scalar
;    vmaxnm.f32 instructions, one per lane (s0..s3 against s4..s7);
;  * with -mattr=+mve.fp the expected code is one vector vmaxnm.f32 on
;    q0 and q1.
247 define arm_aapcs_vfpcc <4 x float> @maxnm_float32_t(<4 x float> %src1, <4 x float> %src2) {
248 ; CHECK-MVE-LABEL: maxnm_float32_t:
249 ; CHECK-MVE: @ %bb.0: @ %entry
250 ; CHECK-MVE-NEXT: vmaxnm.f32 s3, s7, s3
251 ; CHECK-MVE-NEXT: vmaxnm.f32 s2, s6, s2
252 ; CHECK-MVE-NEXT: vmaxnm.f32 s1, s5, s1
253 ; CHECK-MVE-NEXT: vmaxnm.f32 s0, s4, s0
254 ; CHECK-MVE-NEXT: bx lr
256 ; CHECK-MVEFP-LABEL: maxnm_float32_t:
257 ; CHECK-MVEFP: @ %bb.0: @ %entry
258 ; CHECK-MVEFP-NEXT: vmaxnm.f32 q0, q1, q0
259 ; CHECK-MVEFP-NEXT: bx lr
; The compare is %src2 > %src1 and the select keeps %src2 where it holds,
; so each result lane is the larger of the two inputs.
261 %cmp = fcmp fast ogt <4 x float> %src2, %src1
262 %0 = select <4 x i1> %cmp, <4 x float> %src2, <4 x float> %src1
; Per-lane minimum of two <8 x half> vectors, written as a fast-flagged
; fcmp ogt + select pair. The two llc runs have different expectations.
;  * with -mattr=+mve,+fullfp16 the expected code works on one s-register
;    at a time, pairing each scalar vminnm.f16 with vmovx.f16 and
;    vins.f16 instructions, for eight vminnm.f16 in total;
;  * with -mattr=+mve.fp the expected code is one vector vminnm.f16 on
;    q0 and q1.
266 define arm_aapcs_vfpcc <8 x half> @minnm_float16_t(<8 x half> %src1, <8 x half> %src2) {
267 ; CHECK-MVE-LABEL: minnm_float16_t:
268 ; CHECK-MVE: @ %bb.0: @ %entry
269 ; CHECK-MVE-NEXT: vmovx.f16 s8, s0
270 ; CHECK-MVE-NEXT: vmovx.f16 s10, s4
271 ; CHECK-MVE-NEXT: vminnm.f16 s0, s4, s0
272 ; CHECK-MVE-NEXT: vminnm.f16 s8, s10, s8
273 ; CHECK-MVE-NEXT: vins.f16 s0, s8
274 ; CHECK-MVE-NEXT: vmovx.f16 s4, s1
275 ; CHECK-MVE-NEXT: vmovx.f16 s8, s5
276 ; CHECK-MVE-NEXT: vminnm.f16 s1, s5, s1
277 ; CHECK-MVE-NEXT: vminnm.f16 s4, s8, s4
278 ; CHECK-MVE-NEXT: vmovx.f16 s8, s6
279 ; CHECK-MVE-NEXT: vins.f16 s1, s4
280 ; CHECK-MVE-NEXT: vmovx.f16 s4, s2
281 ; CHECK-MVE-NEXT: vminnm.f16 s2, s6, s2
282 ; CHECK-MVE-NEXT: vminnm.f16 s4, s8, s4
283 ; CHECK-MVE-NEXT: vins.f16 s2, s4
284 ; CHECK-MVE-NEXT: vmovx.f16 s4, s3
285 ; CHECK-MVE-NEXT: vmovx.f16 s6, s7
286 ; CHECK-MVE-NEXT: vminnm.f16 s3, s7, s3
287 ; CHECK-MVE-NEXT: vminnm.f16 s4, s6, s4
288 ; CHECK-MVE-NEXT: vins.f16 s3, s4
289 ; CHECK-MVE-NEXT: bx lr
291 ; CHECK-MVEFP-LABEL: minnm_float16_t:
292 ; CHECK-MVEFP: @ %bb.0: @ %entry
293 ; CHECK-MVEFP-NEXT: vminnm.f16 q0, q1, q0
294 ; CHECK-MVEFP-NEXT: bx lr
; The compare is %src2 > %src1, but the select keeps %src1 where it holds
; (operands reversed relative to the max tests), so each result lane is
; the smaller of the two inputs.
296 %cmp = fcmp fast ogt <8 x half> %src2, %src1
297 %0 = select <8 x i1> %cmp, <8 x half> %src1, <8 x half> %src2
; Per-lane maximum of two <2 x double> vectors, written as a fast-flagged
; fcmp ogt + select pair. The assertions are shared by both llc runs and
; contain no vmaxnm. The inputs are copied to q4 (%src2) and q5 (%src1),
; whose d8-d11 halves are saved with vpush. Each lane is then compared by
; a call to __aeabi_dcmpgt, and cmp/csetm ne turns the call result into a
; lane mask. bfi packs the two masks into bits [7:0] and [15:8] of r4,
; vmsr copies that to p0, and vpsel picks between q4 and q5.
301 define arm_aapcs_vfpcc <2 x double> @maxnm_float64_t(<2 x double> %src1, <2 x double> %src2) {
302 ; CHECK-LABEL: maxnm_float64_t:
303 ; CHECK: @ %bb.0: @ %entry
304 ; CHECK-NEXT: .save {r4, lr}
305 ; CHECK-NEXT: push {r4, lr}
306 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
307 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
308 ; CHECK-NEXT: vmov q4, q1
309 ; CHECK-NEXT: vmov q5, q0
310 ; CHECK-NEXT: vmov r0, r1, d8
311 ; CHECK-NEXT: vmov r2, r3, d10
312 ; CHECK-NEXT: bl __aeabi_dcmpgt
313 ; CHECK-NEXT: vmov r12, r1, d9
314 ; CHECK-NEXT: cmp r0, #0
315 ; CHECK-NEXT: vmov r2, r3, d11
316 ; CHECK-NEXT: csetm r0, ne
317 ; CHECK-NEXT: movs r4, #0
318 ; CHECK-NEXT: bfi r4, r0, #0, #8
319 ; CHECK-NEXT: mov r0, r12
320 ; CHECK-NEXT: bl __aeabi_dcmpgt
321 ; CHECK-NEXT: cmp r0, #0
322 ; CHECK-NEXT: csetm r0, ne
323 ; CHECK-NEXT: bfi r4, r0, #8, #8
324 ; CHECK-NEXT: vmsr p0, r4
325 ; CHECK-NEXT: vpsel q0, q4, q5
326 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
327 ; CHECK-NEXT: pop {r4, pc}
; The compare is %src2 > %src1 and the select keeps %src2 where it holds,
; so each result lane is the larger of the two inputs.
329 %cmp = fcmp fast ogt <2 x double> %src2, %src1
330 %0 = select <2 x i1> %cmp, <2 x double> %src2, <2 x double> %src1