From 4495a812f81c2f12fa7ce6c7aafac688f33d3361 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 24 Jul 2019 16:58:41 +0000 Subject: [PATCH] [ARM] MVE compare vector splat combine MVE VCMP instructions can use a general purpose register as the second operand. This adds the combines for it, selecting from a compare of a vdup. Differential Revision: https://reviews.llvm.org/D65061 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366924 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrMVE.td | 12 + test/CodeGen/Thumb2/mve-vcmpfr.ll | 3343 +++++++++++++++++++++++++++++++++++++ test/CodeGen/Thumb2/mve-vcmpr.ll | 615 +++++++ 3 files changed, 3970 insertions(+) create mode 100644 test/CodeGen/Thumb2/mve-vcmpfr.ll create mode 100644 test/CodeGen/Thumb2/mve-vcmpr.ll diff --git a/lib/Target/ARM/ARMInstrMVE.td b/lib/Target/ARM/ARMInstrMVE.td index a0cc4916bda..b2899c1ddd4 100644 --- a/lib/Target/ARM/ARMInstrMVE.td +++ b/lib/Target/ARM/ARMInstrMVE.td @@ -3006,6 +3006,13 @@ multiclass unpred_vcmp_r { def i32 : Pat<(v4i1 (opnode (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>; + def i8r : Pat<(v16i1 (opnode (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)))), + (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc))>; + def i16r : Pat<(v8i1 (opnode (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)))), + (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc))>; + def i32r : Pat<(v4i1 (opnode (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)))), + (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc))>; + def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (opnode (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))))), (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, 1, VCCR:$p1))>; def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (opnode (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))))), @@ -3032,6 +3039,11 @@ multiclass unpred_vcmpf_r { def f32 : 
Pat<(v4i1 (opnode (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))), (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>; + def f16r : Pat<(v8i1 (opnode (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)))), + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc))>; + def f32r : Pat<(v4i1 (opnode (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)))), + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc))>; + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (opnode (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))))), (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, 1, VCCR:$p1))>; def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (opnode (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))))), diff --git a/test/CodeGen/Thumb2/mve-vcmpfr.ll b/test/CodeGen/Thumb2/mve-vcmpfr.ll new file mode 100644 index 00000000000..f94efa33cf7 --- /dev/null +++ b/test/CodeGen/Thumb2/mve-vcmpfr.ll @@ -0,0 +1,3343 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP + +define arm_aapcs_vfpcc <4 x float> @vcmp_oeq_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_oeq_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, s4 +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 
+; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, s4 +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r3, #1 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_oeq_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vmov r0, s4 +; CHECK-MVEFP-NEXT: vcmp.f32 eq, q0, r0 +; CHECK-MVEFP-NEXT: vpsel q0, q2, q3 +; CHECK-MVEFP-NEXT: bx lr +entry: + %i = insertelement <4 x float> undef, float %src2, i32 0 + %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer + %c = fcmp oeq <4 x float> %src, %sp + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_one_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, 
fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r2, #1 +; CHECK-MVE-NEXT: vcmp.f32 s3, s4 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r3, #1 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, s4 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r3, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r0, #1 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_one_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vmov r0, s4 +; CHECK-MVEFP-NEXT: movw r1, #65535 +; CHECK-MVEFP-NEXT: vdup.32 q1, r0 +; CHECK-MVEFP-NEXT: vcmp.f32 le, q1, q0 +; CHECK-MVEFP-NEXT: vpst +; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, q1 +; CHECK-MVEFP-NEXT: vmrs r0, p0 +; CHECK-MVEFP-NEXT: eors r0, r1 +; CHECK-MVEFP-NEXT: vmsr p0, r0 +; CHECK-MVEFP-NEXT: vpsel q0, q2, q3 +; CHECK-MVEFP-NEXT: bx lr +entry: + %i = insertelement <4 x float> undef, float %src2, i32 0 + %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer + %c = fcmp one <4 x float> %src, %sp + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_ogt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { +; 
CHECK-MVE-LABEL: vcmp_ogt_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmpe.f32 s1, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s3, s4 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s2, s4 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r3, #1 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_ogt_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vmov r0, s4 +; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, r0 +; CHECK-MVEFP-NEXT: vpsel q0, q2, q3 +; CHECK-MVEFP-NEXT: bx lr +entry: + %i = insertelement <4 x float> undef, float %src2, i32 0 + %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer + %c = fcmp ogt <4 x float> %src, %sp + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_oge_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { +; 
CHECK-MVE-LABEL: vcmp_oge_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmpe.f32 s1, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s3, s4 +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s2, s4 +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r3, #1 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_oge_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vmov r0, s4 +; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, r0 +; CHECK-MVEFP-NEXT: vpsel q0, q2, q3 +; CHECK-MVEFP-NEXT: bx lr +entry: + %i = insertelement <4 x float> undef, float %src2, i32 0 + %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer + %c = fcmp oge <4 x float> %src, %sp + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_olt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { +; 
CHECK-MVE-LABEL: vcmp_olt_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmpe.f32 s1, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s3, s4 +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s2, s4 +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r3, #1 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_olt_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vmov r0, s4 +; CHECK-MVEFP-NEXT: vdup.32 q1, r0 +; CHECK-MVEFP-NEXT: vcmp.f32 gt, q1, q0 +; CHECK-MVEFP-NEXT: vpsel q0, q2, q3 +; CHECK-MVEFP-NEXT: bx lr +entry: + %i = insertelement <4 x float> undef, float %src2, i32 0 + %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer + %c = fcmp olt <4 x float> %src, %sp + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_ole_v4f32(<4 x float> %src, float %src2, <4 x 
float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_ole_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmpe.f32 s1, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s3, s4 +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s2, s4 +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r3, #1 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_ole_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vmov r0, s4 +; CHECK-MVEFP-NEXT: vdup.32 q1, r0 +; CHECK-MVEFP-NEXT: vcmp.f32 ge, q1, q0 +; CHECK-MVEFP-NEXT: vpsel q0, q2, q3 +; CHECK-MVEFP-NEXT: bx lr +entry: + %i = insertelement <4 x float> undef, float %src2, i32 0 + %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer + %c = fcmp ole <4 x float> %src, %sp + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 
x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_ueq_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r2, #1 +; CHECK-MVE-NEXT: vcmp.f32 s3, s4 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r3, #1 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, s4 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r3, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_ueq_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vmov r0, s4 +; CHECK-MVEFP-NEXT: vdup.32 q1, r0 +; CHECK-MVEFP-NEXT: vcmp.f32 le, q1, q0 +; CHECK-MVEFP-NEXT: vpst +; CHECK-MVEFP-NEXT: vcmpt.f32 
le, q0, q1 +; CHECK-MVEFP-NEXT: vpsel q0, q2, q3 +; CHECK-MVEFP-NEXT: bx lr +entry: + %i = insertelement <4 x float> undef, float %src2, i32 0 + %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer + %c = fcmp ueq <4 x float> %src, %sp + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_une_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_une_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, s4 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, s4 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r3, #1 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_une_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vmov r0, s4 +; CHECK-MVEFP-NEXT: vcmp.f32 ne, q0, 
r0 +; CHECK-MVEFP-NEXT: vpsel q0, q2, q3 +; CHECK-MVEFP-NEXT: bx lr +entry: + %i = insertelement <4 x float> undef, float %src2, i32 0 + %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer + %c = fcmp une <4 x float> %src, %sp + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_ugt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_ugt_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmpe.f32 s1, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s3, s4 +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s2, s4 +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r3, #1 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_ugt_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vmov r0, s4 +; CHECK-MVEFP-NEXT: movw r1, #65535 +; 
CHECK-MVEFP-NEXT: vdup.32 q1, r0 +; CHECK-MVEFP-NEXT: vcmp.f32 ge, q1, q0 +; CHECK-MVEFP-NEXT: vmrs r0, p0 +; CHECK-MVEFP-NEXT: eors r0, r1 +; CHECK-MVEFP-NEXT: vmsr p0, r0 +; CHECK-MVEFP-NEXT: vpsel q0, q2, q3 +; CHECK-MVEFP-NEXT: bx lr +entry: + %i = insertelement <4 x float> undef, float %src2, i32 0 + %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer + %c = fcmp ugt <4 x float> %src, %sp + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_uge_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_uge_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmpe.f32 s1, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s3, s4 +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s2, s4 +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r3, #1 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 +; 
CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_uge_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vmov r0, s4 +; CHECK-MVEFP-NEXT: movw r1, #65535 +; CHECK-MVEFP-NEXT: vdup.32 q1, r0 +; CHECK-MVEFP-NEXT: vcmp.f32 gt, q1, q0 +; CHECK-MVEFP-NEXT: vmrs r0, p0 +; CHECK-MVEFP-NEXT: eors r0, r1 +; CHECK-MVEFP-NEXT: vmsr p0, r0 +; CHECK-MVEFP-NEXT: vpsel q0, q2, q3 +; CHECK-MVEFP-NEXT: bx lr +entry: + %i = insertelement <4 x float> undef, float %src2, i32 0 + %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer + %c = fcmp uge <4 x float> %src, %sp + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_ult_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_ult_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmpe.f32 s1, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s3, s4 +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s2, s4 +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r3, #1 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: 
vseleq.f32 s2, s14, s10 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_ult_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vmov r0, s4 +; CHECK-MVEFP-NEXT: movw r1, #65535 +; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, r0 +; CHECK-MVEFP-NEXT: vmrs r0, p0 +; CHECK-MVEFP-NEXT: eors r0, r1 +; CHECK-MVEFP-NEXT: vmsr p0, r0 +; CHECK-MVEFP-NEXT: vpsel q0, q2, q3 +; CHECK-MVEFP-NEXT: bx lr +entry: + %i = insertelement <4 x float> undef, float %src2, i32 0 + %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer + %c = fcmp ult <4 x float> %src, %sp + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_ule_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_ule_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmpe.f32 s1, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s3, s4 +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s2, s4 +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r3, #1 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: 
movne r0, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_ule_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vmov r0, s4 +; CHECK-MVEFP-NEXT: movw r1, #65535 +; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, r0 +; CHECK-MVEFP-NEXT: vmrs r0, p0 +; CHECK-MVEFP-NEXT: eors r0, r1 +; CHECK-MVEFP-NEXT: vmsr p0, r0 +; CHECK-MVEFP-NEXT: vpsel q0, q2, q3 +; CHECK-MVEFP-NEXT: bx lr +entry: + %i = insertelement <4 x float> undef, float %src2, i32 0 + %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer + %c = fcmp ule <4 x float> %src, %sp + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_ord_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_ord_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmpe.f32 s1, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s3, s4 +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s2, s4 +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r3, #1 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, 
fpscr +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_ord_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vmov r0, s4 +; CHECK-MVEFP-NEXT: movw r1, #65535 +; CHECK-MVEFP-NEXT: vdup.32 q1, r0 +; CHECK-MVEFP-NEXT: vcmp.f32 le, q1, q0 +; CHECK-MVEFP-NEXT: vpst +; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, q1 +; CHECK-MVEFP-NEXT: vmrs r0, p0 +; CHECK-MVEFP-NEXT: eors r0, r1 +; CHECK-MVEFP-NEXT: vmsr p0, r0 +; CHECK-MVEFP-NEXT: vpsel q0, q2, q3 +; CHECK-MVEFP-NEXT: bx lr +entry: + %i = insertelement <4 x float> undef, float %src2, i32 0 + %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer + %c = fcmp ord <4 x float> %src, %sp + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_uno_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_uno_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmpe.f32 s1, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmpe.f32 s3, s4 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; 
CHECK-MVE-NEXT: vcmpe.f32 s2, s4 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r3, #1 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_uno_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vmov r0, s4 +; CHECK-MVEFP-NEXT: vdup.32 q1, r0 +; CHECK-MVEFP-NEXT: vcmp.f32 le, q1, q0 +; CHECK-MVEFP-NEXT: vpst +; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, q1 +; CHECK-MVEFP-NEXT: vpsel q0, q2, q3 +; CHECK-MVEFP-NEXT: bx lr +entry: + %i = insertelement <4 x float> undef, float %src2, i32 0 + %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer + %c = fcmp uno <4 x float> %src, %sp + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + + + +define arm_aapcs_vfpcc <8 x half> @vcmp_oeq_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_oeq_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9, d10} +; CHECK-MVE-NEXT: vpush {d8, d9, d10} +; CHECK-MVE-NEXT: vmov.u16 r2, q0[0] +; CHECK-MVE-NEXT: vldr.16 s16, [r0] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[0] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[0] +; CHECK-MVE-NEXT: it ne 
+; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s14, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov.u16 r2, q0[1] +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmov.u16 r3, q1[1] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r2, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, s16 +; CHECK-MVE-NEXT: vmov s12, r3 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vmov.u16 r3, q2[1] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmov s14, r3 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmov.16 q3[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.16 q3[1], r2 +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[2] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[2] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[3] +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[3] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[4] +; 
CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[3], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[4] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[5] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[5] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[6] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[5], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[6] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[6], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[7] +; 
CHECK-MVE-NEXT: vcmp.f16 s0, s16 +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov.u16 r0, q2[7] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmov s2, r0 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9, d10} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_oeq_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] +; CHECK-MVEFP-NEXT: vmov r0, s12 +; CHECK-MVEFP-NEXT: vcmp.f16 eq, q0, r0 +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %src2 = load half, half* %src2p + %i = insertelement <8 x half> undef, half %src2, i32 0 + %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer + %c = fcmp oeq <8 x half> %src, %sp + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_one_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_one_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9, d10} +; CHECK-MVE-NEXT: vpush {d8, d9, d10} +; CHECK-MVE-NEXT: vmov.u16 r2, q0[0] +; CHECK-MVE-NEXT: vldr.16 s16, [r0] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[0] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r0, #1 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[0] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s14, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q0[1] +; 
CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov.u16 r3, q1[1] +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r2, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, s16 +; CHECK-MVE-NEXT: vmov s12, r3 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r2, #1 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vmov.u16 r3, q2[1] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmov s14, r3 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmov.16 q3[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.16 q3[1], r2 +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[2] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r0, #1 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[2] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[3] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r0, #1 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[3] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: 
vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[4] +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[3], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r0, #1 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[4] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[5] +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r0, #1 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[5] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[6] +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[5], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r0, #1 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[6] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 
s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[6], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[7] +; CHECK-MVE-NEXT: vcmp.f16 s0, s16 +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov.u16 r0, q2[7] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmov s2, r0 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9, d10} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_one_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] +; CHECK-MVEFP-NEXT: movw r1, #65535 +; CHECK-MVEFP-NEXT: vmov r0, s12 +; CHECK-MVEFP-NEXT: vdup.16 q3, r0 +; CHECK-MVEFP-NEXT: vcmp.f16 le, q3, q0 +; CHECK-MVEFP-NEXT: vpst +; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, q3 +; CHECK-MVEFP-NEXT: vmrs r0, p0 +; CHECK-MVEFP-NEXT: eors r0, r1 +; CHECK-MVEFP-NEXT: vmsr p0, r0 +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %src2 = load half, half* %src2p + %i = insertelement <8 x half> undef, half %src2, i32 0 + %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer + %c = fcmp one <8 x half> %src, %sp + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_ogt_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9, d10} +; CHECK-MVE-NEXT: vpush {d8, d9, d10} +; CHECK-MVE-NEXT: vmov.u16 r2, q0[0] +; CHECK-MVE-NEXT: vldr.16 s16, [r0] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r0, #0 +; 
CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[0] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[0] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s14, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov.u16 r2, q0[1] +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmov.u16 r3, q1[1] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r2, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vmov s12, r3 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vmov.u16 r3, q2[1] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmov s14, r3 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmov.16 q3[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.16 q3[1], r2 +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[2] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[2] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[3] +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 
+; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[3] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[4] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[3], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[4] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[5] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[5] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[6] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[5], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[6] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, 
#1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[6], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[7] +; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov.u16 r0, q2[7] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmov s2, r0 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9, d10} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_ogt_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] +; CHECK-MVEFP-NEXT: vmov r0, s12 +; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, r0 +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %src2 = load half, half* %src2p + %i = insertelement <8 x half> undef, half %src2, i32 0 + %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer + %c = fcmp ogt <8 x half> %src, %sp + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_oge_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9, d10} +; CHECK-MVE-NEXT: vpush {d8, d9, d10} +; CHECK-MVE-NEXT: vmov.u16 r2, q0[0] +; CHECK-MVE-NEXT: vldr.16 s16, [r0] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[0] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; 
CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[0] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s14, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov.u16 r2, q0[1] +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmov.u16 r3, q1[1] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r2, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vmov s12, r3 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vmov.u16 r3, q2[1] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmov s14, r3 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmov.16 q3[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.16 q3[1], r2 +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[2] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[2] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[3] +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[3] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: 
vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[4] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[3], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[4] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[5] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[5] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[6] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[5], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[6] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[6], r0 +; CHECK-MVE-NEXT: 
vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[7] +; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov.u16 r0, q2[7] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmov s2, r0 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9, d10} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_oge_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] +; CHECK-MVEFP-NEXT: vmov r0, s12 +; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, r0 +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %src2 = load half, half* %src2p + %i = insertelement <8 x half> undef, half %src2, i32 0 + %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer + %c = fcmp oge <8 x half> %src, %sp + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_olt_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9, d10} +; CHECK-MVE-NEXT: vpush {d8, d9, d10} +; CHECK-MVE-NEXT: vmov.u16 r2, q0[0] +; CHECK-MVE-NEXT: vldr.16 s16, [r0] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[0] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[0] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s14, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; 
CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov.u16 r2, q0[1] +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmov.u16 r3, q1[1] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r2, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vmov s12, r3 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vmov.u16 r3, q2[1] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmov s14, r3 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmov.16 q3[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.16 q3[1], r2 +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[2] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[2] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[3] +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[3] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[4] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[3], 
r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[4] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[5] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[5] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[6] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[5], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[6] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[6], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[7] +; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, 
fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov.u16 r0, q2[7] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmov s2, r0 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9, d10} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_olt_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] +; CHECK-MVEFP-NEXT: vmov r0, s12 +; CHECK-MVEFP-NEXT: vdup.16 q3, r0 +; CHECK-MVEFP-NEXT: vcmp.f16 gt, q3, q0 +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %src2 = load half, half* %src2p + %i = insertelement <8 x half> undef, half %src2, i32 0 + %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer + %c = fcmp olt <8 x half> %src, %sp + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_ole_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9, d10} +; CHECK-MVE-NEXT: vpush {d8, d9, d10} +; CHECK-MVE-NEXT: vmov.u16 r2, q0[0] +; CHECK-MVE-NEXT: vldr.16 s16, [r0] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[0] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[0] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s14, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov.u16 r2, q0[1] +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmov.u16 r3, q1[1] +; 
CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r2, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vmov s12, r3 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vmov.u16 r3, q2[1] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmov s14, r3 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmov.16 q3[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.16 q3[1], r2 +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[2] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[2] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[3] +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[3] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[4] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[3], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; 
CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[4] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[5] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[5] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[6] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[5], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[6] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[6], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[7] +; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov.u16 r0, q2[7] +; 
CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmov s2, r0 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9, d10} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_ole_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] +; CHECK-MVEFP-NEXT: vmov r0, s12 +; CHECK-MVEFP-NEXT: vdup.16 q3, r0 +; CHECK-MVEFP-NEXT: vcmp.f16 ge, q3, q0 +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %src2 = load half, half* %src2p + %i = insertelement <8 x half> undef, half %src2, i32 0 + %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer + %c = fcmp ole <8 x half> %src, %sp + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_ueq_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_ueq_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9, d10} +; CHECK-MVE-NEXT: vpush {d8, d9, d10} +; CHECK-MVE-NEXT: vmov.u16 r2, q0[0] +; CHECK-MVE-NEXT: vldr.16 s16, [r0] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[0] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[0] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s14, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q0[1] +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov.u16 r3, q1[1] +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r2, #0 
+; CHECK-MVE-NEXT: vcmp.f16 s12, s16 +; CHECK-MVE-NEXT: vmov s12, r3 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r2, #1 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vmov.u16 r3, q2[1] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmov s14, r3 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmov.16 q3[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.16 q3[1], r2 +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[2] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[2] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[3] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[3] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[4] +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[3], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s18, r0 +; 
CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[4] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[5] +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[5] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[6] +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[5], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[6] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[6], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[7] +; 
CHECK-MVE-NEXT: vcmp.f16 s0, s16 +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov.u16 r0, q2[7] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmov s2, r0 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9, d10} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_ueq_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] +; CHECK-MVEFP-NEXT: vmov r0, s12 +; CHECK-MVEFP-NEXT: vdup.16 q3, r0 +; CHECK-MVEFP-NEXT: vcmp.f16 le, q3, q0 +; CHECK-MVEFP-NEXT: vpst +; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, q3 +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %src2 = load half, half* %src2p + %i = insertelement <8 x half> undef, half %src2, i32 0 + %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer + %c = fcmp ueq <8 x half> %src, %sp + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_une_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_une_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9, d10} +; CHECK-MVE-NEXT: vpush {d8, d9, d10} +; CHECK-MVE-NEXT: vmov.u16 r2, q0[0] +; CHECK-MVE-NEXT: vldr.16 s16, [r0] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[0] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[0] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; 
CHECK-MVE-NEXT: vmov s14, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov.u16 r2, q0[1] +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmov.u16 r3, q1[1] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r2, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, s16 +; CHECK-MVE-NEXT: vmov s12, r3 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vmov.u16 r3, q2[1] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmov s14, r3 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmov.16 q3[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.16 q3[1], r2 +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[2] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[2] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[3] +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[3] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[4] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; 
CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[3], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[4] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[5] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[5] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[6] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[5], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[6] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[6], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[7] +; CHECK-MVE-NEXT: vcmp.f16 s0, s16 +; 
CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov.u16 r0, q2[7] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmov s2, r0 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9, d10} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_une_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] +; CHECK-MVEFP-NEXT: vmov r0, s12 +; CHECK-MVEFP-NEXT: vcmp.f16 ne, q0, r0 +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %src2 = load half, half* %src2p + %i = insertelement <8 x half> undef, half %src2, i32 0 + %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer + %c = fcmp une <8 x half> %src, %sp + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_ugt_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9, d10} +; CHECK-MVE-NEXT: vpush {d8, d9, d10} +; CHECK-MVE-NEXT: vmov.u16 r2, q0[0] +; CHECK-MVE-NEXT: vldr.16 s16, [r0] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[0] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[0] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s14, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov.u16 r2, q0[1] +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: 
vmov.u16 r3, q1[1] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r2, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vmov s12, r3 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vmov.u16 r3, q2[1] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmov s14, r3 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmov.16 q3[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.16 q3[1], r2 +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[2] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[2] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[3] +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[3] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[4] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[3], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: 
vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[4] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[5] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[5] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[6] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[5], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[6] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[6], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[7] +; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov.u16 r0, 
q2[7] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmov s2, r0 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9, d10} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_ugt_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] +; CHECK-MVEFP-NEXT: movw r1, #65535 +; CHECK-MVEFP-NEXT: vmov r0, s12 +; CHECK-MVEFP-NEXT: vdup.16 q3, r0 +; CHECK-MVEFP-NEXT: vcmp.f16 ge, q3, q0 +; CHECK-MVEFP-NEXT: vmrs r0, p0 +; CHECK-MVEFP-NEXT: eors r0, r1 +; CHECK-MVEFP-NEXT: vmsr p0, r0 +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %src2 = load half, half* %src2p + %i = insertelement <8 x half> undef, half %src2, i32 0 + %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer + %c = fcmp ugt <8 x half> %src, %sp + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_uge_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9, d10} +; CHECK-MVE-NEXT: vpush {d8, d9, d10} +; CHECK-MVE-NEXT: vmov.u16 r2, q0[0] +; CHECK-MVE-NEXT: vldr.16 s16, [r0] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[0] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[0] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s14, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov.u16 r2, q0[1] +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmov.u16 r3, q1[1] +; 
CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r2, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vmov s12, r3 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vmov.u16 r3, q2[1] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmov s14, r3 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmov.16 q3[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.16 q3[1], r2 +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[2] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[2] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[3] +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[3] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[4] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[3], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; 
CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[4] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[5] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[5] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[6] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[5], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[6] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[6], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[7] +; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov.u16 r0, q2[7] +; 
CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmov s2, r0 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9, d10} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_uge_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] +; CHECK-MVEFP-NEXT: movw r1, #65535 +; CHECK-MVEFP-NEXT: vmov r0, s12 +; CHECK-MVEFP-NEXT: vdup.16 q3, r0 +; CHECK-MVEFP-NEXT: vcmp.f16 gt, q3, q0 +; CHECK-MVEFP-NEXT: vmrs r0, p0 +; CHECK-MVEFP-NEXT: eors r0, r1 +; CHECK-MVEFP-NEXT: vmsr p0, r0 +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %src2 = load half, half* %src2p + %i = insertelement <8 x half> undef, half %src2, i32 0 + %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer + %c = fcmp uge <8 x half> %src, %sp + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_ult_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9, d10} +; CHECK-MVE-NEXT: vpush {d8, d9, d10} +; CHECK-MVE-NEXT: vmov.u16 r2, q0[0] +; CHECK-MVE-NEXT: vldr.16 s16, [r0] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[0] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[0] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s14, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov.u16 r2, q0[1] +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmov.u16 r3, q1[1] +; 
CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r2, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vmov s12, r3 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vmov.u16 r3, q2[1] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmov s14, r3 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmov.16 q3[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.16 q3[1], r2 +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[2] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[2] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[3] +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[3] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[4] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[3], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; 
CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[4] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[5] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[5] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[6] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[5], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[6] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[6], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[7] +; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov.u16 r0, q2[7] +; 
CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmov s2, r0 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9, d10} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_ult_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] +; CHECK-MVEFP-NEXT: movw r1, #65535 +; CHECK-MVEFP-NEXT: vmov r0, s12 +; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, r0 +; CHECK-MVEFP-NEXT: vmrs r0, p0 +; CHECK-MVEFP-NEXT: eors r0, r1 +; CHECK-MVEFP-NEXT: vmsr p0, r0 +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %src2 = load half, half* %src2p + %i = insertelement <8 x half> undef, half %src2, i32 0 + %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer + %c = fcmp ult <8 x half> %src, %sp + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_ule_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9, d10} +; CHECK-MVE-NEXT: vpush {d8, d9, d10} +; CHECK-MVE-NEXT: vmov.u16 r2, q0[0] +; CHECK-MVE-NEXT: vldr.16 s16, [r0] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[0] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[0] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s14, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov.u16 r2, q0[1] +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmov.u16 r3, q1[1] +; CHECK-MVE-NEXT: vmov s12, r2 +; 
CHECK-MVE-NEXT: movs r2, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vmov s12, r3 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vmov.u16 r3, q2[1] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmov s14, r3 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmov.16 q3[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.16 q3[1], r2 +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[2] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[2] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[3] +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[3] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[4] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[3], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; 
CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[4] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[5] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[5] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[6] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[5], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[6] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[6], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[7] +; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov.u16 r0, q2[7] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne 
r1, #1 +; CHECK-MVE-NEXT: vmov s2, r0 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9, d10} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_ule_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] +; CHECK-MVEFP-NEXT: movw r1, #65535 +; CHECK-MVEFP-NEXT: vmov r0, s12 +; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, r0 +; CHECK-MVEFP-NEXT: vmrs r0, p0 +; CHECK-MVEFP-NEXT: eors r0, r1 +; CHECK-MVEFP-NEXT: vmsr p0, r0 +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %src2 = load half, half* %src2p + %i = insertelement <8 x half> undef, half %src2, i32 0 + %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer + %c = fcmp ule <8 x half> %src, %sp + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_ord_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9, d10} +; CHECK-MVE-NEXT: vpush {d8, d9, d10} +; CHECK-MVE-NEXT: vmov.u16 r2, q0[0] +; CHECK-MVE-NEXT: vldr.16 s16, [r0] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[0] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[0] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s14, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov.u16 r2, q0[1] +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmov.u16 r3, q1[1] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r2, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s12, 
s16 +; CHECK-MVE-NEXT: vmov s12, r3 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vmov.u16 r3, q2[1] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmov s14, r3 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmov.16 q3[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.16 q3[1], r2 +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[2] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[2] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[3] +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[3] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[4] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[3], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vc +; 
CHECK-MVE-NEXT: movvc r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[4] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[5] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[5] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[6] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[5], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[6] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[6], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[7] +; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov.u16 r0, q2[7] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: vmov s2, r0 +; CHECK-MVE-NEXT: cmp r1, 
#0 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9, d10} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_ord_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] +; CHECK-MVEFP-NEXT: movw r1, #65535 +; CHECK-MVEFP-NEXT: vmov r0, s12 +; CHECK-MVEFP-NEXT: vdup.16 q3, r0 +; CHECK-MVEFP-NEXT: vcmp.f16 le, q3, q0 +; CHECK-MVEFP-NEXT: vpst +; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, q3 +; CHECK-MVEFP-NEXT: vmrs r0, p0 +; CHECK-MVEFP-NEXT: eors r0, r1 +; CHECK-MVEFP-NEXT: vmsr p0, r0 +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %src2 = load half, half* %src2p + %i = insertelement <8 x half> undef, half %src2, i32 0 + %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer + %c = fcmp ord <8 x half> %src, %sp + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_uno_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9, d10} +; CHECK-MVE-NEXT: vpush {d8, d9, d10} +; CHECK-MVE-NEXT: vmov.u16 r2, q0[0] +; CHECK-MVE-NEXT: vldr.16 s16, [r0] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[0] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[0] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s14, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov.u16 r2, q0[1] +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmov.u16 r3, q1[1] +; CHECK-MVE-NEXT: vmov s12, r2 +; CHECK-MVE-NEXT: movs 
r2, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vmov s12, r3 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vmov.u16 r3, q2[1] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: vmov s14, r3 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmov.16 q3[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.16 q3[1], r2 +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[2] +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[2] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[3] +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[3] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[4] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[3], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs 
APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[4] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[5] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[5] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[6] +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[5], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vmov s18, r2 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.u16 r2, q2[6] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: vmov s20, r2 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmov.16 q3[6], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[7] +; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov.u16 r0, q2[7] +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; 
CHECK-MVE-NEXT: vmov s2, r0 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9, d10} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_uno_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] +; CHECK-MVEFP-NEXT: vmov r0, s12 +; CHECK-MVEFP-NEXT: vdup.16 q3, r0 +; CHECK-MVEFP-NEXT: vcmp.f16 le, q3, q0 +; CHECK-MVEFP-NEXT: vpst +; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, q3 +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %src2 = load half, half* %src2p + %i = insertelement <8 x half> undef, half %src2, i32 0 + %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer + %c = fcmp uno <8 x half> %src, %sp + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} diff --git a/test/CodeGen/Thumb2/mve-vcmpr.ll b/test/CodeGen/Thumb2/mve-vcmpr.ll new file mode 100644 index 00000000000..3adc4cb55b2 --- /dev/null +++ b/test/CodeGen/Thumb2/mve-vcmpr.ll @@ -0,0 +1,615 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s + +define arm_aapcs_vfpcc <4 x i32> @vcmp_eq_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_eq_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.i32 eq, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %c = icmp eq <4 x i32> %src, %sp + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_ne_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_ne_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: 
vcmp.i32 ne, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %c = icmp ne <4 x i32> %src, %sp + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_sgt_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_sgt_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s32 gt, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %c = icmp sgt <4 x i32> %src, %sp + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_sge_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_sge_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s32 ge, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %c = icmp sge <4 x i32> %src, %sp + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_slt_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_slt_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.32 q3, r0 +; CHECK-NEXT: vcmp.s32 gt, q3, q0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %c = icmp slt <4 x i32> %src, %sp + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_sle_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) { 
+; CHECK-LABEL: vcmp_sle_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.32 q3, r0 +; CHECK-NEXT: vcmp.s32 ge, q3, q0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %c = icmp sle <4 x i32> %src, %sp + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_ugt_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_ugt_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.u32 hi, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %c = icmp ugt <4 x i32> %src, %sp + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_uge_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_uge_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.u32 cs, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %c = icmp uge <4 x i32> %src, %sp + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_ult_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_ult_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.32 q3, r0 +; CHECK-NEXT: vcmp.u32 hi, q3, q0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %c = icmp ult <4 x i32> %src, %sp + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + 
+define arm_aapcs_vfpcc <4 x i32> @vcmp_ule_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_ule_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.32 q3, r0 +; CHECK-NEXT: vcmp.u32 cs, q3, q0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %c = icmp ule <4 x i32> %src, %sp + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + + +define arm_aapcs_vfpcc <8 x i16> @vcmp_eq_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_eq_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.i16 eq, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %c = icmp eq <8 x i16> %src, %sp + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_ne_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_ne_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.i16 ne, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %c = icmp ne <8 x i16> %src, %sp + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_sgt_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_sgt_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s16 gt, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %c = icmp sgt <8 x i16> %src, %sp + %s = 
select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_sge_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_sge_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s16 ge, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %c = icmp sge <8 x i16> %src, %sp + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_slt_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_slt_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.16 q3, r0 +; CHECK-NEXT: vcmp.s16 gt, q3, q0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %c = icmp slt <8 x i16> %src, %sp + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_sle_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_sle_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.16 q3, r0 +; CHECK-NEXT: vcmp.s16 ge, q3, q0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %c = icmp sle <8 x i16> %src, %sp + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_ugt_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_ugt_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.u16 hi, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %sp = 
shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %c = icmp ugt <8 x i16> %src, %sp + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_uge_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_uge_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.u16 cs, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %c = icmp uge <8 x i16> %src, %sp + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_ult_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_ult_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.16 q3, r0 +; CHECK-NEXT: vcmp.u16 hi, q3, q0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %c = icmp ult <8 x i16> %src, %sp + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_ule_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_ule_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.16 q3, r0 +; CHECK-NEXT: vcmp.u16 cs, q3, q0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %c = icmp ule <8 x i16> %src, %sp + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + + +define arm_aapcs_vfpcc <16 x i8> @vcmp_eq_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_eq_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.i8 eq, q0, r0 +; CHECK-NEXT: vpsel 
q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %c = icmp eq <16 x i8> %src, %sp + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_ne_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_ne_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.i8 ne, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %c = icmp ne <16 x i8> %src, %sp + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_sgt_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_sgt_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s8 gt, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %c = icmp sgt <16 x i8> %src, %sp + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_sge_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_sge_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s8 ge, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %c = icmp sge <16 x i8> %src, %sp + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_slt_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_slt_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: 
vdup.8 q3, r0 +; CHECK-NEXT: vcmp.s8 gt, q3, q0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %c = icmp slt <16 x i8> %src, %sp + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_sle_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_sle_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.8 q3, r0 +; CHECK-NEXT: vcmp.s8 ge, q3, q0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %c = icmp sle <16 x i8> %src, %sp + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_ugt_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_ugt_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.u8 hi, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %c = icmp ugt <16 x i8> %src, %sp + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_uge_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_uge_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.u8 cs, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %c = icmp uge <16 x i8> %src, %sp + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_ult_v16i8(<16 x i8> %src, i8 %src2, <16 x 
i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_ult_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.8 q3, r0 +; CHECK-NEXT: vcmp.u8 hi, q3, q0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %c = icmp ult <16 x i8> %src, %sp + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_ule_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_ule_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.8 q3, r0 +; CHECK-NEXT: vcmp.u8 cs, q3, q0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 ; %i and %sp splat %src2 to all 16 lanes; for this ule compare the checks above still expect the splat in q3 (vdup.8) as the first vcmp.u8 operand + %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %c = icmp ule <16 x i8> %src, %sp + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + + +define arm_aapcs_vfpcc <2 x i64> @vcmp_eq_v2i64(<2 x i64> %src, i64 %src2, <2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: vcmp_eq_v2i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r2, s1 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: eors r2, r1 +; CHECK-NEXT: eors r3, r0 +; CHECK-NEXT: orrs r2, r3 +; CHECK-NEXT: clz r2, r2 +; CHECK-NEXT: lsrs r2, r2, #5 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r2, #-1 +; CHECK-NEXT: vmov.32 q3[0], r2 +; CHECK-NEXT: vmov.32 q3[1], r2 +; CHECK-NEXT: vmov r2, s3 +; CHECK-NEXT: eors r1, r2 +; CHECK-NEXT: vmov r2, s2 +; CHECK-NEXT: eors r0, r2 +; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: clz r0, r0 +; CHECK-NEXT: lsrs r0, r0, #5 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r0, #-1 +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vbic q0, q2, q3 +; CHECK-NEXT: vand q1, q1, q3 +; CHECK-NEXT: vorr q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <2 x i64> undef, i64 %src2, i32 0 ; %i and %sp splat %src2 to both 64-bit lanes; the checks above expect per-lane scalar eors/orrs/clz code and a vbic/vand/vorr select, with no vcmp instruction + %sp = shufflevector <2 x i64> %i, <2
x i64> undef, <2 x i32> zeroinitializer + %c = icmp eq <2 x i64> %src, %sp + %s = select <2 x i1> %c, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %s +} + +define arm_aapcs_vfpcc <2 x i32> @vcmp_eq_v2i32(<2 x i64> %src, i64 %src2, <2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: vcmp_eq_v2i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r2, s1 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: eors r2, r1 +; CHECK-NEXT: eors r3, r0 +; CHECK-NEXT: orrs r2, r3 +; CHECK-NEXT: clz r2, r2 +; CHECK-NEXT: lsrs r2, r2, #5 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r2, #-1 +; CHECK-NEXT: vmov.32 q3[0], r2 +; CHECK-NEXT: vmov.32 q3[1], r2 +; CHECK-NEXT: vmov r2, s3 +; CHECK-NEXT: eors r1, r2 +; CHECK-NEXT: vmov r2, s2 +; CHECK-NEXT: eors r0, r2 +; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: clz r0, r0 +; CHECK-NEXT: lsrs r0, r0, #5 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r0, #-1 +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vbic q0, q2, q3 +; CHECK-NEXT: vand q1, q1, q3 +; CHECK-NEXT: vorr q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <2 x i64> undef, i64 %src2, i32 0 ; same <2 x i64> eq-against-splat compare as vcmp_eq_v2i64, but the select operates on <2 x i32> values; the checks above list the same instruction sequence + %sp = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer + %c = icmp eq <2 x i64> %src, %sp + %s = select <2 x i1> %c, <2 x i32> %a, <2 x i32> %b + ret <2 x i32> %s +} + +define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { +; CHECK-LABEL: vcmp_multi_v2i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, s1 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: vmov r2, s8 +; CHECK-NEXT: vmov lr, s10 +; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: clz r0, r0 +; CHECK-NEXT: lsrs r0, r0, #5 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r0, #-1 +; CHECK-NEXT: vmov.32 q3[0], r0 +; CHECK-NEXT: vmov.32 q3[1], r0 +; CHECK-NEXT: vmov r0, s3 +;
CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: clz r0, r0 +; CHECK-NEXT: lsrs r0, r0, #5 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r0, #-1 +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vbic q0, q2, q3 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: subs r1, r0, r2 +; CHECK-NEXT: asr.w r12, r0, #31 +; CHECK-NEXT: sbcs.w r1, r12, r2, asr #31 +; CHECK-NEXT: mov.w r1, #0 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r1, #-1 +; CHECK-NEXT: vmov.32 q3[0], r1 +; CHECK-NEXT: vmov.32 q3[1], r1 +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: subs.w r2, r1, lr +; CHECK-NEXT: asr.w r12, r1, #31 +; CHECK-NEXT: sbcs.w r2, r12, lr, asr #31 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r3, #1 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r3, #-1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r0, #-1 +; CHECK-NEXT: vmov.32 q4[0], r0 +; CHECK-NEXT: vmov.32 q4[1], r0 +; CHECK-NEXT: vmov r0, s4 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r1, #-1 +; CHECK-NEXT: vmov.32 q4[2], r1 +; CHECK-NEXT: vmov.32 q3[2], r3 +; CHECK-NEXT: vmov.32 q4[3], r1 +; CHECK-NEXT: vmov.32 q3[3], r3 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r0, #-1 +; CHECK-NEXT: vmov.32 q5[0], r0 +; CHECK-NEXT: vmov.32 q5[1], r0 +; CHECK-NEXT: vmov r0, s6 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r0, #-1 +; CHECK-NEXT: vmov.32 q5[2], r0 +; CHECK-NEXT: vmov.32 q5[3], r0 +; CHECK-NEXT: vand q1, q5, q4 +; CHECK-NEXT: vand q1, q3, q1 +; CHECK-NEXT: vbic q0, q0, q1 +; CHECK-NEXT: vand q1, q2, q1 +; CHECK-NEXT: vorr q0, q1,
q0 +; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: pop {r7, pc} + %a4 = icmp eq <2 x i64> %a, zeroinitializer ; no splat operand in this function: four compares (eq/ne/ne against zeroinitializer, slt against %c) are combined with and/select; the checks above expect scalar compare code with no vcmp instruction, plus a save/restore of d8-d11 + %a5 = select <2 x i1> %a4, <2 x i32> zeroinitializer, <2 x i32> %c + %a6 = icmp ne <2 x i32> %b, zeroinitializer + %a7 = icmp slt <2 x i32> %a5, %c + %a8 = icmp ne <2 x i32> %a5, zeroinitializer + %a9 = and <2 x i1> %a6, %a8 + %a10 = and <2 x i1> %a7, %a9 + %a11 = select <2 x i1> %a10, <2 x i32> %c, <2 x i32> %a5 + ret <2 x i32> %a11 +} -- 2.11.4.GIT