From 1a6b090896c403d7abd97086f73666cbc0e3df3f Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 16 Sep 2019 17:29:07 +0000 Subject: [PATCH] [ARM] A predicate cast of a predicate cast is a predicate cast This adds some very basic folding of PREDICATE_CASTS, removing cases when they are chained together. These would already be removed eventually, as these are lowered to copies. This just allows it to happen earlier, which can help other simplifications. Differential Revision: https://reviews.llvm.org/D67591 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@372012 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 20 +++ test/CodeGen/Thumb2/mve-masked-ldst.ll | 290 +++++++++++++++--------------- test/CodeGen/Thumb2/mve-pred-bitcast.ll | 46 +++-- test/CodeGen/Thumb2/mve-pred-loadstore.ll | 64 ++++--- 4 files changed, 221 insertions(+), 199 deletions(-) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 6f7b6214e95..44c43edfcd0 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -12629,6 +12629,24 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { return Vec; } +static SDValue +PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { + EVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + SDLoc dl(N); + + // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x) of type VT + if (Op->getOpcode() == ARMISD::PREDICATE_CAST) { + // If the valuetypes are the same, we can remove the cast entirely. + if (Op->getOperand(0).getValueType() == VT) + return Op->getOperand(0); + return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, + VT, Op->getOperand(0)); + } + + return SDValue(); +} + /// PerformInsertEltCombine - Target-specific dag combine xforms for /// ISD::INSERT_VECTOR_ELT. 
static SDValue PerformInsertEltCombine(SDNode *N, @@ -14169,6 +14187,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, return PerformVLDCombine(N, DCI); case ARMISD::BUILD_VECTOR: return PerformARMBUILD_VECTORCombine(N, DCI); + case ARMISD::PREDICATE_CAST: + return PerformPREDICATE_CASTCombine(N, DCI); case ARMISD::SMULWB: { unsigned BitWidth = N->getValueType(0).getSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16); diff --git a/test/CodeGen/Thumb2/mve-masked-ldst.ll b/test/CodeGen/Thumb2/mve-masked-ldst.ll index 0bb5a7c0958..4302b9a8e2a 100644 --- a/test/CodeGen/Thumb2/mve-masked-ldst.ll +++ b/test/CodeGen/Thumb2/mve-masked-ldst.ll @@ -21,26 +21,27 @@ entry: define void @foo_sext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *%src) { ; CHECK-LABEL: foo_sext_v4i32_v4i8: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vcmp.s32 gt, q0, zr ; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r1, r12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #0, #1 -; CHECK-NEXT: ubfx r1, r12, #4, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #1, #1 -; CHECK-NEXT: ubfx r1, r12, #8, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #2, #1 -; CHECK-NEXT: ubfx r1, r12, #12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #3, #1 -; CHECK-NEXT: and r1, r3, #15 +; CHECK-NEXT: vmrs lr, p0 +; CHECK-NEXT: and r1, lr, #1 +; CHECK-NEXT: ubfx r3, lr, #4, #1 +; CHECK-NEXT: rsb.w r12, r1, #0 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r12, #0, #1 +; CHECK-NEXT: bfi r1, r3, #1, #1 +; CHECK-NEXT: ubfx r3, lr, #8, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r3, #2, #1 +; CHECK-NEXT: ubfx r3, lr, #12, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r3, #3, #1 ; 
CHECK-NEXT: lsls r3, r1, #31 ; CHECK-NEXT: itt ne ; CHECK-NEXT: ldrbne r3, [r2] @@ -62,7 +63,7 @@ define void @foo_sext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *% ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q0, [r0] ; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: bx lr +; CHECK-NEXT: pop {r7, pc} entry: %0 = load <4 x i32>, <4 x i32>* %mask, align 4 %1 = icmp sgt <4 x i32> %0, zeroinitializer @@ -75,26 +76,27 @@ entry: define void @foo_sext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16> *%src) { ; CHECK-LABEL: foo_sext_v4i32_v4i16: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vcmp.s32 gt, q0, zr ; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r1, r12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #0, #1 -; CHECK-NEXT: ubfx r1, r12, #4, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #1, #1 -; CHECK-NEXT: ubfx r1, r12, #8, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #2, #1 -; CHECK-NEXT: ubfx r1, r12, #12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #3, #1 -; CHECK-NEXT: and r1, r3, #15 +; CHECK-NEXT: vmrs lr, p0 +; CHECK-NEXT: and r1, lr, #1 +; CHECK-NEXT: ubfx r3, lr, #4, #1 +; CHECK-NEXT: rsb.w r12, r1, #0 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r12, #0, #1 +; CHECK-NEXT: bfi r1, r3, #1, #1 +; CHECK-NEXT: ubfx r3, lr, #8, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r3, #2, #1 +; CHECK-NEXT: ubfx r3, lr, #12, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r3, #3, #1 ; CHECK-NEXT: lsls r3, r1, #31 ; CHECK-NEXT: itt ne ; CHECK-NEXT: ldrhne r3, [r2] @@ -115,7 +117,7 @@ define void @foo_sext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16> ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q0, [r0] ; CHECK-NEXT: add sp, #4 -; 
CHECK-NEXT: bx lr +; CHECK-NEXT: pop {r7, pc} entry: %0 = load <4 x i32>, <4 x i32>* %mask, align 4 %1 = icmp sgt <4 x i32> %0, zeroinitializer @@ -128,27 +130,28 @@ entry: define void @foo_zext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *%src) { ; CHECK-LABEL: foo_zext_v4i32_v4i8: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vmov.i32 q1, #0xff ; CHECK-NEXT: vcmp.s32 gt, q0, zr ; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r1, r12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #0, #1 -; CHECK-NEXT: ubfx r1, r12, #4, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #1, #1 -; CHECK-NEXT: ubfx r1, r12, #8, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #2, #1 -; CHECK-NEXT: ubfx r1, r12, #12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #3, #1 -; CHECK-NEXT: and r1, r3, #15 +; CHECK-NEXT: vmrs lr, p0 +; CHECK-NEXT: and r1, lr, #1 +; CHECK-NEXT: ubfx r3, lr, #4, #1 +; CHECK-NEXT: rsb.w r12, r1, #0 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r12, #0, #1 +; CHECK-NEXT: bfi r1, r3, #1, #1 +; CHECK-NEXT: ubfx r3, lr, #8, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r3, #2, #1 +; CHECK-NEXT: ubfx r3, lr, #12, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r3, #3, #1 ; CHECK-NEXT: lsls r3, r1, #31 ; CHECK-NEXT: itt ne ; CHECK-NEXT: ldrbne r3, [r2] @@ -169,7 +172,7 @@ define void @foo_zext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *% ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q0, [r0] ; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: bx lr +; CHECK-NEXT: pop {r7, pc} entry: %0 = load <4 x i32>, <4 x i32>* %mask, align 4 %1 = icmp sgt <4 x i32> %0, zeroinitializer @@ -182,26 +185,27 @@ entry: define void @foo_zext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> 
*%mask, <4 x i16> *%src) { ; CHECK-LABEL: foo_zext_v4i32_v4i16: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vcmp.s32 gt, q0, zr ; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r1, r12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #0, #1 -; CHECK-NEXT: ubfx r1, r12, #4, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #1, #1 -; CHECK-NEXT: ubfx r1, r12, #8, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #2, #1 -; CHECK-NEXT: ubfx r1, r12, #12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #3, #1 -; CHECK-NEXT: and r1, r3, #15 +; CHECK-NEXT: vmrs lr, p0 +; CHECK-NEXT: and r1, lr, #1 +; CHECK-NEXT: ubfx r3, lr, #4, #1 +; CHECK-NEXT: rsb.w r12, r1, #0 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r12, #0, #1 +; CHECK-NEXT: bfi r1, r3, #1, #1 +; CHECK-NEXT: ubfx r3, lr, #8, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r3, #2, #1 +; CHECK-NEXT: ubfx r3, lr, #12, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r3, #3, #1 ; CHECK-NEXT: lsls r3, r1, #31 ; CHECK-NEXT: itt ne ; CHECK-NEXT: ldrhne r3, [r2] @@ -222,7 +226,7 @@ define void @foo_zext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16> ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q0, [r0] ; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: bx lr +; CHECK-NEXT: pop {r7, pc} entry: %0 = load <4 x i32>, <4 x i32>* %mask, align 4 %1 = icmp sgt <4 x i32> %0, zeroinitializer @@ -251,35 +255,37 @@ entry: define void @foo_sext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *%src) { ; CHECK-LABEL: foo_sext_v8i16_v8i8: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: movs r3, #0 ; 
CHECK-NEXT: vcmp.s16 gt, q0, zr ; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r1, r12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #0, #1 -; CHECK-NEXT: ubfx r1, r12, #2, #1 +; CHECK-NEXT: vmrs lr, p0 +; CHECK-NEXT: and r3, lr, #1 +; CHECK-NEXT: ubfx r1, lr, #2, #1 +; CHECK-NEXT: rsb.w r12, r3, #0 +; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r12, #0, #1 ; CHECK-NEXT: bfi r3, r1, #1, #1 -; CHECK-NEXT: ubfx r1, r12, #4, #1 +; CHECK-NEXT: ubfx r1, lr, #4, #1 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: bfi r3, r1, #2, #1 -; CHECK-NEXT: ubfx r1, r12, #6, #1 +; CHECK-NEXT: ubfx r1, lr, #6, #1 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: bfi r3, r1, #3, #1 -; CHECK-NEXT: ubfx r1, r12, #8, #1 +; CHECK-NEXT: ubfx r1, lr, #8, #1 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: bfi r3, r1, #4, #1 -; CHECK-NEXT: ubfx r1, r12, #10, #1 +; CHECK-NEXT: ubfx r1, lr, #10, #1 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: bfi r3, r1, #5, #1 -; CHECK-NEXT: ubfx r1, r12, #12, #1 +; CHECK-NEXT: ubfx r1, lr, #12, #1 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: bfi r3, r1, #6, #1 -; CHECK-NEXT: ubfx r1, r12, #14, #1 +; CHECK-NEXT: ubfx r1, lr, #14, #1 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: bfi r3, r1, #7, #1 ; CHECK-NEXT: uxtb r1, r3 @@ -319,7 +325,7 @@ define void @foo_sext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *% ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrht.16 q0, [r0] ; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: bx lr +; CHECK-NEXT: pop {r7, pc} entry: %0 = load <8 x i16>, <8 x i16>* %mask, align 2 %1 = icmp sgt <8 x i16> %0, zeroinitializer @@ -332,35 +338,37 @@ entry: define void @foo_zext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *%src) { ; CHECK-LABEL: foo_zext_v8i16_v8i8: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: 
vcmp.s16 gt, q0, zr ; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r1, r12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #0, #1 -; CHECK-NEXT: ubfx r1, r12, #2, #1 +; CHECK-NEXT: vmrs lr, p0 +; CHECK-NEXT: and r3, lr, #1 +; CHECK-NEXT: ubfx r1, lr, #2, #1 +; CHECK-NEXT: rsb.w r12, r3, #0 +; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r12, #0, #1 ; CHECK-NEXT: bfi r3, r1, #1, #1 -; CHECK-NEXT: ubfx r1, r12, #4, #1 +; CHECK-NEXT: ubfx r1, lr, #4, #1 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: bfi r3, r1, #2, #1 -; CHECK-NEXT: ubfx r1, r12, #6, #1 +; CHECK-NEXT: ubfx r1, lr, #6, #1 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: bfi r3, r1, #3, #1 -; CHECK-NEXT: ubfx r1, r12, #8, #1 +; CHECK-NEXT: ubfx r1, lr, #8, #1 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: bfi r3, r1, #4, #1 -; CHECK-NEXT: ubfx r1, r12, #10, #1 +; CHECK-NEXT: ubfx r1, lr, #10, #1 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: bfi r3, r1, #5, #1 -; CHECK-NEXT: ubfx r1, r12, #12, #1 +; CHECK-NEXT: ubfx r1, lr, #12, #1 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: bfi r3, r1, #6, #1 -; CHECK-NEXT: ubfx r1, r12, #14, #1 +; CHECK-NEXT: ubfx r1, lr, #14, #1 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: bfi r3, r1, #7, #1 ; CHECK-NEXT: uxtb r1, r3 @@ -400,7 +408,7 @@ define void @foo_zext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *% ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrht.16 q0, [r0] ; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: bx lr +; CHECK-NEXT: pop {r7, pc} entry: %0 = load <8 x i16>, <8 x i16>* %mask, align 2 %1 = icmp sgt <8 x i16> %0, zeroinitializer @@ -432,36 +440,36 @@ define void @foo_trunc_v8i8_v8i16(<8 x i8> *%dest, <8 x i16> *%mask, <8 x i16> * ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vpt.s16 gt, q0, zr ; CHECK-NEXT: vldrht.u16 q0, [r2] ; CHECK-NEXT: vmrs r1, p0 ; CHECK-NEXT: and r2, r1, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; 
CHECK-NEXT: bfi r3, r2, #0, #1 -; CHECK-NEXT: ubfx r2, r1, #2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r3, r2, #1, #1 -; CHECK-NEXT: ubfx r2, r1, #4, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r3, r2, #2, #1 -; CHECK-NEXT: ubfx r2, r1, #6, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r3, r2, #3, #1 -; CHECK-NEXT: ubfx r2, r1, #8, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r3, r2, #4, #1 -; CHECK-NEXT: ubfx r2, r1, #10, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r3, r2, #5, #1 -; CHECK-NEXT: ubfx r2, r1, #12, #1 +; CHECK-NEXT: rsbs r3, r2, #0 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: bfi r2, r3, #0, #1 +; CHECK-NEXT: ubfx r3, r1, #2, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r2, r3, #1, #1 +; CHECK-NEXT: ubfx r3, r1, #4, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r2, r3, #2, #1 +; CHECK-NEXT: ubfx r3, r1, #6, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r2, r3, #3, #1 +; CHECK-NEXT: ubfx r3, r1, #8, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r2, r3, #4, #1 +; CHECK-NEXT: ubfx r3, r1, #10, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r2, r3, #5, #1 +; CHECK-NEXT: ubfx r3, r1, #12, #1 ; CHECK-NEXT: ubfx r1, r1, #14, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r3, r2, #6, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r2, r3, #6, #1 ; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #7, #1 -; CHECK-NEXT: lsls r2, r3, #31 -; CHECK-NEXT: uxtb r1, r3 +; CHECK-NEXT: bfi r2, r1, #7, #1 +; CHECK-NEXT: uxtb r1, r2 +; CHECK-NEXT: lsls r2, r2, #31 ; CHECK-NEXT: itt ne ; CHECK-NEXT: vmovne.u16 r2, q0[0] ; CHECK-NEXT: strbne r2, [r0] @@ -510,23 +518,22 @@ define void @foo_trunc_v4i8_v4i32(<4 x i8> *%dest, <4 x i32> *%mask, <4 x i32> * ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vpt.s32 gt, q0, zr ; CHECK-NEXT: vldrwt.u32 q0, [r2] -; CHECK-NEXT: vmrs r1, p0 -; CHECK-NEXT: and r2, r1, #1 
-; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r3, r2, #0, #1 -; CHECK-NEXT: ubfx r2, r1, #4, #1 +; CHECK-NEXT: vmrs r2, p0 +; CHECK-NEXT: and r1, r2, #1 +; CHECK-NEXT: rsbs r3, r1, #0 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: bfi r1, r3, #0, #1 +; CHECK-NEXT: ubfx r3, r2, #4, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r3, #1, #1 +; CHECK-NEXT: ubfx r3, r2, #8, #1 +; CHECK-NEXT: ubfx r2, r2, #12, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r3, #2, #1 ; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r3, r2, #1, #1 -; CHECK-NEXT: ubfx r2, r1, #8, #1 -; CHECK-NEXT: ubfx r1, r1, #12, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r3, r2, #2, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #3, #1 -; CHECK-NEXT: and r1, r3, #15 +; CHECK-NEXT: bfi r1, r2, #3, #1 ; CHECK-NEXT: lsls r2, r1, #31 ; CHECK-NEXT: itt ne ; CHECK-NEXT: vmovne r2, s0 @@ -560,23 +567,22 @@ define void @foo_trunc_v4i16_v4i32(<4 x i16> *%dest, <4 x i32> *%mask, <4 x i32> ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vpt.s32 gt, q0, zr ; CHECK-NEXT: vldrwt.u32 q0, [r2] -; CHECK-NEXT: vmrs r1, p0 -; CHECK-NEXT: and r2, r1, #1 +; CHECK-NEXT: vmrs r2, p0 +; CHECK-NEXT: and r1, r2, #1 +; CHECK-NEXT: rsbs r3, r1, #0 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: bfi r1, r3, #0, #1 +; CHECK-NEXT: ubfx r3, r2, #4, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r3, #1, #1 +; CHECK-NEXT: ubfx r3, r2, #8, #1 +; CHECK-NEXT: ubfx r2, r2, #12, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r3, #2, #1 ; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r3, r2, #0, #1 -; CHECK-NEXT: ubfx r2, r1, #4, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r3, r2, #1, #1 -; CHECK-NEXT: ubfx r2, r1, #8, #1 -; CHECK-NEXT: ubfx r1, r1, #12, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r3, r2, #2, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #3, #1 -; CHECK-NEXT: and 
r1, r3, #15 +; CHECK-NEXT: bfi r1, r2, #3, #1 ; CHECK-NEXT: lsls r2, r1, #31 ; CHECK-NEXT: itt ne ; CHECK-NEXT: vmovne r2, s0 diff --git a/test/CodeGen/Thumb2/mve-pred-bitcast.ll b/test/CodeGen/Thumb2/mve-pred-bitcast.ll index f8f2e0b5613..61a78eb722e 100644 --- a/test/CodeGen/Thumb2/mve-pred-bitcast.ll +++ b/test/CodeGen/Thumb2/mve-pred-bitcast.ll @@ -218,10 +218,10 @@ define arm_aapcs_vfpcc i4 @bitcast_from_v4i1(<4 x i32> %a) { ; CHECK-LE-NEXT: .pad #4 ; CHECK-LE-NEXT: sub sp, #4 ; CHECK-LE-NEXT: vcmp.i32 eq, q0, zr -; CHECK-LE-NEXT: movs r0, #0 ; CHECK-LE-NEXT: vmrs r1, p0 -; CHECK-LE-NEXT: and r2, r1, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 +; CHECK-LE-NEXT: and r0, r1, #1 +; CHECK-LE-NEXT: rsbs r2, r0, #0 +; CHECK-LE-NEXT: movs r0, #0 ; CHECK-LE-NEXT: bfi r0, r2, #0, #1 ; CHECK-LE-NEXT: ubfx r2, r1, #4, #1 ; CHECK-LE-NEXT: rsbs r2, r2, #0 @@ -232,7 +232,6 @@ define arm_aapcs_vfpcc i4 @bitcast_from_v4i1(<4 x i32> %a) { ; CHECK-LE-NEXT: bfi r0, r2, #2, #1 ; CHECK-LE-NEXT: rsbs r1, r1, #0 ; CHECK-LE-NEXT: bfi r0, r1, #3, #1 -; CHECK-LE-NEXT: and r0, r0, #15 ; CHECK-LE-NEXT: add sp, #4 ; CHECK-LE-NEXT: bx lr ; @@ -241,22 +240,21 @@ define arm_aapcs_vfpcc i4 @bitcast_from_v4i1(<4 x i32> %a) { ; CHECK-BE-NEXT: .pad #4 ; CHECK-BE-NEXT: sub sp, #4 ; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: movs r3, #0 ; CHECK-BE-NEXT: vcmp.i32 eq, q1, zr -; CHECK-BE-NEXT: vmrs r0, p0 -; CHECK-BE-NEXT: and r2, r0, #1 -; CHECK-BE-NEXT: ubfx r1, r0, #4, #1 +; CHECK-BE-NEXT: vmrs r1, p0 +; CHECK-BE-NEXT: and r0, r1, #1 +; CHECK-BE-NEXT: rsbs r2, r0, #0 +; CHECK-BE-NEXT: movs r0, #0 +; CHECK-BE-NEXT: bfi r0, r2, #0, #1 +; CHECK-BE-NEXT: ubfx r2, r1, #4, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 +; CHECK-BE-NEXT: bfi r0, r2, #1, #1 +; CHECK-BE-NEXT: ubfx r2, r1, #8, #1 +; CHECK-BE-NEXT: ubfx r1, r1, #12, #1 +; CHECK-BE-NEXT: rsbs r2, r2, #0 +; CHECK-BE-NEXT: bfi r0, r2, #2, #1 ; CHECK-BE-NEXT: rsbs r1, r1, #0 -; CHECK-BE-NEXT: bfi r3, r2, #0, #1 -; CHECK-BE-NEXT: bfi r3, r1, #1, #1 -; 
CHECK-BE-NEXT: ubfx r1, r0, #8, #1 -; CHECK-BE-NEXT: ubfx r0, r0, #12, #1 -; CHECK-BE-NEXT: rsbs r1, r1, #0 -; CHECK-BE-NEXT: bfi r3, r1, #2, #1 -; CHECK-BE-NEXT: rsbs r0, r0, #0 -; CHECK-BE-NEXT: bfi r3, r0, #3, #1 -; CHECK-BE-NEXT: and r0, r3, #15 +; CHECK-BE-NEXT: bfi r0, r1, #3, #1 ; CHECK-BE-NEXT: add sp, #4 ; CHECK-BE-NEXT: bx lr entry: @@ -271,10 +269,10 @@ define arm_aapcs_vfpcc i8 @bitcast_from_v8i1(<8 x i16> %a) { ; CHECK-LE-NEXT: .pad #8 ; CHECK-LE-NEXT: sub sp, #8 ; CHECK-LE-NEXT: vcmp.i16 eq, q0, zr -; CHECK-LE-NEXT: movs r0, #0 ; CHECK-LE-NEXT: vmrs r1, p0 -; CHECK-LE-NEXT: and r2, r1, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 +; CHECK-LE-NEXT: and r0, r1, #1 +; CHECK-LE-NEXT: rsbs r2, r0, #0 +; CHECK-LE-NEXT: movs r0, #0 ; CHECK-LE-NEXT: bfi r0, r2, #0, #1 ; CHECK-LE-NEXT: ubfx r2, r1, #2, #1 ; CHECK-LE-NEXT: rsbs r2, r2, #0 @@ -308,12 +306,12 @@ define arm_aapcs_vfpcc i8 @bitcast_from_v8i1(<8 x i16> %a) { ; CHECK-BE-NEXT: vrev64.16 q1, q0 ; CHECK-BE-NEXT: vcmp.i16 eq, q1, zr ; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: ubfx r0, r1, #2, #1 -; CHECK-BE-NEXT: rsbs r2, r0, #0 ; CHECK-BE-NEXT: and r0, r1, #1 -; CHECK-BE-NEXT: rsbs r3, r0, #0 +; CHECK-BE-NEXT: rsbs r2, r0, #0 ; CHECK-BE-NEXT: movs r0, #0 -; CHECK-BE-NEXT: bfi r0, r3, #0, #1 +; CHECK-BE-NEXT: bfi r0, r2, #0, #1 +; CHECK-BE-NEXT: ubfx r2, r1, #2, #1 +; CHECK-BE-NEXT: rsbs r2, r2, #0 ; CHECK-BE-NEXT: bfi r0, r2, #1, #1 ; CHECK-BE-NEXT: ubfx r2, r1, #4, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 diff --git a/test/CodeGen/Thumb2/mve-pred-loadstore.ll b/test/CodeGen/Thumb2/mve-pred-loadstore.ll index d314d0525c4..21af261170e 100644 --- a/test/CodeGen/Thumb2/mve-pred-loadstore.ll +++ b/test/CodeGen/Thumb2/mve-pred-loadstore.ll @@ -176,44 +176,42 @@ define arm_aapcs_vfpcc void @store_v4i1(<4 x i1> *%dst, <4 x i32> %a) { ; CHECK-LE-LABEL: store_v4i1: ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: vcmp.i32 eq, q0, zr -; CHECK-LE-NEXT: movs r1, #0 -; CHECK-LE-NEXT: vmrs r2, p0 -; CHECK-LE-NEXT: and r3, r2, #1 
-; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r2, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-LE-NEXT: ubfx r2, r2, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #2, #1 +; CHECK-LE-NEXT: movs r3, #0 +; CHECK-LE-NEXT: vmrs r1, p0 +; CHECK-LE-NEXT: and r2, r1, #1 ; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r1, r2, #3, #1 -; CHECK-LE-NEXT: and r1, r1, #15 -; CHECK-LE-NEXT: strb r1, [r0] +; CHECK-LE-NEXT: bfi r3, r2, #0, #1 +; CHECK-LE-NEXT: ubfx r2, r1, #4, #1 +; CHECK-LE-NEXT: rsbs r2, r2, #0 +; CHECK-LE-NEXT: bfi r3, r2, #1, #1 +; CHECK-LE-NEXT: ubfx r2, r1, #8, #1 +; CHECK-LE-NEXT: ubfx r1, r1, #12, #1 +; CHECK-LE-NEXT: rsbs r2, r2, #0 +; CHECK-LE-NEXT: bfi r3, r2, #2, #1 +; CHECK-LE-NEXT: rsbs r1, r1, #0 +; CHECK-LE-NEXT: bfi r3, r1, #3, #1 +; CHECK-LE-NEXT: strb r3, [r0] ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: store_v4i1: ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: movs r3, #0 ; CHECK-BE-NEXT: vcmp.i32 eq, q1, zr ; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: and r3, r1, #1 +; CHECK-BE-NEXT: and r2, r1, #1 +; CHECK-BE-NEXT: rsbs r2, r2, #0 +; CHECK-BE-NEXT: bfi r3, r2, #0, #1 ; CHECK-BE-NEXT: ubfx r2, r1, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: rsb.w r12, r2, #0 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 +; CHECK-BE-NEXT: rsbs r2, r2, #0 +; CHECK-BE-NEXT: bfi r3, r2, #1, #1 +; CHECK-BE-NEXT: ubfx r2, r1, #8, #1 ; CHECK-BE-NEXT: ubfx r1, r1, #12, #1 -; CHECK-BE-NEXT: bfi r2, r12, #1, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 +; CHECK-BE-NEXT: rsbs r2, r2, #0 +; CHECK-BE-NEXT: bfi r3, r2, #2, #1 ; CHECK-BE-NEXT: rsbs r1, r1, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: bfi r2, r1, #3, #1 -; CHECK-BE-NEXT: and r1, r2, #15 -; CHECK-BE-NEXT: strb r1, [r0] +; CHECK-BE-NEXT: 
bfi r3, r1, #3, #1 +; CHECK-BE-NEXT: strb r3, [r0] ; CHECK-BE-NEXT: bx lr entry: %c = icmp eq <4 x i32> %a, zeroinitializer @@ -225,10 +223,10 @@ define arm_aapcs_vfpcc void @store_v8i1(<8 x i1> *%dst, <8 x i16> %a) { ; CHECK-LE-LABEL: store_v8i1: ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: vcmp.i16 eq, q0, zr -; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: vmrs r2, p0 -; CHECK-LE-NEXT: and r3, r2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 +; CHECK-LE-NEXT: and r1, r2, #1 +; CHECK-LE-NEXT: rsbs r3, r1, #0 +; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: bfi r1, r3, #0, #1 ; CHECK-LE-NEXT: ubfx r3, r2, #2, #1 ; CHECK-LE-NEXT: rsbs r3, r3, #0 @@ -259,14 +257,14 @@ define arm_aapcs_vfpcc void @store_v8i1(<8 x i1> *%dst, <8 x i16> %a) { ; CHECK-BE-NEXT: vrev64.16 q1, q0 ; CHECK-BE-NEXT: vcmp.i16 eq, q1, zr ; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: ubfx r1, r2, #2, #1 -; CHECK-BE-NEXT: rsb.w r12, r1, #0 ; CHECK-BE-NEXT: and r1, r2, #1 ; CHECK-BE-NEXT: rsbs r3, r1, #0 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: bfi r1, r3, #0, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #2, #1 +; CHECK-BE-NEXT: rsbs r3, r3, #0 +; CHECK-BE-NEXT: bfi r1, r3, #1, #1 ; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 -; CHECK-BE-NEXT: bfi r1, r12, #1, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #2, #1 ; CHECK-BE-NEXT: ubfx r3, r2, #6, #1 -- 2.11.4.GIT