From 63ca9371c7b4eb8fff01d998963cf901b658c7d4 Mon Sep 17 00:00:00 2001 From: Ben Shi Date: Tue, 7 Sep 2021 10:21:38 +0800 Subject: [PATCH] [ARM] Implement target hook function to decide folding (mul (add x, c1), c2) Prevent the folding in DAGCombine if it leads to worse code. Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D109124 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 25 +++ llvm/lib/Target/ARM/ARMISelLowering.h | 3 + llvm/test/CodeGen/ARM/addimm-mulimm.ll | 199 +++++++-------------- llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll | 114 ++++++------ .../test/CodeGen/Thumb/urem-seteq-illegal-types.ll | 26 ++- 5 files changed, 156 insertions(+), 211 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 08994019da79..492e928b5536 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -18793,6 +18793,31 @@ bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const { return AbsImm >= 0 && AbsImm <= 255; } +// Return false to prevent folding +// (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine, +// if the folding leads to worse code. +bool ARMTargetLowering::isMulAddWithConstProfitable( + const SDValue &AddNode, const SDValue &ConstNode) const { + // Let the DAGCombiner decide for vector types and large types. + const EVT VT = AddNode.getValueType(); + if (VT.isVector() || VT.getScalarSizeInBits() > 32) + return true; + + // It is worse if c0 is legal add immediate, while c1*c0 is not + // and has to be composed by at least two instructions. 
+ const ConstantSDNode *C0Node = cast<ConstantSDNode>(AddNode.getOperand(1)); + const ConstantSDNode *C1Node = cast<ConstantSDNode>(ConstNode); + const int64_t C0 = C0Node->getSExtValue(); + APInt CA = C0Node->getAPIntValue() * C1Node->getAPIntValue(); + if (!isLegalAddImmediate(C0) || isLegalAddImmediate(CA.getSExtValue())) + return true; + if (ConstantMaterializationCost((unsigned)CA.getZExtValue(), Subtarget) > 1) + return false; + + // Default to true and let the DAGCombiner decide. + return true; +} + static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 360fe692efa3..0fddd58e178e 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -712,6 +712,9 @@ class VectorType; Align Alignment, const DataLayout &DL) const; + bool isMulAddWithConstProfitable(const SDValue &AddNode, + const SDValue &ConstNode) const override; + bool alignLoopsWithOptSize() const override; /// Returns the number of interleaved accesses that will be generated when diff --git a/llvm/test/CodeGen/ARM/addimm-mulimm.ll b/llvm/test/CodeGen/ARM/addimm-mulimm.ll index 2c0ec58261c4..d5c6bbfd3cff 100644 --- a/llvm/test/CodeGen/ARM/addimm-mulimm.ll +++ b/llvm/test/CodeGen/ARM/addimm-mulimm.ll @@ -243,10 +243,9 @@ define i16 @fold_sub301_mul19_i16(i16 %a) { define i32 @fold_add251_mul253_i32(i32 %a) { ; CHECK-ARMV6-LABEL: fold_add251_mul253_i32: ; CHECK-ARMV6: @ %bb.0: -; CHECK-ARMV6-NEXT: mov r1, #15 -; CHECK-ARMV6-NEXT: mov r2, #253 -; CHECK-ARMV6-NEXT: orr r1, r1, #63488 -; CHECK-ARMV6-NEXT: mla r0, r0, r2, r1 +; CHECK-ARMV6-NEXT: add r0, r0, #251 +; CHECK-ARMV6-NEXT: mov r1, #253 +; CHECK-ARMV6-NEXT: mul r0, r0, r1 ; CHECK-ARMV6-NEXT: bx lr ; ; CHECK-ARMV7-LABEL: fold_add251_mul253_i32: @@ -258,15 +257,10 @@ define i32 @fold_add251_mul253_i32(i32 %a) { ; ; CHECK-THUMBV6M-LABEL: fold_add251_mul253_i32: ; CHECK-THUMBV6M: @ 
%bb.0: +; CHECK-THUMBV6M-NEXT: adds r0, #251 ; CHECK-THUMBV6M-NEXT: movs r1, #253 -; CHECK-THUMBV6M-NEXT: muls r1, r0, r1 -; CHECK-THUMBV6M-NEXT: ldr r0, .LCPI8_0 -; CHECK-THUMBV6M-NEXT: adds r0, r1, r0 +; CHECK-THUMBV6M-NEXT: muls r0, r1, r0 ; CHECK-THUMBV6M-NEXT: bx lr -; CHECK-THUMBV6M-NEXT: .p2align 2 -; CHECK-THUMBV6M-NEXT: @ %bb.1: -; CHECK-THUMBV6M-NEXT: .LCPI8_0: -; CHECK-THUMBV6M-NEXT: .long 63503 @ 0xf80f ; ; CHECK-THUMBV7M-LABEL: fold_add251_mul253_i32: ; CHECK-THUMBV7M: @ %bb.0: @@ -282,9 +276,9 @@ define i32 @fold_add251_mul253_i32(i32 %a) { define i16 @fold_add251_mul253_i16(i16 %a) { ; CHECK-ARMV6-LABEL: fold_add251_mul253_i16: ; CHECK-ARMV6: @ %bb.0: -; CHECK-ARMV6-NEXT: mvn r1, #2032 -; CHECK-ARMV6-NEXT: mov r2, #253 -; CHECK-ARMV6-NEXT: mla r0, r0, r2, r1 +; CHECK-ARMV6-NEXT: add r0, r0, #251 +; CHECK-ARMV6-NEXT: mov r1, #253 +; CHECK-ARMV6-NEXT: mul r0, r0, r1 ; CHECK-ARMV6-NEXT: bx lr ; ; CHECK-ARMV7-LABEL: fold_add251_mul253_i16: @@ -297,15 +291,10 @@ define i16 @fold_add251_mul253_i16(i16 %a) { ; ; CHECK-THUMBV6M-LABEL: fold_add251_mul253_i16: ; CHECK-THUMBV6M: @ %bb.0: +; CHECK-THUMBV6M-NEXT: adds r0, #251 ; CHECK-THUMBV6M-NEXT: movs r1, #253 -; CHECK-THUMBV6M-NEXT: muls r1, r0, r1 -; CHECK-THUMBV6M-NEXT: ldr r0, .LCPI9_0 -; CHECK-THUMBV6M-NEXT: adds r0, r1, r0 +; CHECK-THUMBV6M-NEXT: muls r0, r1, r0 ; CHECK-THUMBV6M-NEXT: bx lr -; CHECK-THUMBV6M-NEXT: .p2align 2 -; CHECK-THUMBV6M-NEXT: @ %bb.1: -; CHECK-THUMBV6M-NEXT: .LCPI9_0: -; CHECK-THUMBV6M-NEXT: .long 4294965263 @ 0xfffff80f ; ; CHECK-THUMBV7M-LABEL: fold_add251_mul253_i16: ; CHECK-THUMBV7M: @ %bb.0: @@ -319,41 +308,19 @@ define i16 @fold_add251_mul253_i16(i16 %a) { } define i32 @fold_sub251_mul253_i32(i32 %a) { -; CHECK-ARMV6-LABEL: fold_sub251_mul253_i32: -; CHECK-ARMV6: @ %bb.0: -; CHECK-ARMV6-NEXT: mvn r1, #14 -; CHECK-ARMV6-NEXT: mov r2, #253 -; CHECK-ARMV6-NEXT: sub r1, r1, #63488 -; CHECK-ARMV6-NEXT: mla r0, r0, r2, r1 -; CHECK-ARMV6-NEXT: bx lr -; -; CHECK-ARMV7-LABEL: 
fold_sub251_mul253_i32: -; CHECK-ARMV7: @ %bb.0: -; CHECK-ARMV7-NEXT: mov r1, #253 -; CHECK-ARMV7-NEXT: mul r0, r0, r1 -; CHECK-ARMV7-NEXT: movw r1, #63503 -; CHECK-ARMV7-NEXT: sub r0, r0, r1 -; CHECK-ARMV7-NEXT: bx lr -; -; CHECK-THUMBV6M-LABEL: fold_sub251_mul253_i32: -; CHECK-THUMBV6M: @ %bb.0: -; CHECK-THUMBV6M-NEXT: movs r1, #253 -; CHECK-THUMBV6M-NEXT: muls r1, r0, r1 -; CHECK-THUMBV6M-NEXT: ldr r0, .LCPI10_0 -; CHECK-THUMBV6M-NEXT: adds r0, r1, r0 -; CHECK-THUMBV6M-NEXT: bx lr -; CHECK-THUMBV6M-NEXT: .p2align 2 -; CHECK-THUMBV6M-NEXT: @ %bb.1: -; CHECK-THUMBV6M-NEXT: .LCPI10_0: -; CHECK-THUMBV6M-NEXT: .long 4294903793 @ 0xffff07f1 +; CHECK-ARM-LABEL: fold_sub251_mul253_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: sub r0, r0, #251 +; CHECK-ARM-NEXT: mov r1, #253 +; CHECK-ARM-NEXT: mul r0, r0, r1 +; CHECK-ARM-NEXT: bx lr ; -; CHECK-THUMBV7M-LABEL: fold_sub251_mul253_i32: -; CHECK-THUMBV7M: @ %bb.0: -; CHECK-THUMBV7M-NEXT: movs r1, #253 -; CHECK-THUMBV7M-NEXT: muls r0, r1, r0 -; CHECK-THUMBV7M-NEXT: movw r1, #63503 -; CHECK-THUMBV7M-NEXT: subs r0, r0, r1 -; CHECK-THUMBV7M-NEXT: bx lr +; CHECK-THUMB-LABEL: fold_sub251_mul253_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, #251 +; CHECK-THUMB-NEXT: movs r1, #253 +; CHECK-THUMB-NEXT: muls r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr %b = add i32 %a, -251 %c = mul i32 %b, 253 ret i32 %c @@ -362,10 +329,9 @@ define i32 @fold_sub251_mul253_i32(i32 %a) { define i16 @fold_sub251_mul253_i16(i16 %a) { ; CHECK-ARMV6-LABEL: fold_sub251_mul253_i16: ; CHECK-ARMV6: @ %bb.0: -; CHECK-ARMV6-NEXT: mov r1, #241 -; CHECK-ARMV6-NEXT: mov r2, #253 -; CHECK-ARMV6-NEXT: orr r1, r1, #1792 -; CHECK-ARMV6-NEXT: mla r0, r0, r2, r1 +; CHECK-ARMV6-NEXT: sub r0, r0, #251 +; CHECK-ARMV6-NEXT: mov r1, #253 +; CHECK-ARMV6-NEXT: mul r0, r0, r1 ; CHECK-ARMV6-NEXT: bx lr ; ; CHECK-ARMV7-LABEL: fold_sub251_mul253_i16: @@ -377,15 +343,10 @@ define i16 @fold_sub251_mul253_i16(i16 %a) { ; ; CHECK-THUMBV6M-LABEL: fold_sub251_mul253_i16: ; 
CHECK-THUMBV6M: @ %bb.0: +; CHECK-THUMBV6M-NEXT: subs r0, #251 ; CHECK-THUMBV6M-NEXT: movs r1, #253 -; CHECK-THUMBV6M-NEXT: muls r1, r0, r1 -; CHECK-THUMBV6M-NEXT: ldr r0, .LCPI11_0 -; CHECK-THUMBV6M-NEXT: adds r0, r1, r0 +; CHECK-THUMBV6M-NEXT: muls r0, r1, r0 ; CHECK-THUMBV6M-NEXT: bx lr -; CHECK-THUMBV6M-NEXT: .p2align 2 -; CHECK-THUMBV6M-NEXT: @ %bb.1: -; CHECK-THUMBV6M-NEXT: .LCPI11_0: -; CHECK-THUMBV6M-NEXT: .long 2033 @ 0x7f1 ; ; CHECK-THUMBV7M-LABEL: fold_sub251_mul253_i16: ; CHECK-THUMBV7M: @ %bb.0: @@ -401,43 +362,32 @@ define i16 @fold_sub251_mul253_i16(i16 %a) { define i32 @fold_add251_mul353_i32(i32 %a) { ; CHECK-ARMV6-LABEL: fold_add251_mul353_i32: ; CHECK-ARMV6: @ %bb.0: -; CHECK-ARMV6-NEXT: mov r2, #97 -; CHECK-ARMV6-NEXT: ldr r1, .LCPI12_0 -; CHECK-ARMV6-NEXT: orr r2, r2, #256 -; CHECK-ARMV6-NEXT: mla r0, r0, r2, r1 +; CHECK-ARMV6-NEXT: mov r1, #97 +; CHECK-ARMV6-NEXT: add r0, r0, #251 +; CHECK-ARMV6-NEXT: orr r1, r1, #256 +; CHECK-ARMV6-NEXT: mul r0, r0, r1 ; CHECK-ARMV6-NEXT: bx lr -; CHECK-ARMV6-NEXT: .p2align 2 -; CHECK-ARMV6-NEXT: @ %bb.1: -; CHECK-ARMV6-NEXT: .LCPI12_0: -; CHECK-ARMV6-NEXT: .long 88603 @ 0x15a1b ; ; CHECK-ARMV7-LABEL: fold_add251_mul353_i32: ; CHECK-ARMV7: @ %bb.0: -; CHECK-ARMV7-NEXT: movw r1, #23067 -; CHECK-ARMV7-NEXT: movw r2, #353 -; CHECK-ARMV7-NEXT: movt r1, #1 -; CHECK-ARMV7-NEXT: mla r0, r0, r2, r1 +; CHECK-ARMV7-NEXT: add r0, r0, #251 +; CHECK-ARMV7-NEXT: movw r1, #353 +; CHECK-ARMV7-NEXT: mul r0, r0, r1 ; CHECK-ARMV7-NEXT: bx lr ; ; CHECK-THUMBV6M-LABEL: fold_add251_mul353_i32: ; CHECK-THUMBV6M: @ %bb.0: ; CHECK-THUMBV6M-NEXT: movs r1, #255 ; CHECK-THUMBV6M-NEXT: adds r1, #98 -; CHECK-THUMBV6M-NEXT: muls r1, r0, r1 -; CHECK-THUMBV6M-NEXT: ldr r0, .LCPI12_0 -; CHECK-THUMBV6M-NEXT: adds r0, r1, r0 +; CHECK-THUMBV6M-NEXT: adds r0, #251 +; CHECK-THUMBV6M-NEXT: muls r0, r1, r0 ; CHECK-THUMBV6M-NEXT: bx lr -; CHECK-THUMBV6M-NEXT: .p2align 2 -; CHECK-THUMBV6M-NEXT: @ %bb.1: -; CHECK-THUMBV6M-NEXT: .LCPI12_0: -; 
CHECK-THUMBV6M-NEXT: .long 88603 @ 0x15a1b ; ; CHECK-THUMBV7M-LABEL: fold_add251_mul353_i32: ; CHECK-THUMBV7M: @ %bb.0: -; CHECK-THUMBV7M-NEXT: movw r1, #23067 -; CHECK-THUMBV7M-NEXT: movw r2, #353 -; CHECK-THUMBV7M-NEXT: movt r1, #1 -; CHECK-THUMBV7M-NEXT: mla r0, r0, r2, r1 +; CHECK-THUMBV7M-NEXT: adds r0, #251 +; CHECK-THUMBV7M-NEXT: movw r1, #353 +; CHECK-THUMBV7M-NEXT: muls r0, r1, r0 ; CHECK-THUMBV7M-NEXT: bx lr %b = add i32 %a, 251 %c = mul i32 %b, 353 @@ -447,11 +397,10 @@ define i32 @fold_add251_mul353_i32(i32 %a) { define i16 @fold_add251_mul353_i16(i16 %a) { ; CHECK-ARMV6-LABEL: fold_add251_mul353_i16: ; CHECK-ARMV6: @ %bb.0: -; CHECK-ARMV6-NEXT: mov r2, #97 -; CHECK-ARMV6-NEXT: mov r1, #27 -; CHECK-ARMV6-NEXT: orr r2, r2, #256 -; CHECK-ARMV6-NEXT: orr r1, r1, #23040 -; CHECK-ARMV6-NEXT: mla r0, r0, r2, r1 +; CHECK-ARMV6-NEXT: mov r1, #97 +; CHECK-ARMV6-NEXT: add r0, r0, #251 +; CHECK-ARMV6-NEXT: orr r1, r1, #256 +; CHECK-ARMV6-NEXT: mul r0, r0, r1 ; CHECK-ARMV6-NEXT: bx lr ; ; CHECK-ARMV7-LABEL: fold_add251_mul353_i16: @@ -465,14 +414,9 @@ define i16 @fold_add251_mul353_i16(i16 %a) { ; CHECK-THUMBV6M: @ %bb.0: ; CHECK-THUMBV6M-NEXT: movs r1, #255 ; CHECK-THUMBV6M-NEXT: adds r1, #98 -; CHECK-THUMBV6M-NEXT: muls r1, r0, r1 -; CHECK-THUMBV6M-NEXT: ldr r0, .LCPI13_0 -; CHECK-THUMBV6M-NEXT: adds r0, r1, r0 +; CHECK-THUMBV6M-NEXT: adds r0, #251 +; CHECK-THUMBV6M-NEXT: muls r0, r1, r0 ; CHECK-THUMBV6M-NEXT: bx lr -; CHECK-THUMBV6M-NEXT: .p2align 2 -; CHECK-THUMBV6M-NEXT: @ %bb.1: -; CHECK-THUMBV6M-NEXT: .LCPI13_0: -; CHECK-THUMBV6M-NEXT: .long 23067 @ 0x5a1b ; ; CHECK-THUMBV7M-LABEL: fold_add251_mul353_i16: ; CHECK-THUMBV7M: @ %bb.0: @@ -488,43 +432,32 @@ define i16 @fold_add251_mul353_i16(i16 %a) { define i32 @fold_sub251_mul353_i32(i32 %a) { ; CHECK-ARMV6-LABEL: fold_sub251_mul353_i32: ; CHECK-ARMV6: @ %bb.0: -; CHECK-ARMV6-NEXT: mov r2, #97 -; CHECK-ARMV6-NEXT: ldr r1, .LCPI14_0 -; CHECK-ARMV6-NEXT: orr r2, r2, #256 -; CHECK-ARMV6-NEXT: mla r0, r0, r2, r1 
+; CHECK-ARMV6-NEXT: mov r1, #97 +; CHECK-ARMV6-NEXT: sub r0, r0, #251 +; CHECK-ARMV6-NEXT: orr r1, r1, #256 +; CHECK-ARMV6-NEXT: mul r0, r0, r1 ; CHECK-ARMV6-NEXT: bx lr -; CHECK-ARMV6-NEXT: .p2align 2 -; CHECK-ARMV6-NEXT: @ %bb.1: -; CHECK-ARMV6-NEXT: .LCPI14_0: -; CHECK-ARMV6-NEXT: .long 4294878693 @ 0xfffea5e5 ; ; CHECK-ARMV7-LABEL: fold_sub251_mul353_i32: ; CHECK-ARMV7: @ %bb.0: -; CHECK-ARMV7-NEXT: movw r1, #42469 -; CHECK-ARMV7-NEXT: movw r2, #353 -; CHECK-ARMV7-NEXT: movt r1, #65534 -; CHECK-ARMV7-NEXT: mla r0, r0, r2, r1 +; CHECK-ARMV7-NEXT: sub r0, r0, #251 +; CHECK-ARMV7-NEXT: movw r1, #353 +; CHECK-ARMV7-NEXT: mul r0, r0, r1 ; CHECK-ARMV7-NEXT: bx lr ; ; CHECK-THUMBV6M-LABEL: fold_sub251_mul353_i32: ; CHECK-THUMBV6M: @ %bb.0: ; CHECK-THUMBV6M-NEXT: movs r1, #255 ; CHECK-THUMBV6M-NEXT: adds r1, #98 -; CHECK-THUMBV6M-NEXT: muls r1, r0, r1 -; CHECK-THUMBV6M-NEXT: ldr r0, .LCPI14_0 -; CHECK-THUMBV6M-NEXT: adds r0, r1, r0 +; CHECK-THUMBV6M-NEXT: subs r0, #251 +; CHECK-THUMBV6M-NEXT: muls r0, r1, r0 ; CHECK-THUMBV6M-NEXT: bx lr -; CHECK-THUMBV6M-NEXT: .p2align 2 -; CHECK-THUMBV6M-NEXT: @ %bb.1: -; CHECK-THUMBV6M-NEXT: .LCPI14_0: -; CHECK-THUMBV6M-NEXT: .long 4294878693 @ 0xfffea5e5 ; ; CHECK-THUMBV7M-LABEL: fold_sub251_mul353_i32: ; CHECK-THUMBV7M: @ %bb.0: -; CHECK-THUMBV7M-NEXT: movw r1, #42469 -; CHECK-THUMBV7M-NEXT: movw r2, #353 -; CHECK-THUMBV7M-NEXT: movt r1, #65534 -; CHECK-THUMBV7M-NEXT: mla r0, r0, r2, r1 +; CHECK-THUMBV7M-NEXT: subs r0, #251 +; CHECK-THUMBV7M-NEXT: movw r1, #353 +; CHECK-THUMBV7M-NEXT: muls r0, r1, r0 ; CHECK-THUMBV7M-NEXT: bx lr %b = add i32 %a, -251 %c = mul i32 %b, 353 @@ -534,11 +467,10 @@ define i32 @fold_sub251_mul353_i32(i32 %a) { define i16 @fold_sub251_mul353_i16(i16 %a) { ; CHECK-ARMV6-LABEL: fold_sub251_mul353_i16: ; CHECK-ARMV6: @ %bb.0: -; CHECK-ARMV6-NEXT: mov r2, #97 -; CHECK-ARMV6-NEXT: mvn r1, #26 -; CHECK-ARMV6-NEXT: orr r2, r2, #256 -; CHECK-ARMV6-NEXT: sub r1, r1, #23040 -; CHECK-ARMV6-NEXT: mla r0, r0, r2, r1 
+; CHECK-ARMV6-NEXT: mov r1, #97 +; CHECK-ARMV6-NEXT: sub r0, r0, #251 +; CHECK-ARMV6-NEXT: orr r1, r1, #256 +; CHECK-ARMV6-NEXT: mul r0, r0, r1 ; CHECK-ARMV6-NEXT: bx lr ; ; CHECK-ARMV7-LABEL: fold_sub251_mul353_i16: @@ -553,14 +485,9 @@ define i16 @fold_sub251_mul353_i16(i16 %a) { ; CHECK-THUMBV6M: @ %bb.0: ; CHECK-THUMBV6M-NEXT: movs r1, #255 ; CHECK-THUMBV6M-NEXT: adds r1, #98 -; CHECK-THUMBV6M-NEXT: muls r1, r0, r1 -; CHECK-THUMBV6M-NEXT: ldr r0, .LCPI15_0 -; CHECK-THUMBV6M-NEXT: adds r0, r1, r0 +; CHECK-THUMBV6M-NEXT: subs r0, #251 +; CHECK-THUMBV6M-NEXT: muls r0, r1, r0 ; CHECK-THUMBV6M-NEXT: bx lr -; CHECK-THUMBV6M-NEXT: .p2align 2 -; CHECK-THUMBV6M-NEXT: @ %bb.1: -; CHECK-THUMBV6M-NEXT: .LCPI15_0: -; CHECK-THUMBV6M-NEXT: .long 4294944229 @ 0xffffa5e5 ; ; CHECK-THUMBV7M-LABEL: fold_sub251_mul353_i16: ; CHECK-THUMBV7M: @ %bb.0: diff --git a/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll index dd20fd07dff0..8900d5f541e8 100644 --- a/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll @@ -329,85 +329,81 @@ define i1 @test_urem_negative_odd(i9 %X) nounwind { define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind { ; ARM5-LABEL: test_urem_vec: ; ARM5: @ %bb.0: -; ARM5-NEXT: push {r4, r5, r11, lr} -; ARM5-NEXT: mov r3, #183 -; ARM5-NEXT: mvn r12, #182 -; ARM5-NEXT: orr r3, r3, #1280 -; ARM5-NEXT: sub r12, r12, #1280 -; ARM5-NEXT: mov r4, #51 -; ARM5-NEXT: mla lr, r1, r3, r12 +; ARM5-NEXT: push {r4, lr} +; ARM5-NEXT: mov r3, #171 +; ARM5-NEXT: orr r3, r3, #512 +; ARM5-NEXT: mul r12, r0, r3 +; ARM5-NEXT: mov r0, #1020 +; ARM5-NEXT: orr r0, r0, #1024 +; ARM5-NEXT: mov r3, #254 +; ARM5-NEXT: orr r3, r3, #1792 +; ARM5-NEXT: and r0, r12, r0 +; ARM5-NEXT: lsr r0, r0, #1 +; ARM5-NEXT: orr r0, r0, r12, lsl #10 +; ARM5-NEXT: sub r12, r1, #1 +; ARM5-NEXT: mov r1, #183 +; ARM5-NEXT: and r0, r0, r3 +; ARM5-NEXT: orr r1, r1, #1280 +; ARM5-NEXT: mov r3, #0 +; ARM5-NEXT: 
lsr r0, r0, #1 +; ARM5-NEXT: cmp r0, #170 +; ARM5-NEXT: mul lr, r12, r1 ; ARM5-NEXT: mov r12, #255 ; ARM5-NEXT: orr r12, r12, #1792 -; ARM5-NEXT: orr r4, r4, #768 -; ARM5-NEXT: mov r3, #0 +; ARM5-NEXT: mov r0, #0 +; ARM5-NEXT: movhi r0, #1 ; ARM5-NEXT: and r1, lr, r12 -; ARM5-NEXT: mvn lr, #101 -; ARM5-NEXT: sub lr, lr, #1536 +; ARM5-NEXT: sub lr, r2, #2 +; ARM5-NEXT: mov r2, #51 ; ARM5-NEXT: cmp r1, #292 -; ARM5-NEXT: mla r5, r2, r4, lr +; ARM5-NEXT: orr r2, r2, #768 ; ARM5-NEXT: mov r1, #0 ; ARM5-NEXT: movhi r1, #1 -; ARM5-NEXT: and r2, r5, r12 -; ARM5-NEXT: mov r5, #171 -; ARM5-NEXT: orr r5, r5, #512 +; ARM5-NEXT: mul r4, lr, r2 +; ARM5-NEXT: and r2, r4, r12 ; ARM5-NEXT: cmp r2, #1 -; ARM5-NEXT: mov r2, #0 -; ARM5-NEXT: mul r4, r0, r5 -; ARM5-NEXT: mov r0, #1020 -; ARM5-NEXT: orr r0, r0, #1024 -; ARM5-NEXT: mov r5, #254 -; ARM5-NEXT: movhi r2, #1 -; ARM5-NEXT: orr r5, r5, #1792 -; ARM5-NEXT: and r0, r4, r0 -; ARM5-NEXT: lsr r0, r0, #1 -; ARM5-NEXT: orr r0, r0, r4, lsl #10 -; ARM5-NEXT: and r0, r0, r5 -; ARM5-NEXT: lsr r0, r0, #1 -; ARM5-NEXT: cmp r0, #170 ; ARM5-NEXT: movhi r3, #1 -; ARM5-NEXT: mov r0, r3 -; ARM5-NEXT: pop {r4, r5, r11, pc} +; ARM5-NEXT: mov r2, r3 +; ARM5-NEXT: pop {r4, pc} ; ; ARM6-LABEL: test_urem_vec: ; ARM6: @ %bb.0: -; ARM6-NEXT: push {r4, lr} -; ARM6-NEXT: mov r4, #51 -; ARM6-NEXT: mvn lr, #101 -; ARM6-NEXT: orr r4, r4, #768 -; ARM6-NEXT: sub lr, lr, #1536 -; ARM6-NEXT: mov r3, #183 -; ARM6-NEXT: mvn r12, #182 -; ARM6-NEXT: mla r2, r2, r4, lr -; ARM6-NEXT: mov r4, #171 -; ARM6-NEXT: orr r4, r4, #512 -; ARM6-NEXT: orr r3, r3, #1280 -; ARM6-NEXT: sub r12, r12, #1280 -; ARM6-NEXT: mul r0, r0, r4 -; ARM6-NEXT: mov r4, #1020 -; ARM6-NEXT: orr r4, r4, #1024 -; ARM6-NEXT: mla r1, r1, r3, r12 +; ARM6-NEXT: push {r11, lr} +; ARM6-NEXT: mov r3, #171 +; ARM6-NEXT: sub r12, r1, #1 +; ARM6-NEXT: orr r3, r3, #512 +; ARM6-NEXT: mov r1, #183 +; ARM6-NEXT: orr r1, r1, #1280 +; ARM6-NEXT: sub lr, r2, #2 +; ARM6-NEXT: mul r0, r0, r3 +; ARM6-NEXT: mov r3, 
#1020 +; ARM6-NEXT: orr r3, r3, #1024 +; ARM6-NEXT: mov r2, #51 +; ARM6-NEXT: mul r1, r12, r1 +; ARM6-NEXT: orr r2, r2, #768 ; ARM6-NEXT: mov r12, #255 +; ARM6-NEXT: and r3, r0, r3 +; ARM6-NEXT: mul r2, lr, r2 ; ARM6-NEXT: orr r12, r12, #1792 +; ARM6-NEXT: lsr r3, r3, #1 +; ARM6-NEXT: orr r0, r3, r0, lsl #10 +; ARM6-NEXT: mov r3, #254 +; ARM6-NEXT: and r1, r1, r12 +; ARM6-NEXT: orr r3, r3, #1792 +; ARM6-NEXT: and r0, r0, r3 ; ARM6-NEXT: and r2, r2, r12 ; ARM6-NEXT: mov r3, #0 -; ARM6-NEXT: and r4, r0, r4 -; ARM6-NEXT: lsr r4, r4, #1 -; ARM6-NEXT: orr r0, r4, r0, lsl #10 -; ARM6-NEXT: mov r4, #254 -; ARM6-NEXT: and r1, r1, r12 -; ARM6-NEXT: orr r4, r4, #1792 +; ARM6-NEXT: lsr r0, r0, #1 +; ARM6-NEXT: cmp r0, #170 +; ARM6-NEXT: mov r0, #0 +; ARM6-NEXT: movhi r0, #1 ; ARM6-NEXT: cmp r1, #292 ; ARM6-NEXT: mov r1, #0 -; ARM6-NEXT: and r0, r0, r4 ; ARM6-NEXT: movhi r1, #1 ; ARM6-NEXT: cmp r2, #1 -; ARM6-NEXT: mov r2, #0 -; ARM6-NEXT: lsr r0, r0, #1 -; ARM6-NEXT: movhi r2, #1 -; ARM6-NEXT: cmp r0, #170 ; ARM6-NEXT: movhi r3, #1 -; ARM6-NEXT: mov r0, r3 -; ARM6-NEXT: pop {r4, pc} +; ARM6-NEXT: mov r2, r3 +; ARM6-NEXT: pop {r11, pc} ; ; ARM7-LABEL: test_urem_vec: ; ARM7: @ %bb.0: diff --git a/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll index 571f8e5026e5..d2470f546a5a 100644 --- a/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll @@ -128,26 +128,24 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind { ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: movs r0, r4 ; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: subs r1, r1, #1 ; CHECK-NEXT: ldr r5, .LCPI4_3 ; CHECK-NEXT: muls r5, r1, r5 -; CHECK-NEXT: ldr r1, .LCPI4_4 -; CHECK-NEXT: adds r1, r5, r1 -; CHECK-NEXT: movs r5, #73 -; CHECK-NEXT: lsls r5, r5, #23 -; CHECK-NEXT: cmp r1, r5 +; CHECK-NEXT: movs r1, #73 +; CHECK-NEXT: lsls r1, r1, #23 +; CHECK-NEXT: cmp r5, r1 ; CHECK-NEXT: push {r3} ; CHECK-NEXT: pop {r1} ; 
CHECK-NEXT: bhi .LBB4_4 ; CHECK-NEXT: @ %bb.3: ; CHECK-NEXT: movs r1, r4 ; CHECK-NEXT: .LBB4_4: -; CHECK-NEXT: ldr r5, .LCPI4_5 +; CHECK-NEXT: subs r2, r2, #2 +; CHECK-NEXT: ldr r5, .LCPI4_4 ; CHECK-NEXT: muls r5, r2, r5 -; CHECK-NEXT: ldr r2, .LCPI4_6 -; CHECK-NEXT: adds r2, r5, r2 -; CHECK-NEXT: ldr r5, .LCPI4_7 -; CHECK-NEXT: ands r5, r2 -; CHECK-NEXT: cmp r5, #1 +; CHECK-NEXT: ldr r2, .LCPI4_5 +; CHECK-NEXT: ands r2, r5 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: bhi .LBB4_6 ; CHECK-NEXT: @ %bb.5: ; CHECK-NEXT: movs r3, r4 @@ -167,12 +165,8 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind { ; CHECK-NEXT: .LCPI4_3: ; CHECK-NEXT: .long 3068133376 @ 0xb6e00000 ; CHECK-NEXT: .LCPI4_4: -; CHECK-NEXT: .long 1226833920 @ 0x49200000 -; CHECK-NEXT: .LCPI4_5: ; CHECK-NEXT: .long 819 @ 0x333 -; CHECK-NEXT: .LCPI4_6: -; CHECK-NEXT: .long 4294965658 @ 0xfffff99a -; CHECK-NEXT: .LCPI4_7: ; CHECK-NEXT: .long 2047 @ 0x7ff %urem = urem <3 x i11> %X, <i11 6, i11 7, i11 -5> %cmp = icmp ne <3 x i11> %urem, <i11 0, i11 1, i11 2> -- 2.11.4.GIT