From 076dbc02724681c7d3664959d5ae742099b7edb6 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 4 Jan 2024 16:54:38 +0000 Subject: [PATCH] [X86] SimplifyDemandedVectorEltsForTargetNode - add X86ISD::VZEXT_LOAD handling. Simplify to a scalar_to_vector(load()) if we don't demand any of the upper vector elements. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 14 ++++++++++++++ llvm/test/CodeGen/X86/buildvec-insertvec.ll | 6 ++---- llvm/test/CodeGen/X86/fminimum-fmaximum.ll | 5 ++--- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e0679f5f27d8..fe3ba2ae2917 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -41348,6 +41348,20 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( return TLO.CombineTo(Op, Src); break; } + case X86ISD::VZEXT_LOAD: { + // If the upper elements are not demanded then simplify to a + // scalar_to_vector(load()). 
+ MVT SVT = VT.getSimpleVT().getVectorElementType(); + if (DemandedElts == 1 && Op.getValue(1).use_empty() && isTypeLegal(SVT)) { + SDLoc DL(Op); + auto *Mem = cast<MemSDNode>(Op); + SDValue Elt = TLO.DAG.getLoad(SVT, DL, Mem->getChain(), Mem->getBasePtr(), + Mem->getMemOperand()); + SDValue Vec = TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Elt); + return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Vec)); + } + break; + } case X86ISD::VBROADCAST: { SDValue Src = Op.getOperand(0); MVT SrcVT = Src.getSimpleValueType(); diff --git a/llvm/test/CodeGen/X86/buildvec-insertvec.ll b/llvm/test/CodeGen/X86/buildvec-insertvec.ll index a3568716edd9..3fdfde8576f7 100644 --- a/llvm/test/CodeGen/X86/buildvec-insertvec.ll +++ b/llvm/test/CodeGen/X86/buildvec-insertvec.ll @@ -799,9 +799,8 @@ define i32 @PR46586(ptr %p, <4 x i32> %v) { ; ; SSE41-LABEL: PR46586: ; SSE41: # %bb.0: -; SSE41-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SSE41-NEXT: movzbl 3(%rdi), %eax ; SSE41-NEXT: extractps $3, %xmm0, %ecx -; SSE41-NEXT: pextrb $3, %xmm1, %eax ; SSE41-NEXT: xorl %edx, %edx ; SSE41-NEXT: divl %ecx ; SSE41-NEXT: movl %edx, %eax @@ -809,9 +808,8 @@ define i32 @PR46586(ptr %p, <4 x i32> %v) { ; ; AVX-LABEL: PR46586: ; AVX: # %bb.0: -; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: movzbl 3(%rdi), %eax ; AVX-NEXT: vextractps $3, %xmm0, %ecx -; AVX-NEXT: vpextrb $3, %xmm1, %eax ; AVX-NEXT: xorl %edx, %edx ; AVX-NEXT: divl %ecx ; AVX-NEXT: movl %edx, %eax diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll index 5bb5d1e9c17e..8905d2bce5e9 100644 --- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll +++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll @@ -699,10 +699,9 @@ define double @test_fminimum_nnan(double %x, double %y) "no-nans-fp-math"="true" ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp -; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; X86-NEXT: 
vmovsd {{.*#+}} xmm2 = mem[0],zero -; X86-NEXT: vextractps $1, %xmm2, %eax +; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: vextractps $1, %xmm0, %eax ; X86-NEXT: testl %eax, %eax ; X86-NEXT: js .LBB14_1 ; X86-NEXT: # %bb.2: -- 2.11.4.GIT