lib/Target/ARM/ARMSelectionDAGInfo.cpp

   1 //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file implements the ARMSelectionDAGInfo class.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "ARMTargetMachine.h"
  14 #include "llvm/CodeGen/SelectionDAG.h"
  15 #include "llvm/IR/DerivedTypes.h"
  16 using namespace llvm;
  17
  18 #define DEBUG_TYPE "arm-selectiondag-info"
  19
  20 // Emit, if possible, a specialized version of the given Libcall. Typically this
  21 // means selecting the appropriately aligned version, but we also convert memset
  22 // of 0 into memclr.
  23 SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
  24     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
  25     SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
  26   const ARMSubtarget &Subtarget =
  27       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
  28   const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
  29
  30   // Only use a specialized AEABI function if the default version of this
  31   // Libcall is an AEABI function.
  32   if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
  33     return SDValue();
  34
  35   // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
  36   // able to translate memset to memclr and use the value to index the function
  37   // name array.
  38   enum {
  39     AEABI_MEMCPY = 0,
  40     AEABI_MEMMOVE,
  41     AEABI_MEMSET,
  42     AEABI_MEMCLR
  43   } AEABILibcall;
  44   switch (LC) {
  45   case RTLIB::MEMCPY:
  46     AEABILibcall = AEABI_MEMCPY;
  47     break;
  48   case RTLIB::MEMMOVE:
  49     AEABILibcall = AEABI_MEMMOVE;
  50     break;
  51   case RTLIB::MEMSET:
  52     AEABILibcall = AEABI_MEMSET;
  53     if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
  54       if (ConstantSrc->getZExtValue() == 0)
  55         AEABILibcall = AEABI_MEMCLR;
  56     break;
  57   default:
  58     return SDValue();
  59   }
  60
  61   // Choose the most-aligned libcall variant that we can
  62   enum {
  63     ALIGN1 = 0,
  64     ALIGN4,
  65     ALIGN8
  66   } AlignVariant;
  67   if ((Align & 7) == 0)
  68     AlignVariant = ALIGN8;
  69   else if ((Align & 3) == 0)
  70     AlignVariant = ALIGN4;
  71   else
  72     AlignVariant = ALIGN1;
  73
  74   TargetLowering::ArgListTy Args;
  75   TargetLowering::ArgListEntry Entry;
  76   Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
  77   Entry.Node = Dst;
  78   Args.push_back(Entry);
  79   if (AEABILibcall == AEABI_MEMCLR) {
  80     Entry.Node = Size;
  81     Args.push_back(Entry);
  82   } else if (AEABILibcall == AEABI_MEMSET) {
  83     // Adjust parameters for memset, EABI uses format (ptr, size, value),
  84     // GNU library uses (ptr, value, size)
  85     // See RTABI section 4.3.4
  86     Entry.Node = Size;
  87     Args.push_back(Entry);
  88
  89     // Extend or truncate the argument to be an i32 value for the call.
  90     if (Src.getValueType().bitsGT(MVT::i32))
  91       Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
  92     else if (Src.getValueType().bitsLT(MVT::i32))
  93       Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
  94
  95     Entry.Node = Src;
  96     Entry.Ty = Type::getInt32Ty(*DAG.getContext());
  97     Entry.IsSExt = false;
  98     Args.push_back(Entry);
  99   } else {
 100     Entry.Node = Src;
 101     Args.push_back(Entry);
 102
 103     Entry.Node = Size;
 104     Args.push_back(Entry);
 105   }
 106
 107   char const *FunctionNames[4][3] = {
 108     { "__aeabi_memcpy",  "__aeabi_memcpy4",  "__aeabi_memcpy8"  },
 109     { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
 110     { "__aeabi_memset",  "__aeabi_memset4",  "__aeabi_memset8"  },
 111     { "__aeabi_memclr",  "__aeabi_memclr4",  "__aeabi_memclr8"  }
 112   };
 113   TargetLowering::CallLoweringInfo CLI(DAG);
 114   CLI.setDebugLoc(dl)
 115       .setChain(Chain)
 116       .setLibCallee(
 117           TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
 118           DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
 119                                 TLI->getPointerTy(DAG.getDataLayout())),
 120           std::move(Args))
 121       .setDiscardResult();
 122   std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
 123
 124   return CallResult.second;
 125 }
 126
 127 SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
 128     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
 129     SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
 130     MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
 131   const ARMSubtarget &Subtarget =
 132       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
 133   // Do repeated 4-byte loads and stores. To be improved.
 134   // This requires 4-byte alignment.
 135   if ((Align & 3) != 0)
 136     return SDValue();
 137   // This requires the copy size to be a constant, preferably
 138   // within a subtarget-specific limit.
 139   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
 140   if (!ConstantSize)
 141     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
 142                                   RTLIB::MEMCPY);
 143   uint64_t SizeVal = ConstantSize->getZExtValue();
 144   if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
 145     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
 146                                   RTLIB::MEMCPY);
 147
 148   unsigned BytesLeft = SizeVal & 3;
 149   unsigned NumMemOps = SizeVal >> 2;
 150   unsigned EmittedNumMemOps = 0;
 151   EVT VT = MVT::i32;
 152   unsigned VTSize = 4;
 153   unsigned i = 0;
 154   // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
 155   const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
 156   SDValue TFOps[6];
 157   SDValue Loads[6];
 158   uint64_t SrcOff = 0, DstOff = 0;
 159
 160   // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
 161   // VLDM/VSTM and make this code emit it when appropriate. This would reduce
 162   // pressure on the general purpose registers. However this seems harder to map
 163   // onto the register allocator's view of the world.
 164
 165   // The number of MEMCPY pseudo-instructions to emit. We use up to
 166   // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
 167   // later on. This is a lower bound on the number of MEMCPY operations we must
 168   // emit.
 169   unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
 170
 171   // Code size optimisation: do not inline memcpy if expansion results in
 172   // more instructions than the libary call.
 173   if (NumMEMCPYs > 1 && Subtarget.hasMinSize()) {
 174     return SDValue();
 175   }
 176
 177   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
 178
 179   for (unsigned I = 0; I != NumMEMCPYs; ++I) {
 180     // Evenly distribute registers among MEMCPY operations to reduce register
 181     // pressure.
 182     unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
 183     unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
 184
 185     Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
 186                       DAG.getConstant(NumRegs, dl, MVT::i32));
 187     Src = Dst.getValue(1);
 188     Chain = Dst.getValue(2);
 189
 190     DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
 191     SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
 192
 193     EmittedNumMemOps = NextEmittedNumMemOps;
 194   }
 195
 196   if (BytesLeft == 0)
 197     return Chain;
 198
 199   // Issue loads / stores for the trailing (1 - 3) bytes.
 200   auto getRemainingValueType = [](unsigned BytesLeft) {
 201     return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
 202   };
 203   auto getRemainingSize = [](unsigned BytesLeft) {
 204     return (BytesLeft >= 2) ? 2 : 1;
 205   };
 206
 207   unsigned BytesLeftSave = BytesLeft;
 208   i = 0;
 209   while (BytesLeft) {
 210     VT = getRemainingValueType(BytesLeft);
 211     VTSize = getRemainingSize(BytesLeft);
 212     Loads[i] = DAG.getLoad(VT, dl, Chain,
 213                            DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
 214                                        DAG.getConstant(SrcOff, dl, MVT::i32)),
 215                            SrcPtrInfo.getWithOffset(SrcOff));
 216     TFOps[i] = Loads[i].getValue(1);
 217     ++i;
 218     SrcOff += VTSize;
 219     BytesLeft -= VTSize;
 220   }
 221   Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
 222                       makeArrayRef(TFOps, i));
 223
 224   i = 0;
 225   BytesLeft = BytesLeftSave;
 226   while (BytesLeft) {
 227     VT = getRemainingValueType(BytesLeft);
 228     VTSize = getRemainingSize(BytesLeft);
 229     TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
 230                             DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
 231                                         DAG.getConstant(DstOff, dl, MVT::i32)),
 232                             DstPtrInfo.getWithOffset(DstOff));
 233     ++i;
 234     DstOff += VTSize;
 235     BytesLeft -= VTSize;
 236   }
 237   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
 238                      makeArrayRef(TFOps, i));
 239 }
 240
 241 SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove(
 242     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
 243     SDValue Size, unsigned Align, bool isVolatile,
 244     MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
 245   return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
 246                                 RTLIB::MEMMOVE);
 247 }
 248
 249 SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
 250     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
 251     SDValue Size, unsigned Align, bool isVolatile,
 252     MachinePointerInfo DstPtrInfo) const {
 253   return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
 254                                 RTLIB::MEMSET);
 255 }