llvm/lib/Target/AVR/AVRISelLowering.cpp

   1 //===-- AVRISelLowering.cpp - AVR DAG Lowering Implementation -------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file defines the interfaces that AVR uses to lower LLVM code into a
  10 // selection DAG.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "AVRISelLowering.h"
  15
  16 #include "llvm/ADT/ArrayRef.h"
  17 #include "llvm/ADT/StringSwitch.h"
  18 #include "llvm/CodeGen/CallingConvLower.h"
  19 #include "llvm/CodeGen/MachineFrameInfo.h"
  20 #include "llvm/CodeGen/MachineInstrBuilder.h"
  21 #include "llvm/CodeGen/MachineRegisterInfo.h"
  22 #include "llvm/CodeGen/SelectionDAG.h"
  23 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
  24 #include "llvm/IR/Function.h"
  25 #include "llvm/Support/ErrorHandling.h"
  26
  27 #include "AVR.h"
  28 #include "AVRMachineFunctionInfo.h"
  29 #include "AVRSubtarget.h"
  30 #include "AVRTargetMachine.h"
  31 #include "MCTargetDesc/AVRMCTargetDesc.h"
  32
  33 namespace llvm {
  34
  35 AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM,
  36                                      const AVRSubtarget &STI)
  37     : TargetLowering(TM), Subtarget(STI) {
  38   // Set up the register classes.
  39   addRegisterClass(MVT::i8, &AVR::GPR8RegClass);
  40   addRegisterClass(MVT::i16, &AVR::DREGSRegClass);
  41
  42   // Compute derived properties from the register classes.
  43   computeRegisterProperties(Subtarget.getRegisterInfo());
  44
  45   setBooleanContents(ZeroOrOneBooleanContent);
  46   setBooleanVectorContents(ZeroOrOneBooleanContent);
  47   setSchedulingPreference(Sched::RegPressure);
  48   setStackPointerRegisterToSaveRestore(AVR::SP);
  49   setSupportsUnalignedAtomics(true);
  50
  51   setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
  52   setOperationAction(ISD::BlockAddress, MVT::i16, Custom);
  53
  54   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  55   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  56   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i8, Expand);
  57   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i16, Expand);
  58
  59   setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
  60
  61   for (MVT VT : MVT::integer_valuetypes()) {
  62     for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
  63       setLoadExtAction(N, VT, MVT::i1, Promote);
  64       setLoadExtAction(N, VT, MVT::i8, Expand);
  65     }
  66   }
  67
  68   setTruncStoreAction(MVT::i16, MVT::i8, Expand);
  69
  70   for (MVT VT : MVT::integer_valuetypes()) {
  71     setOperationAction(ISD::ADDC, VT, Legal);
  72     setOperationAction(ISD::SUBC, VT, Legal);
  73     setOperationAction(ISD::ADDE, VT, Legal);
  74     setOperationAction(ISD::SUBE, VT, Legal);
  75   }
  76
  77   // sub (x, imm) gets canonicalized to add (x, -imm), so for illegal types
  78   // revert into a sub since we don't have an add with immediate instruction.
  79   setOperationAction(ISD::ADD, MVT::i32, Custom);
  80   setOperationAction(ISD::ADD, MVT::i64, Custom);
  81
  82   // our shift instructions are only able to shift 1 bit at a time, so handle
  83   // this in a custom way.
  84   setOperationAction(ISD::SRA, MVT::i8, Custom);
  85   setOperationAction(ISD::SHL, MVT::i8, Custom);
  86   setOperationAction(ISD::SRL, MVT::i8, Custom);
  87   setOperationAction(ISD::SRA, MVT::i16, Custom);
  88   setOperationAction(ISD::SHL, MVT::i16, Custom);
  89   setOperationAction(ISD::SRL, MVT::i16, Custom);
  90   setOperationAction(ISD::SRA, MVT::i32, Custom);
  91   setOperationAction(ISD::SHL, MVT::i32, Custom);
  92   setOperationAction(ISD::SRL, MVT::i32, Custom);
  93   setOperationAction(ISD::SHL_PARTS, MVT::i16, Expand);
  94   setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand);
  95   setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand);
  96
  97   setOperationAction(ISD::ROTL, MVT::i8, Custom);
  98   setOperationAction(ISD::ROTL, MVT::i16, Expand);
  99   setOperationAction(ISD::ROTR, MVT::i8, Custom);
 100   setOperationAction(ISD::ROTR, MVT::i16, Expand);
 101
 102   setOperationAction(ISD::BR_CC, MVT::i8, Custom);
 103   setOperationAction(ISD::BR_CC, MVT::i16, Custom);
 104   setOperationAction(ISD::BR_CC, MVT::i32, Custom);
 105   setOperationAction(ISD::BR_CC, MVT::i64, Custom);
 106   setOperationAction(ISD::BRCOND, MVT::Other, Expand);
 107
 108   setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
 109   setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
 110   setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
 111   setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
 112   setOperationAction(ISD::SETCC, MVT::i8, Custom);
 113   setOperationAction(ISD::SETCC, MVT::i16, Custom);
 114   setOperationAction(ISD::SETCC, MVT::i32, Custom);
 115   setOperationAction(ISD::SETCC, MVT::i64, Custom);
 116   setOperationAction(ISD::SELECT, MVT::i8, Expand);
 117   setOperationAction(ISD::SELECT, MVT::i16, Expand);
 118
 119   setOperationAction(ISD::BSWAP, MVT::i16, Expand);
 120
 121   // Add support for postincrement and predecrement load/stores.
 122   setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
 123   setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
 124   setIndexedLoadAction(ISD::PRE_DEC, MVT::i8, Legal);
 125   setIndexedLoadAction(ISD::PRE_DEC, MVT::i16, Legal);
 126   setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
 127   setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
 128   setIndexedStoreAction(ISD::PRE_DEC, MVT::i8, Legal);
 129   setIndexedStoreAction(ISD::PRE_DEC, MVT::i16, Legal);
 130
 131   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
 132
 133   setOperationAction(ISD::VASTART, MVT::Other, Custom);
 134   setOperationAction(ISD::VAEND, MVT::Other, Expand);
 135   setOperationAction(ISD::VAARG, MVT::Other, Expand);
 136   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
 137
 138   // Atomic operations which must be lowered to rtlib calls
 139   for (MVT VT : MVT::integer_valuetypes()) {
 140     setOperationAction(ISD::ATOMIC_SWAP, VT, Expand);
 141     setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Expand);
 142     setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
 143     setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
 144     setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
 145     setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
 146     setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
 147   }
 148
 149   // Division/remainder
 150   setOperationAction(ISD::UDIV, MVT::i8, Expand);
 151   setOperationAction(ISD::UDIV, MVT::i16, Expand);
 152   setOperationAction(ISD::UREM, MVT::i8, Expand);
 153   setOperationAction(ISD::UREM, MVT::i16, Expand);
 154   setOperationAction(ISD::SDIV, MVT::i8, Expand);
 155   setOperationAction(ISD::SDIV, MVT::i16, Expand);
 156   setOperationAction(ISD::SREM, MVT::i8, Expand);
 157   setOperationAction(ISD::SREM, MVT::i16, Expand);
 158
 159   // Make division and modulus custom
 160   setOperationAction(ISD::UDIVREM, MVT::i8, Custom);
 161   setOperationAction(ISD::UDIVREM, MVT::i16, Custom);
 162   setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
 163   setOperationAction(ISD::SDIVREM, MVT::i8, Custom);
 164   setOperationAction(ISD::SDIVREM, MVT::i16, Custom);
 165   setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
 166
 167   // Do not use MUL. The AVR instructions are closer to SMUL_LOHI &co.
 168   setOperationAction(ISD::MUL, MVT::i8, Expand);
 169   setOperationAction(ISD::MUL, MVT::i16, Expand);
 170
 171   // Expand 16 bit multiplications.
 172   setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
 173   setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
 174
 175   // Expand multiplications to libcalls when there is
 176   // no hardware MUL.
 177   if (!Subtarget.supportsMultiplication()) {
 178     setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
 179     setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
 180   }
 181
 182   for (MVT VT : MVT::integer_valuetypes()) {
 183     setOperationAction(ISD::MULHS, VT, Expand);
 184     setOperationAction(ISD::MULHU, VT, Expand);
 185   }
 186
 187   for (MVT VT : MVT::integer_valuetypes()) {
 188     setOperationAction(ISD::CTPOP, VT, Expand);
 189     setOperationAction(ISD::CTLZ, VT, Expand);
 190     setOperationAction(ISD::CTTZ, VT, Expand);
 191   }
 192
 193   for (MVT VT : MVT::integer_valuetypes()) {
 194     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
 195     // TODO: The generated code is pretty poor. Investigate using the
 196     // same "shift and subtract with carry" trick that we do for
 197     // extending 8-bit to 16-bit. This may require infrastructure
 198     // improvements in how we treat 16-bit "registers" to be feasible.
 199   }
 200
 201   // Division and modulus rtlib functions
 202   setLibcallName(RTLIB::SDIVREM_I8, "__divmodqi4");
 203   setLibcallName(RTLIB::SDIVREM_I16, "__divmodhi4");
 204   setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
 205   setLibcallName(RTLIB::UDIVREM_I8, "__udivmodqi4");
 206   setLibcallName(RTLIB::UDIVREM_I16, "__udivmodhi4");
 207   setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
 208
 209   // Several of the runtime library functions use a special calling conv
 210   setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::AVR_BUILTIN);
 211   setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::AVR_BUILTIN);
 212   setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::AVR_BUILTIN);
 213   setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::AVR_BUILTIN);
 214
 215   // Trigonometric rtlib functions
 216   setLibcallName(RTLIB::SIN_F32, "sin");
 217   setLibcallName(RTLIB::COS_F32, "cos");
 218
 219   setMinFunctionAlignment(Align(2));
 220   setMinimumJumpTableEntries(UINT_MAX);
 221 }
 222
 223 const char *AVRTargetLowering::getTargetNodeName(unsigned Opcode) const {
 224 #define NODE(name)                                                             \
 225   case AVRISD::name:                                                           \
 226     return #name
 227
 228   switch (Opcode) {
 229   default:
 230     return nullptr;
 231     NODE(RET_GLUE);
 232     NODE(RETI_GLUE);
 233     NODE(CALL);
 234     NODE(WRAPPER);
 235     NODE(LSL);
 236     NODE(LSLW);
 237     NODE(LSR);
 238     NODE(LSRW);
 239     NODE(ROL);
 240     NODE(ROR);
 241     NODE(ASR);
 242     NODE(ASRW);
 243     NODE(LSLLOOP);
 244     NODE(LSRLOOP);
 245     NODE(ROLLOOP);
 246     NODE(RORLOOP);
 247     NODE(ASRLOOP);
 248     NODE(BRCOND);
 249     NODE(CMP);
 250     NODE(CMPC);
 251     NODE(TST);
 252     NODE(SELECT_CC);
 253 #undef NODE
 254   }
 255 }
 256
 257 EVT AVRTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
 258                                           EVT VT) const {
 259   assert(!VT.isVector() && "No AVR SetCC type for vectors!");
 260   return MVT::i8;
 261 }
 262
 263 SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
 264   unsigned Opc8;
 265   const SDNode *N = Op.getNode();
 266   EVT VT = Op.getValueType();
 267   SDLoc dl(N);
 268   assert(llvm::has_single_bit<uint32_t>(VT.getSizeInBits()) &&
 269          "Expected power-of-2 shift amount");
 270
 271   if (VT.getSizeInBits() == 32) {
 272     if (!isa<ConstantSDNode>(N->getOperand(1))) {
 273       // 32-bit shifts are converted to a loop in IR.
 274       // This should be unreachable.
 275       report_fatal_error("Expected a constant shift amount!");
 276     }
 277     SDVTList ResTys = DAG.getVTList(MVT::i16, MVT::i16);
 278     SDValue SrcLo =
 279         DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0),
 280                     DAG.getConstant(0, dl, MVT::i16));
 281     SDValue SrcHi =
 282         DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0),
 283                     DAG.getConstant(1, dl, MVT::i16));
 284     uint64_t ShiftAmount = N->getConstantOperandVal(1);
 285     if (ShiftAmount == 16) {
 286       // Special case these two operations because they appear to be used by the
 287       // generic codegen parts to lower 32-bit numbers.
 288       // TODO: perhaps we can lower shift amounts bigger than 16 to a 16-bit
 289       // shift of a part of the 32-bit value?
 290       switch (Op.getOpcode()) {
 291       case ISD::SHL: {
 292         SDValue Zero = DAG.getConstant(0, dl, MVT::i16);
 293         return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, Zero, SrcLo);
 294       }
 295       case ISD::SRL: {
 296         SDValue Zero = DAG.getConstant(0, dl, MVT::i16);
 297         return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, SrcHi, Zero);
 298       }
 299       }
 300     }
 301     SDValue Cnt = DAG.getTargetConstant(ShiftAmount, dl, MVT::i8);
 302     unsigned Opc;
 303     switch (Op.getOpcode()) {
 304     default:
 305       llvm_unreachable("Invalid 32-bit shift opcode!");
 306     case ISD::SHL:
 307       Opc = AVRISD::LSLW;
 308       break;
 309     case ISD::SRL:
 310       Opc = AVRISD::LSRW;
 311       break;
 312     case ISD::SRA:
 313       Opc = AVRISD::ASRW;
 314       break;
 315     }
 316     SDValue Result = DAG.getNode(Opc, dl, ResTys, SrcLo, SrcHi, Cnt);
 317     return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, Result.getValue(0),
 318                        Result.getValue(1));
 319   }
 320
 321   // Expand non-constant shifts to loops.
 322   if (!isa<ConstantSDNode>(N->getOperand(1))) {
 323     switch (Op.getOpcode()) {
 324     default:
 325       llvm_unreachable("Invalid shift opcode!");
 326     case ISD::SHL:
 327       return DAG.getNode(AVRISD::LSLLOOP, dl, VT, N->getOperand(0),
 328                          N->getOperand(1));
 329     case ISD::SRL:
 330       return DAG.getNode(AVRISD::LSRLOOP, dl, VT, N->getOperand(0),
 331                          N->getOperand(1));
 332     case ISD::ROTL: {
 333       SDValue Amt = N->getOperand(1);
 334       EVT AmtVT = Amt.getValueType();
 335       Amt = DAG.getNode(ISD::AND, dl, AmtVT, Amt,
 336                         DAG.getConstant(VT.getSizeInBits() - 1, dl, AmtVT));
 337       return DAG.getNode(AVRISD::ROLLOOP, dl, VT, N->getOperand(0), Amt);
 338     }
 339     case ISD::ROTR: {
 340       SDValue Amt = N->getOperand(1);
 341       EVT AmtVT = Amt.getValueType();
 342       Amt = DAG.getNode(ISD::AND, dl, AmtVT, Amt,
 343                         DAG.getConstant(VT.getSizeInBits() - 1, dl, AmtVT));
 344       return DAG.getNode(AVRISD::RORLOOP, dl, VT, N->getOperand(0), Amt);
 345     }
 346     case ISD::SRA:
 347       return DAG.getNode(AVRISD::ASRLOOP, dl, VT, N->getOperand(0),
 348                          N->getOperand(1));
 349     }
 350   }
 351
 352   uint64_t ShiftAmount = N->getConstantOperandVal(1);
 353   SDValue Victim = N->getOperand(0);
 354
 355   switch (Op.getOpcode()) {
 356   case ISD::SRA:
 357     Opc8 = AVRISD::ASR;
 358     break;
 359   case ISD::ROTL:
 360     Opc8 = AVRISD::ROL;
 361     ShiftAmount = ShiftAmount % VT.getSizeInBits();
 362     break;
 363   case ISD::ROTR:
 364     Opc8 = AVRISD::ROR;
 365     ShiftAmount = ShiftAmount % VT.getSizeInBits();
 366     break;
 367   case ISD::SRL:
 368     Opc8 = AVRISD::LSR;
 369     break;
 370   case ISD::SHL:
 371     Opc8 = AVRISD::LSL;
 372     break;
 373   default:
 374     llvm_unreachable("Invalid shift opcode");
 375   }
 376
 377   // Optimize int8/int16 shifts.
 378   if (VT.getSizeInBits() == 8) {
 379     if (Op.getOpcode() == ISD::SHL && 4 <= ShiftAmount && ShiftAmount < 7) {
 380       // Optimize LSL when 4 <= ShiftAmount <= 6.
 381       Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
 382       Victim =
 383           DAG.getNode(ISD::AND, dl, VT, Victim, DAG.getConstant(0xf0, dl, VT));
 384       ShiftAmount -= 4;
 385     } else if (Op.getOpcode() == ISD::SRL && 4 <= ShiftAmount &&
 386                ShiftAmount < 7) {
 387       // Optimize LSR when 4 <= ShiftAmount <= 6.
 388       Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
 389       Victim =
 390           DAG.getNode(ISD::AND, dl, VT, Victim, DAG.getConstant(0x0f, dl, VT));
 391       ShiftAmount -= 4;
 392     } else if (Op.getOpcode() == ISD::SHL && ShiftAmount == 7) {
 393       // Optimize LSL when ShiftAmount == 7.
 394       Victim = DAG.getNode(AVRISD::LSLBN, dl, VT, Victim,
 395                            DAG.getConstant(7, dl, VT));
 396       ShiftAmount = 0;
 397     } else if (Op.getOpcode() == ISD::SRL && ShiftAmount == 7) {
 398       // Optimize LSR when ShiftAmount == 7.
 399       Victim = DAG.getNode(AVRISD::LSRBN, dl, VT, Victim,
 400                            DAG.getConstant(7, dl, VT));
 401       ShiftAmount = 0;
 402     } else if (Op.getOpcode() == ISD::SRA && ShiftAmount == 6) {
 403       // Optimize ASR when ShiftAmount == 6.
 404       Victim = DAG.getNode(AVRISD::ASRBN, dl, VT, Victim,
 405                            DAG.getConstant(6, dl, VT));
 406       ShiftAmount = 0;
 407     } else if (Op.getOpcode() == ISD::SRA && ShiftAmount == 7) {
 408       // Optimize ASR when ShiftAmount == 7.
 409       Victim = DAG.getNode(AVRISD::ASRBN, dl, VT, Victim,
 410                            DAG.getConstant(7, dl, VT));
 411       ShiftAmount = 0;
 412     } else if (Op.getOpcode() == ISD::ROTL && ShiftAmount == 3) {
 413       // Optimize left rotation 3 bits to swap then right rotation 1 bit.
 414       Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
 415       Victim = DAG.getNode(AVRISD::ROR, dl, VT, Victim);
 416       ShiftAmount = 0;
 417     } else if (Op.getOpcode() == ISD::ROTR && ShiftAmount == 3) {
 418       // Optimize right rotation 3 bits to swap then left rotation 1 bit.
 419       Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
 420       Victim = DAG.getNode(AVRISD::ROL, dl, VT, Victim);
 421       ShiftAmount = 0;
 422     } else if (Op.getOpcode() == ISD::ROTL && ShiftAmount == 7) {
 423       // Optimize left rotation 7 bits to right rotation 1 bit.
 424       Victim = DAG.getNode(AVRISD::ROR, dl, VT, Victim);
 425       ShiftAmount = 0;
 426     } else if (Op.getOpcode() == ISD::ROTR && ShiftAmount == 7) {
 427       // Optimize right rotation 7 bits to left rotation 1 bit.
 428       Victim = DAG.getNode(AVRISD::ROL, dl, VT, Victim);
 429       ShiftAmount = 0;
 430     } else if ((Op.getOpcode() == ISD::ROTR || Op.getOpcode() == ISD::ROTL) &&
 431                ShiftAmount >= 4) {
 432       // Optimize left/right rotation with the SWAP instruction.
 433       Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
 434       ShiftAmount -= 4;
 435     }
 436   } else if (VT.getSizeInBits() == 16) {
 437     if (Op.getOpcode() == ISD::SRA)
 438       // Special optimization for int16 arithmetic right shift.
 439       switch (ShiftAmount) {
 440       case 15:
 441         Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
 442                              DAG.getConstant(15, dl, VT));
 443         ShiftAmount = 0;
 444         break;
 445       case 14:
 446         Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
 447                              DAG.getConstant(14, dl, VT));
 448         ShiftAmount = 0;
 449         break;
 450       case 7:
 451         Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
 452                              DAG.getConstant(7, dl, VT));
 453         ShiftAmount = 0;
 454         break;
 455       default:
 456         break;
 457       }
 458     if (4 <= ShiftAmount && ShiftAmount < 8)
 459       switch (Op.getOpcode()) {
 460       case ISD::SHL:
 461         Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim,
 462                              DAG.getConstant(4, dl, VT));
 463         ShiftAmount -= 4;
 464         break;
 465       case ISD::SRL:
 466         Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim,
 467                              DAG.getConstant(4, dl, VT));
 468         ShiftAmount -= 4;
 469         break;
 470       default:
 471         break;
 472       }
 473     else if (8 <= ShiftAmount && ShiftAmount < 12)
 474       switch (Op.getOpcode()) {
 475       case ISD::SHL:
 476         Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim,
 477                              DAG.getConstant(8, dl, VT));
 478         ShiftAmount -= 8;
 479         // Only operate on the higher byte for remaining shift bits.
 480         Opc8 = AVRISD::LSLHI;
 481         break;
 482       case ISD::SRL:
 483         Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim,
 484                              DAG.getConstant(8, dl, VT));
 485         ShiftAmount -= 8;
 486         // Only operate on the lower byte for remaining shift bits.
 487         Opc8 = AVRISD::LSRLO;
 488         break;
 489       case ISD::SRA:
 490         Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
 491                              DAG.getConstant(8, dl, VT));
 492         ShiftAmount -= 8;
 493         // Only operate on the lower byte for remaining shift bits.
 494         Opc8 = AVRISD::ASRLO;
 495         break;
 496       default:
 497         break;
 498       }
 499     else if (12 <= ShiftAmount)
 500       switch (Op.getOpcode()) {
 501       case ISD::SHL:
 502         Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim,
 503                              DAG.getConstant(12, dl, VT));
 504         ShiftAmount -= 12;
 505         // Only operate on the higher byte for remaining shift bits.
 506         Opc8 = AVRISD::LSLHI;
 507         break;
 508       case ISD::SRL:
 509         Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim,
 510                              DAG.getConstant(12, dl, VT));
 511         ShiftAmount -= 12;
 512         // Only operate on the lower byte for remaining shift bits.
 513         Opc8 = AVRISD::LSRLO;
 514         break;
 515       case ISD::SRA:
 516         Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
 517                              DAG.getConstant(8, dl, VT));
 518         ShiftAmount -= 8;
 519         // Only operate on the lower byte for remaining shift bits.
 520         Opc8 = AVRISD::ASRLO;
 521         break;
 522       default:
 523         break;
 524       }
 525   }
 526
 527   while (ShiftAmount--) {
 528     Victim = DAG.getNode(Opc8, dl, VT, Victim);
 529   }
 530
 531   return Victim;
 532 }
 533
 534 SDValue AVRTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
 535   unsigned Opcode = Op->getOpcode();
 536   assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
 537          "Invalid opcode for Div/Rem lowering");
 538   bool IsSigned = (Opcode == ISD::SDIVREM);
 539   EVT VT = Op->getValueType(0);
 540   Type *Ty = VT.getTypeForEVT(*DAG.getContext());
 541
 542   RTLIB::Libcall LC;
 543   switch (VT.getSimpleVT().SimpleTy) {
 544   default:
 545     llvm_unreachable("Unexpected request for libcall!");
 546   case MVT::i8:
 547     LC = IsSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8;
 548     break;
 549   case MVT::i16:
 550     LC = IsSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16;
 551     break;
 552   case MVT::i32:
 553     LC = IsSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
 554     break;
 555   }
 556
 557   SDValue InChain = DAG.getEntryNode();
 558
 559   TargetLowering::ArgListTy Args;
 560   TargetLowering::ArgListEntry Entry;
 561   for (SDValue const &Value : Op->op_values()) {
 562     Entry.Node = Value;
 563     Entry.Ty = Value.getValueType().getTypeForEVT(*DAG.getContext());
 564     Entry.IsSExt = IsSigned;
 565     Entry.IsZExt = !IsSigned;
 566     Args.push_back(Entry);
 567   }
 568
 569   SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
 570                                          getPointerTy(DAG.getDataLayout()));
 571
 572   Type *RetTy = (Type *)StructType::get(Ty, Ty);
 573
 574   SDLoc dl(Op);
 575   TargetLowering::CallLoweringInfo CLI(DAG);
 576   CLI.setDebugLoc(dl)
 577       .setChain(InChain)
 578       .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
 579       .setInRegister()
 580       .setSExtResult(IsSigned)
 581       .setZExtResult(!IsSigned);
 582
 583   std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
 584   return CallInfo.first;
 585 }
 586
 587 SDValue AVRTargetLowering::LowerGlobalAddress(SDValue Op,
 588                                               SelectionDAG &DAG) const {
 589   auto DL = DAG.getDataLayout();
 590
 591   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
 592   int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
 593
 594   // Create the TargetGlobalAddress node, folding in the constant offset.
 595   SDValue Result =
 596       DAG.getTargetGlobalAddress(GV, SDLoc(Op), getPointerTy(DL), Offset);
 597   return DAG.getNode(AVRISD::WRAPPER, SDLoc(Op), getPointerTy(DL), Result);
 598 }
 599
 600 SDValue AVRTargetLowering::LowerBlockAddress(SDValue Op,
 601                                              SelectionDAG &DAG) const {
 602   auto DL = DAG.getDataLayout();
 603   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
 604
 605   SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy(DL));
 606
 607   return DAG.getNode(AVRISD::WRAPPER, SDLoc(Op), getPointerTy(DL), Result);
 608 }
 609
 610 /// IntCCToAVRCC - Convert a DAG integer condition code to an AVR CC.
 611 static AVRCC::CondCodes intCCToAVRCC(ISD::CondCode CC) {
 612   switch (CC) {
 613   default:
 614     llvm_unreachable("Unknown condition code!");
 615   case ISD::SETEQ:
 616     return AVRCC::COND_EQ;
 617   case ISD::SETNE:
 618     return AVRCC::COND_NE;
 619   case ISD::SETGE:
 620     return AVRCC::COND_GE;
 621   case ISD::SETLT:
 622     return AVRCC::COND_LT;
 623   case ISD::SETUGE:
 624     return AVRCC::COND_SH;
 625   case ISD::SETULT:
 626     return AVRCC::COND_LO;
 627   }
 628 }
 629
 630 /// Returns appropriate CP/CPI/CPC nodes code for the given 8/16-bit operands.
 631 SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS,
 632                                      SelectionDAG &DAG, SDLoc DL) const {
 633   assert((LHS.getSimpleValueType() == RHS.getSimpleValueType()) &&
 634          "LHS and RHS have different types");
 635   assert(((LHS.getSimpleValueType() == MVT::i16) ||
 636           (LHS.getSimpleValueType() == MVT::i8)) &&
 637          "invalid comparison type");
 638
 639   SDValue Cmp;
 640
 641   if (LHS.getSimpleValueType() == MVT::i16 && isa<ConstantSDNode>(RHS)) {
 642     uint64_t Imm = RHS->getAsZExtVal();
 643     // Generate a CPI/CPC pair if RHS is a 16-bit constant. Use the zero
 644     // register for the constant RHS if its lower or higher byte is zero.
 645     SDValue LHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
 646                                 DAG.getIntPtrConstant(0, DL));
 647     SDValue LHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
 648                                 DAG.getIntPtrConstant(1, DL));
 649     SDValue RHSlo = (Imm & 0xff) == 0
 650                         ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
 651                         : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
 652                                       DAG.getIntPtrConstant(0, DL));
 653     SDValue RHShi = (Imm & 0xff00) == 0
 654                         ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
 655                         : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
 656                                       DAG.getIntPtrConstant(1, DL));
 657     Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo);
 658     Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
 659   } else if (RHS.getSimpleValueType() == MVT::i16 && isa<ConstantSDNode>(LHS)) {
 660     // Generate a CPI/CPC pair if LHS is a 16-bit constant. Use the zero
 661     // register for the constant LHS if its lower or higher byte is zero.
 662     uint64_t Imm = LHS->getAsZExtVal();
 663     SDValue LHSlo = (Imm & 0xff) == 0
 664                         ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
 665                         : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
 666                                       DAG.getIntPtrConstant(0, DL));
 667     SDValue LHShi = (Imm & 0xff00) == 0
 668                         ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
 669                         : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
 670                                       DAG.getIntPtrConstant(1, DL));
 671     SDValue RHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
 672                                 DAG.getIntPtrConstant(0, DL));
 673     SDValue RHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
 674                                 DAG.getIntPtrConstant(1, DL));
 675     Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo);
 676     Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
 677   } else {
 678     // Generate ordinary 16-bit comparison.
 679     Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHS, RHS);
 680   }
 681
 682   return Cmp;
 683 }
 684
 685 /// Returns appropriate AVR CMP/CMPC nodes and corresponding condition code for
 686 /// the given operands.
 687 SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
 688                                      SDValue &AVRcc, SelectionDAG &DAG,
 689                                      SDLoc DL) const {
 690   SDValue Cmp;
 691   EVT VT = LHS.getValueType();
 692   bool UseTest = false;
 693
 694   switch (CC) {
 695   default:
 696     break;
 697   case ISD::SETLE: {
 698     // Swap operands and reverse the branching condition.
 699     std::swap(LHS, RHS);
 700     CC = ISD::SETGE;
 701     break;
 702   }
 703   case ISD::SETGT: {
 704     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
 705       switch (C->getSExtValue()) {
 706       case -1: {
 707         // When doing lhs > -1 use a tst instruction on the top part of lhs
 708         // and use brpl instead of using a chain of cp/cpc.
 709         UseTest = true;
 710         AVRcc = DAG.getConstant(AVRCC::COND_PL, DL, MVT::i8);
 711         break;
 712       }
 713       case 0: {
 714         // Turn lhs > 0 into 0 < lhs since 0 can be materialized with
 715         // __zero_reg__ in lhs.
 716         RHS = LHS;
 717         LHS = DAG.getConstant(0, DL, VT);
 718         CC = ISD::SETLT;
 719         break;
 720       }
 721       default: {
 722         // Turn lhs < rhs with lhs constant into rhs >= lhs+1, this allows
 723         // us to  fold the constant into the cmp instruction.
 724         RHS = DAG.getConstant(C->getSExtValue() + 1, DL, VT);
 725         CC = ISD::SETGE;
 726         break;
 727       }
 728       }
 729       break;
 730     }
 731     // Swap operands and reverse the branching condition.
 732     std::swap(LHS, RHS);
 733     CC = ISD::SETLT;
 734     break;
 735   }
 736   case ISD::SETLT: {
 737     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
 738       switch (C->getSExtValue()) {
 739       case 1: {
 740         // Turn lhs < 1 into 0 >= lhs since 0 can be materialized with
 741         // __zero_reg__ in lhs.
 742         RHS = LHS;
 743         LHS = DAG.getConstant(0, DL, VT);
 744         CC = ISD::SETGE;
 745         break;
 746       }
 747       case 0: {
 748         // When doing lhs < 0 use a tst instruction on the top part of lhs
 749         // and use brmi instead of using a chain of cp/cpc.
 750         UseTest = true;
 751         AVRcc = DAG.getConstant(AVRCC::COND_MI, DL, MVT::i8);
 752         break;
 753       }
 754       }
 755     }
 756     break;
 757   }
 758   case ISD::SETULE: {
 759     // Swap operands and reverse the branching condition.
 760     std::swap(LHS, RHS);
 761     CC = ISD::SETUGE;
 762     break;
 763   }
 764   case ISD::SETUGT: {
 765     // Turn lhs < rhs with lhs constant into rhs >= lhs+1, this allows us to
 766     // fold the constant into the cmp instruction.
 767     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
 768       RHS = DAG.getConstant(C->getSExtValue() + 1, DL, VT);
 769       CC = ISD::SETUGE;
 770       break;
 771     }
 772     // Swap operands and reverse the branching condition.
 773     std::swap(LHS, RHS);
 774     CC = ISD::SETULT;
 775     break;
 776   }
 777   }
 778
 779   // Expand 32 and 64 bit comparisons with custom CMP and CMPC nodes instead of
 780   // using the default and/or/xor expansion code which is much longer.
 781   if (VT == MVT::i32) {
 782     SDValue LHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS,
 783                                 DAG.getIntPtrConstant(0, DL));
 784     SDValue LHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS,
 785                                 DAG.getIntPtrConstant(1, DL));
 786     SDValue RHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS,
 787                                 DAG.getIntPtrConstant(0, DL));
 788     SDValue RHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS,
 789                                 DAG.getIntPtrConstant(1, DL));
 790
 791     if (UseTest) {
 792       // When using tst we only care about the highest part.
 793       SDValue Top = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHShi,
 794                                 DAG.getIntPtrConstant(1, DL));
 795       Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top);
 796     } else {
 797       Cmp = getAVRCmp(LHSlo, RHSlo, DAG, DL);
 798       Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
 799     }
 800   } else if (VT == MVT::i64) {
 801     SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS,
 802                                 DAG.getIntPtrConstant(0, DL));
 803     SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS,
 804                                 DAG.getIntPtrConstant(1, DL));
 805
 806     SDValue LHS0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_0,
 807                                DAG.getIntPtrConstant(0, DL));
 808     SDValue LHS1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_0,
 809                                DAG.getIntPtrConstant(1, DL));
 810     SDValue LHS2 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_1,
 811                                DAG.getIntPtrConstant(0, DL));
 812     SDValue LHS3 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_1,
 813                                DAG.getIntPtrConstant(1, DL));
 814
 815     SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS,
 816                                 DAG.getIntPtrConstant(0, DL));
 817     SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS,
 818                                 DAG.getIntPtrConstant(1, DL));
 819
 820     SDValue RHS0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_0,
 821                                DAG.getIntPtrConstant(0, DL));
 822     SDValue RHS1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_0,
 823                                DAG.getIntPtrConstant(1, DL));
 824     SDValue RHS2 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_1,
 825                                DAG.getIntPtrConstant(0, DL));
 826     SDValue RHS3 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_1,
 827                                DAG.getIntPtrConstant(1, DL));
 828
 829     if (UseTest) {
 830       // When using tst we only care about the highest part.
 831       SDValue Top = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS3,
 832                                 DAG.getIntPtrConstant(1, DL));
 833       Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top);
 834     } else {
 835       Cmp = getAVRCmp(LHS0, RHS0, DAG, DL);
 836       Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS1, RHS1, Cmp);
 837       Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS2, RHS2, Cmp);
 838       Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS3, RHS3, Cmp);
 839     }
 840   } else if (VT == MVT::i8 || VT == MVT::i16) {
 841     if (UseTest) {
 842       // When using tst we only care about the highest part.
 843       Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue,
 844                         (VT == MVT::i8)
 845                             ? LHS
 846                             : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8,
 847                                           LHS, DAG.getIntPtrConstant(1, DL)));
 848     } else {
 849       Cmp = getAVRCmp(LHS, RHS, DAG, DL);
 850     }
 851   } else {
 852     llvm_unreachable("Invalid comparison size");
 853   }
 854
 855   // When using a test instruction AVRcc is already set.
 856   if (!UseTest) {
 857     AVRcc = DAG.getConstant(intCCToAVRCC(CC), DL, MVT::i8);
 858   }
 859
 860   return Cmp;
 861 }
 862
 863 SDValue AVRTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
 864   SDValue Chain = Op.getOperand(0);
 865   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
 866   SDValue LHS = Op.getOperand(2);
 867   SDValue RHS = Op.getOperand(3);
 868   SDValue Dest = Op.getOperand(4);
 869   SDLoc dl(Op);
 870
 871   SDValue TargetCC;
 872   SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, dl);
 873
 874   return DAG.getNode(AVRISD::BRCOND, dl, MVT::Other, Chain, Dest, TargetCC,
 875                      Cmp);
 876 }
 877
 878 SDValue AVRTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
 879   SDValue LHS = Op.getOperand(0);
 880   SDValue RHS = Op.getOperand(1);
 881   SDValue TrueV = Op.getOperand(2);
 882   SDValue FalseV = Op.getOperand(3);
 883   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
 884   SDLoc dl(Op);
 885
 886   SDValue TargetCC;
 887   SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, dl);
 888
 889   SDValue Ops[] = {TrueV, FalseV, TargetCC, Cmp};
 890
 891   return DAG.getNode(AVRISD::SELECT_CC, dl, Op.getValueType(), Ops);
 892 }
 893
 894 SDValue AVRTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
 895   SDValue LHS = Op.getOperand(0);
 896   SDValue RHS = Op.getOperand(1);
 897   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
 898   SDLoc DL(Op);
 899
 900   SDValue TargetCC;
 901   SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, DL);
 902
 903   SDValue TrueV = DAG.getConstant(1, DL, Op.getValueType());
 904   SDValue FalseV = DAG.getConstant(0, DL, Op.getValueType());
 905   SDValue Ops[] = {TrueV, FalseV, TargetCC, Cmp};
 906
 907   return DAG.getNode(AVRISD::SELECT_CC, DL, Op.getValueType(), Ops);
 908 }
 909
 910 SDValue AVRTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
 911   const MachineFunction &MF = DAG.getMachineFunction();
 912   const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
 913   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
 914   auto DL = DAG.getDataLayout();
 915   SDLoc dl(Op);
 916
 917   // Vastart just stores the address of the VarArgsFrameIndex slot into the
 918   // memory location argument.
 919   SDValue FI = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), getPointerTy(DL));
 920
 921   return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
 922                       MachinePointerInfo(SV));
 923 }
 924
 925 // Modify the existing ISD::INLINEASM node to add the implicit zero register.
 926 SDValue AVRTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
 927   SDValue ZeroReg = DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8);
 928   if (Op.getOperand(Op.getNumOperands() - 1) == ZeroReg ||
 929       Op.getOperand(Op.getNumOperands() - 2) == ZeroReg) {
 930     // Zero register has already been added. Don't add it again.
 931     // If this isn't handled, we get called over and over again.
 932     return Op;
 933   }
 934
 935   // Get a list of operands to the new INLINEASM node. This is mostly a copy,
 936   // with some edits.
 937   // Add the following operands at the end (but before the glue node, if it's
 938   // there):
 939   //  - The flags of the implicit zero register operand.
 940   //  - The implicit zero register operand itself.
 941   SDLoc dl(Op);
 942   SmallVector<SDValue, 8> Ops;
 943   SDNode *N = Op.getNode();
 944   SDValue Glue;
 945   for (unsigned I = 0; I < N->getNumOperands(); I++) {
 946     SDValue Operand = N->getOperand(I);
 947     if (Operand.getValueType() == MVT::Glue) {
 948       // The glue operand always needs to be at the end, so we need to treat it
 949       // specially.
 950       Glue = Operand;
 951     } else {
 952       Ops.push_back(Operand);
 953     }
 954   }
 955   InlineAsm::Flag Flags(InlineAsm::Kind::RegUse, 1);
 956   Ops.push_back(DAG.getTargetConstant(Flags, dl, MVT::i32));
 957   Ops.push_back(ZeroReg);
 958   if (Glue) {
 959     Ops.push_back(Glue);
 960   }
 961
 962   // Replace the current INLINEASM node with a new one that has the zero
 963   // register as implicit parameter.
 964   SDValue New = DAG.getNode(N->getOpcode(), dl, N->getVTList(), Ops);
 965   DAG.ReplaceAllUsesOfValueWith(Op, New);
 966   DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), New.getValue(1));
 967
 968   return New;
 969 }
 970
 971 SDValue AVRTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
 972   switch (Op.getOpcode()) {
 973   default:
 974     llvm_unreachable("Don't know how to custom lower this!");
 975   case ISD::SHL:
 976   case ISD::SRA:
 977   case ISD::SRL:
 978   case ISD::ROTL:
 979   case ISD::ROTR:
 980     return LowerShifts(Op, DAG);
 981   case ISD::GlobalAddress:
 982     return LowerGlobalAddress(Op, DAG);
 983   case ISD::BlockAddress:
 984     return LowerBlockAddress(Op, DAG);
 985   case ISD::BR_CC:
 986     return LowerBR_CC(Op, DAG);
 987   case ISD::SELECT_CC:
 988     return LowerSELECT_CC(Op, DAG);
 989   case ISD::SETCC:
 990     return LowerSETCC(Op, DAG);
 991   case ISD::VASTART:
 992     return LowerVASTART(Op, DAG);
 993   case ISD::SDIVREM:
 994   case ISD::UDIVREM:
 995     return LowerDivRem(Op, DAG);
 996   case ISD::INLINEASM:
 997     return LowerINLINEASM(Op, DAG);
 998   }
 999
1000   return SDValue();
1001 }
1002
1003 /// Replace a node with an illegal result type
1004 /// with a new node built out of custom code.
1005 void AVRTargetLowering::ReplaceNodeResults(SDNode *N,
1006                                            SmallVectorImpl<SDValue> &Results,
1007                                            SelectionDAG &DAG) const {
1008   SDLoc DL(N);
1009
1010   switch (N->getOpcode()) {
1011   case ISD::ADD: {
1012     // Convert add (x, imm) into sub (x, -imm).
1013     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1014       SDValue Sub = DAG.getNode(
1015           ISD::SUB, DL, N->getValueType(0), N->getOperand(0),
1016           DAG.getConstant(-C->getAPIntValue(), DL, C->getValueType(0)));
1017       Results.push_back(Sub);
1018     }
1019     break;
1020   }
1021   default: {
1022     SDValue Res = LowerOperation(SDValue(N, 0), DAG);
1023
1024     for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I)
1025       Results.push_back(Res.getValue(I));
1026
1027     break;
1028   }
1029   }
1030 }
1031
1032 /// Return true if the addressing mode represented
1033 /// by AM is legal for this target, for a load/store of the specified type.
1034 bool AVRTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1035                                               const AddrMode &AM, Type *Ty,
1036                                               unsigned AS,
1037                                               Instruction *I) const {
1038   int64_t Offs = AM.BaseOffs;
1039
1040   // Allow absolute addresses.
1041   if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && Offs == 0) {
1042     return true;
1043   }
1044
1045   // Flash memory instructions only allow zero offsets.
1046   if (isa<PointerType>(Ty) && AS == AVR::ProgramMemory) {
1047     return false;
1048   }
1049
1050   // Allow reg+<6bit> offset.
1051   if (Offs < 0)
1052     Offs = -Offs;
1053   if (AM.BaseGV == nullptr && AM.HasBaseReg && AM.Scale == 0 &&
1054       isUInt<6>(Offs)) {
1055     return true;
1056   }
1057
1058   return false;
1059 }
1060
1061 /// Returns true by value, base pointer and
1062 /// offset pointer and addressing mode by reference if the node's address
1063 /// can be legally represented as pre-indexed load / store address.
1064 bool AVRTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
1065                                                   SDValue &Offset,
1066                                                   ISD::MemIndexedMode &AM,
1067                                                   SelectionDAG &DAG) const {
1068   EVT VT;
1069   const SDNode *Op;
1070   SDLoc DL(N);
1071
1072   if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1073     VT = LD->getMemoryVT();
1074     Op = LD->getBasePtr().getNode();
1075     if (LD->getExtensionType() != ISD::NON_EXTLOAD)
1076       return false;
1077     if (AVR::isProgramMemoryAccess(LD)) {
1078       return false;
1079     }
1080   } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
1081     VT = ST->getMemoryVT();
1082     Op = ST->getBasePtr().getNode();
1083     if (AVR::isProgramMemoryAccess(ST)) {
1084       return false;
1085     }
1086   } else {
1087     return false;
1088   }
1089
1090   if (VT != MVT::i8 && VT != MVT::i16) {
1091     return false;
1092   }
1093
1094   if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) {
1095     return false;
1096   }
1097
1098   if (const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
1099     int RHSC = RHS->getSExtValue();
1100     if (Op->getOpcode() == ISD::SUB)
1101       RHSC = -RHSC;
1102
1103     if ((VT == MVT::i16 && RHSC != -2) || (VT == MVT::i8 && RHSC != -1)) {
1104       return false;
1105     }
1106
1107     Base = Op->getOperand(0);
1108     Offset = DAG.getSignedConstant(RHSC, DL, MVT::i8);
1109     AM = ISD::PRE_DEC;
1110
1111     return true;
1112   }
1113
1114   return false;
1115 }
1116
1117 /// Returns true by value, base pointer and
1118 /// offset pointer and addressing mode by reference if this node can be
1119 /// combined with a load / store to form a post-indexed load / store.
1120 bool AVRTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
1121                                                    SDValue &Base,
1122                                                    SDValue &Offset,
1123                                                    ISD::MemIndexedMode &AM,
1124                                                    SelectionDAG &DAG) const {
1125   EVT VT;
1126   SDLoc DL(N);
1127
1128   if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1129     VT = LD->getMemoryVT();
1130     if (LD->getExtensionType() != ISD::NON_EXTLOAD)
1131       return false;
1132   } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
1133     VT = ST->getMemoryVT();
1134     // We can not store to program memory.
1135     if (AVR::isProgramMemoryAccess(ST))
1136       return false;
1137     // Since the high byte need to be stored first, we can not emit
1138     // i16 post increment store like:
1139     // st X+, r24
1140     // st X+, r25
1141     if (VT == MVT::i16 && !Subtarget.hasLowByteFirst())
1142       return false;
1143   } else {
1144     return false;
1145   }
1146
1147   if (VT != MVT::i8 && VT != MVT::i16) {
1148     return false;
1149   }
1150
1151   if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) {
1152     return false;
1153   }
1154
1155   if (const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
1156     int RHSC = RHS->getSExtValue();
1157     if (Op->getOpcode() == ISD::SUB)
1158       RHSC = -RHSC;
1159     if ((VT == MVT::i16 && RHSC != 2) || (VT == MVT::i8 && RHSC != 1)) {
1160       return false;
1161     }
1162
1163     // FIXME: We temporarily disable post increment load from program memory,
1164     //        due to bug https://github.com/llvm/llvm-project/issues/59914.
1165     if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
1166       if (AVR::isProgramMemoryAccess(LD))
1167         return false;
1168
1169     Base = Op->getOperand(0);
1170     Offset = DAG.getConstant(RHSC, DL, MVT::i8);
1171     AM = ISD::POST_INC;
1172
1173     return true;
1174   }
1175
1176   return false;
1177 }
1178
1179 bool AVRTargetLowering::isOffsetFoldingLegal(
1180     const GlobalAddressSDNode *GA) const {
1181   return true;
1182 }
1183
1184 //===----------------------------------------------------------------------===//
1185 //             Formal Arguments Calling Convention Implementation
1186 //===----------------------------------------------------------------------===//
1187
1188 #include "AVRGenCallingConv.inc"
1189
1190 /// Registers for calling conventions, ordered in reverse as required by ABI.
1191 /// Both arrays must be of the same length.
1192 static const MCPhysReg RegList8AVR[] = {
1193     AVR::R25, AVR::R24, AVR::R23, AVR::R22, AVR::R21, AVR::R20,
1194     AVR::R19, AVR::R18, AVR::R17, AVR::R16, AVR::R15, AVR::R14,
1195     AVR::R13, AVR::R12, AVR::R11, AVR::R10, AVR::R9,  AVR::R8};
1196 static const MCPhysReg RegList8Tiny[] = {AVR::R25, AVR::R24, AVR::R23,
1197                                          AVR::R22, AVR::R21, AVR::R20};
1198 static const MCPhysReg RegList16AVR[] = {
1199     AVR::R26R25, AVR::R25R24, AVR::R24R23, AVR::R23R22, AVR::R22R21,
1200     AVR::R21R20, AVR::R20R19, AVR::R19R18, AVR::R18R17, AVR::R17R16,
1201     AVR::R16R15, AVR::R15R14, AVR::R14R13, AVR::R13R12, AVR::R12R11,
1202     AVR::R11R10, AVR::R10R9,  AVR::R9R8};
1203 static const MCPhysReg RegList16Tiny[] = {AVR::R26R25, AVR::R25R24,
1204                                           AVR::R24R23, AVR::R23R22,
1205                                           AVR::R22R21, AVR::R21R20};
1206
1207 static_assert(std::size(RegList8AVR) == std::size(RegList16AVR),
1208               "8-bit and 16-bit register arrays must be of equal length");
1209 static_assert(std::size(RegList8Tiny) == std::size(RegList16Tiny),
1210               "8-bit and 16-bit register arrays must be of equal length");
1211
1212 /// Analyze incoming and outgoing function arguments. We need custom C++ code
1213 /// to handle special constraints in the ABI.
1214 /// In addition, all pieces of a certain argument have to be passed either
1215 /// using registers or the stack but never mixing both.
1216 template <typename ArgT>
1217 static void analyzeArguments(TargetLowering::CallLoweringInfo *CLI,
1218                              const Function *F, const DataLayout *TD,
1219                              const SmallVectorImpl<ArgT> &Args,
1220                              SmallVectorImpl<CCValAssign> &ArgLocs,
1221                              CCState &CCInfo, bool Tiny) {
1222   // Choose the proper register list for argument passing according to the ABI.
1223   ArrayRef<MCPhysReg> RegList8;
1224   ArrayRef<MCPhysReg> RegList16;
1225   if (Tiny) {
1226     RegList8 = ArrayRef(RegList8Tiny);
1227     RegList16 = ArrayRef(RegList16Tiny);
1228   } else {
1229     RegList8 = ArrayRef(RegList8AVR);
1230     RegList16 = ArrayRef(RegList16AVR);
1231   }
1232
1233   unsigned NumArgs = Args.size();
1234   // This is the index of the last used register, in RegList*.
1235   // -1 means R26 (R26 is never actually used in CC).
1236   int RegLastIdx = -1;
1237   // Once a value is passed to the stack it will always be used
1238   bool UseStack = false;
1239   for (unsigned i = 0; i != NumArgs;) {
1240     MVT VT = Args[i].VT;
1241     // We have to count the number of bytes for each function argument, that is
1242     // those Args with the same OrigArgIndex. This is important in case the
1243     // function takes an aggregate type.
1244     // Current argument will be between [i..j).
1245     unsigned ArgIndex = Args[i].OrigArgIndex;
1246     unsigned TotalBytes = VT.getStoreSize();
1247     unsigned j = i + 1;
1248     for (; j != NumArgs; ++j) {
1249       if (Args[j].OrigArgIndex != ArgIndex)
1250         break;
1251       TotalBytes += Args[j].VT.getStoreSize();
1252     }
1253     // Round up to even number of bytes.
1254     TotalBytes = alignTo(TotalBytes, 2);
1255     // Skip zero sized arguments
1256     if (TotalBytes == 0)
1257       continue;
1258     // The index of the first register to be used
1259     unsigned RegIdx = RegLastIdx + TotalBytes;
1260     RegLastIdx = RegIdx;
1261     // If there are not enough registers, use the stack
1262     if (RegIdx >= RegList8.size()) {
1263       UseStack = true;
1264     }
1265     for (; i != j; ++i) {
1266       MVT VT = Args[i].VT;
1267
1268       if (UseStack) {
1269         auto evt = EVT(VT).getTypeForEVT(CCInfo.getContext());
1270         unsigned Offset = CCInfo.AllocateStack(TD->getTypeAllocSize(evt),
1271                                                TD->getABITypeAlign(evt));
1272         CCInfo.addLoc(
1273             CCValAssign::getMem(i, VT, Offset, VT, CCValAssign::Full));
1274       } else {
1275         unsigned Reg;
1276         if (VT == MVT::i8) {
1277           Reg = CCInfo.AllocateReg(RegList8[RegIdx]);
1278         } else if (VT == MVT::i16) {
1279           Reg = CCInfo.AllocateReg(RegList16[RegIdx]);
1280         } else {
1281           llvm_unreachable(
1282               "calling convention can only manage i8 and i16 types");
1283         }
1284         assert(Reg && "register not available in calling convention");
1285         CCInfo.addLoc(CCValAssign::getReg(i, VT, Reg, VT, CCValAssign::Full));
1286         // Registers inside a particular argument are sorted in increasing order
1287         // (remember the array is reversed).
1288         RegIdx -= VT.getStoreSize();
1289       }
1290     }
1291   }
1292 }
1293
1294 /// Count the total number of bytes needed to pass or return these arguments.
1295 template <typename ArgT>
1296 static unsigned
1297 getTotalArgumentsSizeInBytes(const SmallVectorImpl<ArgT> &Args) {
1298   unsigned TotalBytes = 0;
1299
1300   for (const ArgT &Arg : Args) {
1301     TotalBytes += Arg.VT.getStoreSize();
1302   }
1303   return TotalBytes;
1304 }
1305
1306 /// Analyze incoming and outgoing value of returning from a function.
1307 /// The algorithm is similar to analyzeArguments, but there can only be
1308 /// one value, possibly an aggregate, and it is limited to 8 bytes.
1309 template <typename ArgT>
1310 static void analyzeReturnValues(const SmallVectorImpl<ArgT> &Args,
1311                                 CCState &CCInfo, bool Tiny) {
1312   unsigned NumArgs = Args.size();
1313   unsigned TotalBytes = getTotalArgumentsSizeInBytes(Args);
1314   // CanLowerReturn() guarantees this assertion.
1315   if (Tiny)
1316     assert(TotalBytes <= 4 &&
1317            "return values greater than 4 bytes cannot be lowered on AVRTiny");
1318   else
1319     assert(TotalBytes <= 8 &&
1320            "return values greater than 8 bytes cannot be lowered on AVR");
1321
1322   // Choose the proper register list for argument passing according to the ABI.
1323   ArrayRef<MCPhysReg> RegList8;
1324   ArrayRef<MCPhysReg> RegList16;
1325   if (Tiny) {
1326     RegList8 = ArrayRef(RegList8Tiny);
1327     RegList16 = ArrayRef(RegList16Tiny);
1328   } else {
1329     RegList8 = ArrayRef(RegList8AVR);
1330     RegList16 = ArrayRef(RegList16AVR);
1331   }
1332
1333   // GCC-ABI says that the size is rounded up to the next even number,
1334   // but actually once it is more than 4 it will always round up to 8.
1335   if (TotalBytes > 4) {
1336     TotalBytes = 8;
1337   } else {
1338     TotalBytes = alignTo(TotalBytes, 2);
1339   }
1340
1341   // The index of the first register to use.
1342   int RegIdx = TotalBytes - 1;
1343   for (unsigned i = 0; i != NumArgs; ++i) {
1344     MVT VT = Args[i].VT;
1345     unsigned Reg;
1346     if (VT == MVT::i8) {
1347       Reg = CCInfo.AllocateReg(RegList8[RegIdx]);
1348     } else if (VT == MVT::i16) {
1349       Reg = CCInfo.AllocateReg(RegList16[RegIdx]);
1350     } else {
1351       llvm_unreachable("calling convention can only manage i8 and i16 types");
1352     }
1353     assert(Reg && "register not available in calling convention");
1354     CCInfo.addLoc(CCValAssign::getReg(i, VT, Reg, VT, CCValAssign::Full));
1355     // Registers sort in increasing order
1356     RegIdx -= VT.getStoreSize();
1357   }
1358 }
1359
1360 SDValue AVRTargetLowering::LowerFormalArguments(
1361     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1362     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1363     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1364   MachineFunction &MF = DAG.getMachineFunction();
1365   MachineFrameInfo &MFI = MF.getFrameInfo();
1366   auto DL = DAG.getDataLayout();
1367
1368   // Assign locations to all of the incoming arguments.
1369   SmallVector<CCValAssign, 16> ArgLocs;
1370   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1371                  *DAG.getContext());
1372
1373   // Variadic functions do not need all the analysis below.
1374   if (isVarArg) {
1375     CCInfo.AnalyzeFormalArguments(Ins, ArgCC_AVR_Vararg);
1376   } else {
1377     analyzeArguments(nullptr, &MF.getFunction(), &DL, Ins, ArgLocs, CCInfo,
1378                      Subtarget.hasTinyEncoding());
1379   }
1380
1381   SDValue ArgValue;
1382   for (CCValAssign &VA : ArgLocs) {
1383
1384     // Arguments stored on registers.
1385     if (VA.isRegLoc()) {
1386       EVT RegVT = VA.getLocVT();
1387       const TargetRegisterClass *RC;
1388       if (RegVT == MVT::i8) {
1389         RC = &AVR::GPR8RegClass;
1390       } else if (RegVT == MVT::i16) {
1391         RC = &AVR::DREGSRegClass;
1392       } else {
1393         llvm_unreachable("Unknown argument type!");
1394       }
1395
1396       Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1397       ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
1398
1399       // :NOTE: Clang should not promote any i8 into i16 but for safety the
1400       // following code will handle zexts or sexts generated by other
1401       // front ends. Otherwise:
1402       // If this is an 8 bit value, it is really passed promoted
1403       // to 16 bits. Insert an assert[sz]ext to capture this, then
1404       // truncate to the right size.
1405       switch (VA.getLocInfo()) {
1406       default:
1407         llvm_unreachable("Unknown loc info!");
1408       case CCValAssign::Full:
1409         break;
1410       case CCValAssign::BCvt:
1411         ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
1412         break;
1413       case CCValAssign::SExt:
1414         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
1415                                DAG.getValueType(VA.getValVT()));
1416         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1417         break;
1418       case CCValAssign::ZExt:
1419         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
1420                                DAG.getValueType(VA.getValVT()));
1421         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1422         break;
1423       }
1424
1425       InVals.push_back(ArgValue);
1426     } else {
1427       // Only arguments passed on the stack should make it here.
1428       assert(VA.isMemLoc());
1429
1430       EVT LocVT = VA.getLocVT();
1431
1432       // Create the frame index object for this incoming parameter.
1433       int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
1434                                      VA.getLocMemOffset(), true);
1435
1436       // Create the SelectionDAG nodes corresponding to a load
1437       // from this parameter.
1438       SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DL));
1439       InVals.push_back(DAG.getLoad(LocVT, dl, Chain, FIN,
1440                                    MachinePointerInfo::getFixedStack(MF, FI)));
1441     }
1442   }
1443
1444   // If the function takes variable number of arguments, make a frame index for
1445   // the start of the first vararg value... for expansion of llvm.va_start.
1446   if (isVarArg) {
1447     unsigned StackSize = CCInfo.getStackSize();
1448     AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
1449
1450     AFI->setVarArgsFrameIndex(MFI.CreateFixedObject(2, StackSize, true));
1451   }
1452
1453   return Chain;
1454 }
1455
1456 //===----------------------------------------------------------------------===//
1457 //                  Call Calling Convention Implementation
1458 //===----------------------------------------------------------------------===//
1459
1460 SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1461                                      SmallVectorImpl<SDValue> &InVals) const {
1462   SelectionDAG &DAG = CLI.DAG;
1463   SDLoc &DL = CLI.DL;
1464   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1465   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1466   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1467   SDValue Chain = CLI.Chain;
1468   SDValue Callee = CLI.Callee;
1469   bool &isTailCall = CLI.IsTailCall;
1470   CallingConv::ID CallConv = CLI.CallConv;
1471   bool isVarArg = CLI.IsVarArg;
1472
1473   MachineFunction &MF = DAG.getMachineFunction();
1474
1475   // AVR does not yet support tail call optimization.
1476   isTailCall = false;
1477
1478   // Analyze operands of the call, assigning locations to each operand.
1479   SmallVector<CCValAssign, 16> ArgLocs;
1480   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1481                  *DAG.getContext());
1482
1483   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1484   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1485   // node so that legalize doesn't hack it.
1486   const Function *F = nullptr;
1487   if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1488     const GlobalValue *GV = G->getGlobal();
1489     if (isa<Function>(GV))
1490       F = cast<Function>(GV);
1491     Callee =
1492         DAG.getTargetGlobalAddress(GV, DL, getPointerTy(DAG.getDataLayout()));
1493   } else if (const ExternalSymbolSDNode *ES =
1494                  dyn_cast<ExternalSymbolSDNode>(Callee)) {
1495     Callee = DAG.getTargetExternalSymbol(ES->getSymbol(),
1496                                          getPointerTy(DAG.getDataLayout()));
1497   }
1498
1499   // Variadic functions do not need all the analysis below.
1500   if (isVarArg) {
1501     CCInfo.AnalyzeCallOperands(Outs, ArgCC_AVR_Vararg);
1502   } else {
1503     analyzeArguments(&CLI, F, &DAG.getDataLayout(), Outs, ArgLocs, CCInfo,
1504                      Subtarget.hasTinyEncoding());
1505   }
1506
1507   // Get a count of how many bytes are to be pushed on the stack.
1508   unsigned NumBytes = CCInfo.getStackSize();
1509
1510   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1511
1512   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
1513
1514   // First, walk the register assignments, inserting copies.
1515   unsigned AI, AE;
1516   bool HasStackArgs = false;
1517   for (AI = 0, AE = ArgLocs.size(); AI != AE; ++AI) {
1518     CCValAssign &VA = ArgLocs[AI];
1519     EVT RegVT = VA.getLocVT();
1520     SDValue Arg = OutVals[AI];
1521
1522     // Promote the value if needed. With Clang this should not happen.
1523     switch (VA.getLocInfo()) {
1524     default:
1525       llvm_unreachable("Unknown loc info!");
1526     case CCValAssign::Full:
1527       break;
1528     case CCValAssign::SExt:
1529       Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, RegVT, Arg);
1530       break;
1531     case CCValAssign::ZExt:
1532       Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, RegVT, Arg);
1533       break;
1534     case CCValAssign::AExt:
1535       Arg = DAG.getNode(ISD::ANY_EXTEND, DL, RegVT, Arg);
1536       break;
1537     case CCValAssign::BCvt:
1538       Arg = DAG.getNode(ISD::BITCAST, DL, RegVT, Arg);
1539       break;
1540     }
1541
1542     // Stop when we encounter a stack argument, we need to process them
1543     // in reverse order in the loop below.
1544     if (VA.isMemLoc()) {
1545       HasStackArgs = true;
1546       break;
1547     }
1548
1549     // Arguments that can be passed on registers must be kept in the RegsToPass
1550     // vector.
1551     RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1552   }
1553
1554   // Second, stack arguments have to walked.
1555   // Previously this code created chained stores but those chained stores appear
1556   // to be unchained in the legalization phase. Therefore, do not attempt to
1557   // chain them here. In fact, chaining them here somehow causes the first and
1558   // second store to be reversed which is the exact opposite of the intended
1559   // effect.
1560   if (HasStackArgs) {
1561     SmallVector<SDValue, 8> MemOpChains;
1562     for (; AI != AE; AI++) {
1563       CCValAssign &VA = ArgLocs[AI];
1564       SDValue Arg = OutVals[AI];
1565
1566       assert(VA.isMemLoc());
1567
1568       // SP points to one stack slot further so add one to adjust it.
1569       SDValue PtrOff = DAG.getNode(
1570           ISD::ADD, DL, getPointerTy(DAG.getDataLayout()),
1571           DAG.getRegister(AVR::SP, getPointerTy(DAG.getDataLayout())),
1572           DAG.getIntPtrConstant(VA.getLocMemOffset() + 1, DL));
1573
1574       MemOpChains.push_back(
1575           DAG.getStore(Chain, DL, Arg, PtrOff,
1576                        MachinePointerInfo::getStack(MF, VA.getLocMemOffset())));
1577     }
1578
1579     if (!MemOpChains.empty())
1580       Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1581   }
1582
1583   // Build a sequence of copy-to-reg nodes chained together with token chain and
1584   // flag operands which copy the outgoing args into registers.  The InGlue in
1585   // necessary since all emited instructions must be stuck together.
1586   SDValue InGlue;
1587   for (auto Reg : RegsToPass) {
1588     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, InGlue);
1589     InGlue = Chain.getValue(1);
1590   }
1591
1592   // Returns a chain & a flag for retval copy to use.
1593   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1594   SmallVector<SDValue, 8> Ops;
1595   Ops.push_back(Chain);
1596   Ops.push_back(Callee);
1597
1598   // Add argument registers to the end of the list so that they are known live
1599   // into the call.
1600   for (auto Reg : RegsToPass) {
1601     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
1602   }
1603
1604   // The zero register (usually R1) must be passed as an implicit register so
1605   // that this register is correctly zeroed in interrupts.
1606   Ops.push_back(DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8));
1607
1608   // Add a register mask operand representing the call-preserved registers.
1609   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1610   const uint32_t *Mask =
1611       TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
1612   assert(Mask && "Missing call preserved mask for calling convention");
1613   Ops.push_back(DAG.getRegisterMask(Mask));
1614
1615   if (InGlue.getNode()) {
1616     Ops.push_back(InGlue);
1617   }
1618
1619   Chain = DAG.getNode(AVRISD::CALL, DL, NodeTys, Ops);
1620   InGlue = Chain.getValue(1);
1621
1622   // Create the CALLSEQ_END node.
1623   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, DL);
1624
1625   if (!Ins.empty()) {
1626     InGlue = Chain.getValue(1);
1627   }
1628
1629   // Handle result values, copying them out of physregs into vregs that we
1630   // return.
1631   return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, DL, DAG,
1632                          InVals);
1633 }
1634
1635 /// Lower the result values of a call into the
1636 /// appropriate copies out of appropriate physical registers.
1637 ///
1638 SDValue AVRTargetLowering::LowerCallResult(
1639     SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1640     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1641     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1642
1643   // Assign locations to each value returned by this call.
1644   SmallVector<CCValAssign, 16> RVLocs;
1645   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1646                  *DAG.getContext());
1647
1648   // Handle runtime calling convs.
1649   if (CallConv == CallingConv::AVR_BUILTIN) {
1650     CCInfo.AnalyzeCallResult(Ins, RetCC_AVR_BUILTIN);
1651   } else {
1652     analyzeReturnValues(Ins, CCInfo, Subtarget.hasTinyEncoding());
1653   }
1654
1655   // Copy all of the result registers out of their specified physreg.
1656   for (CCValAssign const &RVLoc : RVLocs) {
1657     Chain = DAG.getCopyFromReg(Chain, dl, RVLoc.getLocReg(), RVLoc.getValVT(),
1658                                InGlue)
1659                 .getValue(1);
1660     InGlue = Chain.getValue(2);
1661     InVals.push_back(Chain.getValue(0));
1662   }
1663
1664   return Chain;
1665 }
1666
1667 //===----------------------------------------------------------------------===//
1668 //               Return Value Calling Convention Implementation
1669 //===----------------------------------------------------------------------===//
1670
1671 bool AVRTargetLowering::CanLowerReturn(
1672     CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
1673     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
1674   if (CallConv == CallingConv::AVR_BUILTIN) {
1675     SmallVector<CCValAssign, 16> RVLocs;
1676     CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
1677     return CCInfo.CheckReturn(Outs, RetCC_AVR_BUILTIN);
1678   }
1679
1680   unsigned TotalBytes = getTotalArgumentsSizeInBytes(Outs);
1681   return TotalBytes <= (unsigned)(Subtarget.hasTinyEncoding() ? 4 : 8);
1682 }
1683
1684 SDValue
1685 AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1686                                bool isVarArg,
1687                                const SmallVectorImpl<ISD::OutputArg> &Outs,
1688                                const SmallVectorImpl<SDValue> &OutVals,
1689                                const SDLoc &dl, SelectionDAG &DAG) const {
1690   // CCValAssign - represent the assignment of the return value to locations.
1691   SmallVector<CCValAssign, 16> RVLocs;
1692
1693   // CCState - Info about the registers and stack slot.
1694   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1695                  *DAG.getContext());
1696
1697   MachineFunction &MF = DAG.getMachineFunction();
1698
1699   // Analyze return values.
1700   if (CallConv == CallingConv::AVR_BUILTIN) {
1701     CCInfo.AnalyzeReturn(Outs, RetCC_AVR_BUILTIN);
1702   } else {
1703     analyzeReturnValues(Outs, CCInfo, Subtarget.hasTinyEncoding());
1704   }
1705
1706   SDValue Glue;
1707   SmallVector<SDValue, 4> RetOps(1, Chain);
1708   // Copy the result values into the output registers.
1709   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
1710     CCValAssign &VA = RVLocs[i];
1711     assert(VA.isRegLoc() && "Can only return in registers!");
1712
1713     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Glue);
1714
1715     // Guarantee that all emitted copies are stuck together with flags.
1716     Glue = Chain.getValue(1);
1717     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
1718   }
1719
1720   // Don't emit the ret/reti instruction when the naked attribute is present in
1721   // the function being compiled.
1722   if (MF.getFunction().getAttributes().hasFnAttr(Attribute::Naked)) {
1723     return Chain;
1724   }
1725
1726   const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
1727
1728   if (!AFI->isInterruptOrSignalHandler()) {
1729     // The return instruction has an implicit zero register operand: it must
1730     // contain zero on return.
1731     // This is not needed in interrupts however, where the zero register is
1732     // handled specially (only pushed/popped when needed).
1733     RetOps.push_back(DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8));
1734   }
1735
1736   unsigned RetOpc =
1737       AFI->isInterruptOrSignalHandler() ? AVRISD::RETI_GLUE : AVRISD::RET_GLUE;
1738
1739   RetOps[0] = Chain; // Update chain.
1740
1741   if (Glue.getNode()) {
1742     RetOps.push_back(Glue);
1743   }
1744
1745   return DAG.getNode(RetOpc, dl, MVT::Other, RetOps);
1746 }
1747
1748 //===----------------------------------------------------------------------===//
1749 //  Custom Inserters
1750 //===----------------------------------------------------------------------===//
1751
1752 MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
1753                                                   MachineBasicBlock *BB,
1754                                                   bool Tiny) const {
1755   unsigned Opc;
1756   const TargetRegisterClass *RC;
1757   bool HasRepeatedOperand = false;
1758   MachineFunction *F = BB->getParent();
1759   MachineRegisterInfo &RI = F->getRegInfo();
1760   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
1761   DebugLoc dl = MI.getDebugLoc();
1762
1763   switch (MI.getOpcode()) {
1764   default:
1765     llvm_unreachable("Invalid shift opcode!");
1766   case AVR::Lsl8:
1767     Opc = AVR::ADDRdRr; // LSL is an alias of ADD Rd, Rd
1768     RC = &AVR::GPR8RegClass;
1769     HasRepeatedOperand = true;
1770     break;
1771   case AVR::Lsl16:
1772     Opc = AVR::LSLWRd;
1773     RC = &AVR::DREGSRegClass;
1774     break;
1775   case AVR::Asr8:
1776     Opc = AVR::ASRRd;
1777     RC = &AVR::GPR8RegClass;
1778     break;
1779   case AVR::Asr16:
1780     Opc = AVR::ASRWRd;
1781     RC = &AVR::DREGSRegClass;
1782     break;
1783   case AVR::Lsr8:
1784     Opc = AVR::LSRRd;
1785     RC = &AVR::GPR8RegClass;
1786     break;
1787   case AVR::Lsr16:
1788     Opc = AVR::LSRWRd;
1789     RC = &AVR::DREGSRegClass;
1790     break;
1791   case AVR::Rol8:
1792     Opc = Tiny ? AVR::ROLBRdR17 : AVR::ROLBRdR1;
1793     RC = &AVR::GPR8RegClass;
1794     break;
1795   case AVR::Rol16:
1796     Opc = AVR::ROLWRd;
1797     RC = &AVR::DREGSRegClass;
1798     break;
1799   case AVR::Ror8:
1800     Opc = AVR::RORBRd;
1801     RC = &AVR::GPR8RegClass;
1802     break;
1803   case AVR::Ror16:
1804     Opc = AVR::RORWRd;
1805     RC = &AVR::DREGSRegClass;
1806     break;
1807   }
1808
1809   const BasicBlock *LLVM_BB = BB->getBasicBlock();
1810
1811   MachineFunction::iterator I;
1812   for (I = BB->getIterator(); I != F->end() && &(*I) != BB; ++I)
1813     ;
1814   if (I != F->end())
1815     ++I;
1816
1817   // Create loop block.
1818   MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB);
1819   MachineBasicBlock *CheckBB = F->CreateMachineBasicBlock(LLVM_BB);
1820   MachineBasicBlock *RemBB = F->CreateMachineBasicBlock(LLVM_BB);
1821
1822   F->insert(I, LoopBB);
1823   F->insert(I, CheckBB);
1824   F->insert(I, RemBB);
1825
1826   // Update machine-CFG edges by transferring all successors of the current
1827   // block to the block containing instructions after shift.
1828   RemBB->splice(RemBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
1829                 BB->end());
1830   RemBB->transferSuccessorsAndUpdatePHIs(BB);
1831
1832   // Add edges BB => LoopBB => CheckBB => RemBB, CheckBB => LoopBB.
1833   BB->addSuccessor(CheckBB);
1834   LoopBB->addSuccessor(CheckBB);
1835   CheckBB->addSuccessor(LoopBB);
1836   CheckBB->addSuccessor(RemBB);
1837
1838   Register ShiftAmtReg = RI.createVirtualRegister(&AVR::GPR8RegClass);
1839   Register ShiftAmtReg2 = RI.createVirtualRegister(&AVR::GPR8RegClass);
1840   Register ShiftReg = RI.createVirtualRegister(RC);
1841   Register ShiftReg2 = RI.createVirtualRegister(RC);
1842   Register ShiftAmtSrcReg = MI.getOperand(2).getReg();
1843   Register SrcReg = MI.getOperand(1).getReg();
1844   Register DstReg = MI.getOperand(0).getReg();
1845
1846   // BB:
1847   // rjmp CheckBB
1848   BuildMI(BB, dl, TII.get(AVR::RJMPk)).addMBB(CheckBB);
1849
1850   // LoopBB:
1851   // ShiftReg2 = shift ShiftReg
1852   auto ShiftMI = BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2).addReg(ShiftReg);
1853   if (HasRepeatedOperand)
1854     ShiftMI.addReg(ShiftReg);
1855
1856   // CheckBB:
1857   // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
1858   // ShiftAmt = phi [%N,      BB], [%ShiftAmt2, LoopBB]
1859   // DestReg  = phi [%SrcReg, BB], [%ShiftReg,  LoopBB]
1860   // ShiftAmt2 = ShiftAmt - 1;
1861   // if (ShiftAmt2 >= 0) goto LoopBB;
1862   BuildMI(CheckBB, dl, TII.get(AVR::PHI), ShiftReg)
1863       .addReg(SrcReg)
1864       .addMBB(BB)
1865       .addReg(ShiftReg2)
1866       .addMBB(LoopBB);
1867   BuildMI(CheckBB, dl, TII.get(AVR::PHI), ShiftAmtReg)
1868       .addReg(ShiftAmtSrcReg)
1869       .addMBB(BB)
1870       .addReg(ShiftAmtReg2)
1871       .addMBB(LoopBB);
1872   BuildMI(CheckBB, dl, TII.get(AVR::PHI), DstReg)
1873       .addReg(SrcReg)
1874       .addMBB(BB)
1875       .addReg(ShiftReg2)
1876       .addMBB(LoopBB);
1877
1878   BuildMI(CheckBB, dl, TII.get(AVR::DECRd), ShiftAmtReg2).addReg(ShiftAmtReg);
1879   BuildMI(CheckBB, dl, TII.get(AVR::BRPLk)).addMBB(LoopBB);
1880
1881   MI.eraseFromParent(); // The pseudo instruction is gone now.
1882   return RemBB;
1883 }
1884
1885 // Do a multibyte AVR shift. Insert shift instructions and put the output
1886 // registers in the Regs array.
1887 // Because AVR does not have a normal shift instruction (only a single bit shift
1888 // instruction), we have to emulate this behavior with other instructions.
1889 // It first tries large steps (moving registers around) and then smaller steps
1890 // like single bit shifts.
1891 // Large shifts actually reduce the number of shifted registers, so the below
1892 // algorithms have to work independently of the number of registers that are
1893 // shifted.
1894 // For more information and background, see this blogpost:
1895 // https://aykevl.nl/2021/02/avr-bitshift
1896 static void insertMultibyteShift(MachineInstr &MI, MachineBasicBlock *BB,
1897                                  MutableArrayRef<std::pair<Register, int>> Regs,
1898                                  ISD::NodeType Opc, int64_t ShiftAmt) {
1899   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
1900   const AVRSubtarget &STI = BB->getParent()->getSubtarget<AVRSubtarget>();
1901   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
1902   const DebugLoc &dl = MI.getDebugLoc();
1903
1904   const bool ShiftLeft = Opc == ISD::SHL;
1905   const bool ArithmeticShift = Opc == ISD::SRA;
1906
1907   // Zero a register, for use in later operations.
1908   Register ZeroReg = MRI.createVirtualRegister(&AVR::GPR8RegClass);
1909   BuildMI(*BB, MI, dl, TII.get(AVR::COPY), ZeroReg)
1910       .addReg(STI.getZeroRegister());
1911
1912   // Do a shift modulo 6 or 7. This is a bit more complicated than most shifts
1913   // and is hard to compose with the rest, so these are special cased.
1914   // The basic idea is to shift one or two bits in the opposite direction and
1915   // then move registers around to get the correct end result.
1916   if (ShiftLeft && (ShiftAmt % 8) >= 6) {
1917     // Left shift modulo 6 or 7.
1918
1919     // Create a slice of the registers we're going to modify, to ease working
1920     // with them.
1921     size_t ShiftRegsOffset = ShiftAmt / 8;
1922     size_t ShiftRegsSize = Regs.size() - ShiftRegsOffset;
1923     MutableArrayRef<std::pair<Register, int>> ShiftRegs =
1924         Regs.slice(ShiftRegsOffset, ShiftRegsSize);
1925
1926     // Shift one to the right, keeping the least significant bit as the carry
1927     // bit.
1928     insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, 1);
1929
1930     // Rotate the least significant bit from the carry bit into a new register
1931     // (that starts out zero).
1932     Register LowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
1933     BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), LowByte).addReg(ZeroReg);
1934
1935     // Shift one more to the right if this is a modulo-6 shift.
1936     if (ShiftAmt % 8 == 6) {
1937       insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, 1);
1938       Register NewLowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
1939       BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), NewLowByte).addReg(LowByte);
1940       LowByte = NewLowByte;
1941     }
1942
1943     // Move all registers to the left, zeroing the bottom registers as needed.
1944     for (size_t I = 0; I < Regs.size(); I++) {
1945       int ShiftRegsIdx = I + 1;
1946       if (ShiftRegsIdx < (int)ShiftRegs.size()) {
1947         Regs[I] = ShiftRegs[ShiftRegsIdx];
1948       } else if (ShiftRegsIdx == (int)ShiftRegs.size()) {
1949         Regs[I] = std::pair(LowByte, 0);
1950       } else {
1951         Regs[I] = std::pair(ZeroReg, 0);
1952       }
1953     }
1954
1955     return;
1956   }
1957
1958   // Right shift modulo 6 or 7.
1959   if (!ShiftLeft && (ShiftAmt % 8) >= 6) {
1960     // Create a view on the registers we're going to modify, to ease working
1961     // with them.
1962     size_t ShiftRegsSize = Regs.size() - (ShiftAmt / 8);
1963     MutableArrayRef<std::pair<Register, int>> ShiftRegs =
1964         Regs.slice(0, ShiftRegsSize);
1965
1966     // Shift one to the left.
1967     insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, 1);
1968
1969     // Sign or zero extend the most significant register into a new register.
1970     // The HighByte is the byte that still has one (or two) bits from the
1971     // original value. The ExtByte is purely a zero/sign extend byte (all bits
1972     // are either 0 or 1).
1973     Register HighByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
1974     Register ExtByte = 0;
1975     if (ArithmeticShift) {
1976       // Sign-extend bit that was shifted out last.
1977       BuildMI(*BB, MI, dl, TII.get(AVR::SBCRdRr), HighByte)
1978           .addReg(HighByte, RegState::Undef)
1979           .addReg(HighByte, RegState::Undef);
1980       ExtByte = HighByte;
1981       // The highest bit of the original value is the same as the zero-extend
1982       // byte, so HighByte and ExtByte are the same.
1983     } else {
1984       // Use the zero register for zero extending.
1985       ExtByte = ZeroReg;
1986       // Rotate most significant bit into a new register (that starts out zero).
1987       BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), HighByte)
1988           .addReg(ExtByte)
1989           .addReg(ExtByte);
1990     }
1991
1992     // Shift one more to the left for modulo 6 shifts.
1993     if (ShiftAmt % 8 == 6) {
1994       insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, 1);
1995       // Shift the topmost bit into the HighByte.
1996       Register NewExt = MRI.createVirtualRegister(&AVR::GPR8RegClass);
1997       BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), NewExt)
1998           .addReg(HighByte)
1999           .addReg(HighByte);
2000       HighByte = NewExt;
2001     }
2002
2003     // Move all to the right, while sign or zero extending.
2004     for (int I = Regs.size() - 1; I >= 0; I--) {
2005       int ShiftRegsIdx = I - (Regs.size() - ShiftRegs.size()) - 1;
2006       if (ShiftRegsIdx >= 0) {
2007         Regs[I] = ShiftRegs[ShiftRegsIdx];
2008       } else if (ShiftRegsIdx == -1) {
2009         Regs[I] = std::pair(HighByte, 0);
2010       } else {
2011         Regs[I] = std::pair(ExtByte, 0);
2012       }
2013     }
2014
2015     return;
2016   }
2017
2018   // For shift amounts of at least one register, simply rename the registers and
2019   // zero the bottom registers.
2020   while (ShiftLeft && ShiftAmt >= 8) {
2021     // Move all registers one to the left.
2022     for (size_t I = 0; I < Regs.size() - 1; I++) {
2023       Regs[I] = Regs[I + 1];
2024     }
2025
2026     // Zero the least significant register.
2027     Regs[Regs.size() - 1] = std::pair(ZeroReg, 0);
2028
2029     // Continue shifts with the leftover registers.
2030     Regs = Regs.drop_back(1);
2031
2032     ShiftAmt -= 8;
2033   }
2034
2035   // And again, the same for right shifts.
2036   Register ShrExtendReg = 0;
2037   if (!ShiftLeft && ShiftAmt >= 8) {
2038     if (ArithmeticShift) {
2039       // Sign extend the most significant register into ShrExtendReg.
2040       ShrExtendReg = MRI.createVirtualRegister(&AVR::GPR8RegClass);
2041       Register Tmp = MRI.createVirtualRegister(&AVR::GPR8RegClass);
2042       BuildMI(*BB, MI, dl, TII.get(AVR::ADDRdRr), Tmp)
2043           .addReg(Regs[0].first, 0, Regs[0].second)
2044           .addReg(Regs[0].first, 0, Regs[0].second);
2045       BuildMI(*BB, MI, dl, TII.get(AVR::SBCRdRr), ShrExtendReg)
2046           .addReg(Tmp)
2047           .addReg(Tmp);
2048     } else {
2049       ShrExtendReg = ZeroReg;
2050     }
2051     for (; ShiftAmt >= 8; ShiftAmt -= 8) {
2052       // Move all registers one to the right.
2053       for (size_t I = Regs.size() - 1; I != 0; I--) {
2054         Regs[I] = Regs[I - 1];
2055       }
2056
2057       // Zero or sign extend the most significant register.
2058       Regs[0] = std::pair(ShrExtendReg, 0);
2059
2060       // Continue shifts with the leftover registers.
2061       Regs = Regs.drop_front(1);
2062     }
2063   }
2064
2065   // The bigger shifts are already handled above.
2066   assert((ShiftAmt < 8) && "Unexpect shift amount");
2067
2068   // Shift by four bits, using a complicated swap/eor/andi/eor sequence.
2069   // It only works for logical shifts because the bits shifted in are all
2070   // zeroes.
2071   // To shift a single byte right, it produces code like this:
2072   //   swap r0
2073   //   andi r0, 0x0f
2074   // For a two-byte (16-bit) shift, it adds the following instructions to shift
2075   // the upper byte into the lower byte:
2076   //   swap r1
2077   //   eor r0, r1
2078   //   andi r1, 0x0f
2079   //   eor r0, r1
2080   // For bigger shifts, it repeats the above sequence. For example, for a 3-byte
2081   // (24-bit) shift it adds:
2082   //   swap r2
2083   //   eor r1, r2
2084   //   andi r2, 0x0f
2085   //   eor r1, r2
2086   if (!ArithmeticShift && ShiftAmt >= 4) {
2087     Register Prev = 0;
2088     for (size_t I = 0; I < Regs.size(); I++) {
2089       size_t Idx = ShiftLeft ? I : Regs.size() - I - 1;
2090       Register SwapReg = MRI.createVirtualRegister(&AVR::LD8RegClass);
2091       BuildMI(*BB, MI, dl, TII.get(AVR::SWAPRd), SwapReg)
2092           .addReg(Regs[Idx].first, 0, Regs[Idx].second);
2093       if (I != 0) {
2094         Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass);
2095         BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R)
2096             .addReg(Prev)
2097             .addReg(SwapReg);
2098         Prev = R;
2099       }
2100       Register AndReg = MRI.createVirtualRegister(&AVR::LD8RegClass);
2101       BuildMI(*BB, MI, dl, TII.get(AVR::ANDIRdK), AndReg)
2102           .addReg(SwapReg)
2103           .addImm(ShiftLeft ? 0xf0 : 0x0f);
2104       if (I != 0) {
2105         Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass);
2106         BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R)
2107             .addReg(Prev)
2108             .addReg(AndReg);
2109         size_t PrevIdx = ShiftLeft ? Idx - 1 : Idx + 1;
2110         Regs[PrevIdx] = std::pair(R, 0);
2111       }
2112       Prev = AndReg;
2113       Regs[Idx] = std::pair(AndReg, 0);
2114     }
2115     ShiftAmt -= 4;
2116   }
2117
2118   // Shift by one. This is the fallback that always works, and the shift
2119   // operation that is used for 1, 2, and 3 bit shifts.
2120   while (ShiftLeft && ShiftAmt) {
2121     // Shift one to the left.
2122     for (ssize_t I = Regs.size() - 1; I >= 0; I--) {
2123       Register Out = MRI.createVirtualRegister(&AVR::GPR8RegClass);
2124       Register In = Regs[I].first;
2125       Register InSubreg = Regs[I].second;
2126       if (I == (ssize_t)Regs.size() - 1) { // first iteration
2127         BuildMI(*BB, MI, dl, TII.get(AVR::ADDRdRr), Out)
2128             .addReg(In, 0, InSubreg)
2129             .addReg(In, 0, InSubreg);
2130       } else {
2131         BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), Out)
2132             .addReg(In, 0, InSubreg)
2133             .addReg(In, 0, InSubreg);
2134       }
2135       Regs[I] = std::pair(Out, 0);
2136     }
2137     ShiftAmt--;
2138   }
2139   while (!ShiftLeft && ShiftAmt) {
2140     // Shift one to the right.
2141     for (size_t I = 0; I < Regs.size(); I++) {
2142       Register Out = MRI.createVirtualRegister(&AVR::GPR8RegClass);
2143       Register In = Regs[I].first;
2144       Register InSubreg = Regs[I].second;
2145       if (I == 0) {
2146         unsigned Opc = ArithmeticShift ? AVR::ASRRd : AVR::LSRRd;
2147         BuildMI(*BB, MI, dl, TII.get(Opc), Out).addReg(In, 0, InSubreg);
2148       } else {
2149         BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), Out).addReg(In, 0, InSubreg);
2150       }
2151       Regs[I] = std::pair(Out, 0);
2152     }
2153     ShiftAmt--;
2154   }
2155
2156   if (ShiftAmt != 0) {
2157     llvm_unreachable("don't know how to shift!"); // sanity check
2158   }
2159 }
2160
2161 // Do a wide (32-bit) shift.
2162 MachineBasicBlock *
2163 AVRTargetLowering::insertWideShift(MachineInstr &MI,
2164                                    MachineBasicBlock *BB) const {
2165   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2166   const DebugLoc &dl = MI.getDebugLoc();
2167
2168   // How much to shift to the right (meaning: a negative number indicates a left
2169   // shift).
2170   int64_t ShiftAmt = MI.getOperand(4).getImm();
2171   ISD::NodeType Opc;
2172   switch (MI.getOpcode()) {
2173   case AVR::Lsl32:
2174     Opc = ISD::SHL;
2175     break;
2176   case AVR::Lsr32:
2177     Opc = ISD::SRL;
2178     break;
2179   case AVR::Asr32:
2180     Opc = ISD::SRA;
2181     break;
2182   }
2183
2184   // Read the input registers, with the most significant register at index 0.
2185   std::array<std::pair<Register, int>, 4> Registers = {
2186       std::pair(MI.getOperand(3).getReg(), AVR::sub_hi),
2187       std::pair(MI.getOperand(3).getReg(), AVR::sub_lo),
2188       std::pair(MI.getOperand(2).getReg(), AVR::sub_hi),
2189       std::pair(MI.getOperand(2).getReg(), AVR::sub_lo),
2190   };
2191
2192   // Do the shift. The registers are modified in-place.
2193   insertMultibyteShift(MI, BB, Registers, Opc, ShiftAmt);
2194
2195   // Combine the 8-bit registers into 16-bit register pairs.
2196   // This done either from LSB to MSB or from MSB to LSB, depending on the
2197   // shift. It's an optimization so that the register allocator will use the
2198   // fewest movs possible (which order we use isn't a correctness issue, just an
2199   // optimization issue).
2200   //   - lsl prefers starting from the most significant byte (2nd case).
2201   //   - lshr prefers starting from the least significant byte (1st case).
2202   //   - for ashr it depends on the number of shifted bytes.
2203   // Some shift operations still don't get the most optimal mov sequences even
2204   // with this distinction. TODO: figure out why and try to fix it (but we're
2205   // already equal to or faster than avr-gcc in all cases except ashr 8).
2206   if (Opc != ISD::SHL &&
2207       (Opc != ISD::SRA || (ShiftAmt < 16 || ShiftAmt >= 22))) {
2208     // Use the resulting registers starting with the least significant byte.
2209     BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(0).getReg())
2210         .addReg(Registers[3].first, 0, Registers[3].second)
2211         .addImm(AVR::sub_lo)
2212         .addReg(Registers[2].first, 0, Registers[2].second)
2213         .addImm(AVR::sub_hi);
2214     BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(1).getReg())
2215         .addReg(Registers[1].first, 0, Registers[1].second)
2216         .addImm(AVR::sub_lo)
2217         .addReg(Registers[0].first, 0, Registers[0].second)
2218         .addImm(AVR::sub_hi);
2219   } else {
2220     // Use the resulting registers starting with the most significant byte.
2221     BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(1).getReg())
2222         .addReg(Registers[0].first, 0, Registers[0].second)
2223         .addImm(AVR::sub_hi)
2224         .addReg(Registers[1].first, 0, Registers[1].second)
2225         .addImm(AVR::sub_lo);
2226     BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(0).getReg())
2227         .addReg(Registers[2].first, 0, Registers[2].second)
2228         .addImm(AVR::sub_hi)
2229         .addReg(Registers[3].first, 0, Registers[3].second)
2230         .addImm(AVR::sub_lo);
2231   }
2232
2233   // Remove the pseudo instruction.
2234   MI.eraseFromParent();
2235   return BB;
2236 }
2237
2238 static bool isCopyMulResult(MachineBasicBlock::iterator const &I) {
2239   if (I->getOpcode() == AVR::COPY) {
2240     Register SrcReg = I->getOperand(1).getReg();
2241     return (SrcReg == AVR::R0 || SrcReg == AVR::R1);
2242   }
2243
2244   return false;
2245 }
2246
2247 // The mul instructions wreak havock on our zero_reg R1. We need to clear it
2248 // after the result has been evacuated. This is probably not the best way to do
2249 // it, but it works for now.
2250 MachineBasicBlock *AVRTargetLowering::insertMul(MachineInstr &MI,
2251                                                 MachineBasicBlock *BB) const {
2252   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2253   MachineBasicBlock::iterator I(MI);
2254   ++I; // in any case insert *after* the mul instruction
2255   if (isCopyMulResult(I))
2256     ++I;
2257   if (isCopyMulResult(I))
2258     ++I;
2259   BuildMI(*BB, I, MI.getDebugLoc(), TII.get(AVR::EORRdRr), AVR::R1)
2260       .addReg(AVR::R1)
2261       .addReg(AVR::R1);
2262   return BB;
2263 }
2264
2265 // Insert a read from the zero register.
2266 MachineBasicBlock *
2267 AVRTargetLowering::insertCopyZero(MachineInstr &MI,
2268                                   MachineBasicBlock *BB) const {
2269   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2270   MachineBasicBlock::iterator I(MI);
2271   BuildMI(*BB, I, MI.getDebugLoc(), TII.get(AVR::COPY))
2272       .add(MI.getOperand(0))
2273       .addReg(Subtarget.getZeroRegister());
2274   MI.eraseFromParent();
2275   return BB;
2276 }
2277
2278 // Lower atomicrmw operation to disable interrupts, do operation, and restore
2279 // interrupts. This works because all AVR microcontrollers are single core.
2280 MachineBasicBlock *AVRTargetLowering::insertAtomicArithmeticOp(
2281     MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode, int Width) const {
2282   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
2283   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2284   MachineBasicBlock::iterator I(MI);
2285   DebugLoc dl = MI.getDebugLoc();
2286
2287   // Example instruction sequence, for an atomic 8-bit add:
2288   //   ldi r25, 5
2289   //   in r0, SREG
2290   //   cli
2291   //   ld r24, X
2292   //   add r25, r24
2293   //   st X, r25
2294   //   out SREG, r0
2295
2296   const TargetRegisterClass *RC =
2297       (Width == 8) ? &AVR::GPR8RegClass : &AVR::DREGSRegClass;
2298   unsigned LoadOpcode = (Width == 8) ? AVR::LDRdPtr : AVR::LDWRdPtr;
2299   unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr;
2300
2301   // Disable interrupts.
2302   BuildMI(*BB, I, dl, TII.get(AVR::INRdA), Subtarget.getTmpRegister())
2303       .addImm(Subtarget.getIORegSREG());
2304   BuildMI(*BB, I, dl, TII.get(AVR::BCLRs)).addImm(7);
2305
2306   // Load the original value.
2307   BuildMI(*BB, I, dl, TII.get(LoadOpcode), MI.getOperand(0).getReg())
2308       .add(MI.getOperand(1));
2309
2310   // Do the arithmetic operation.
2311   Register Result = MRI.createVirtualRegister(RC);
2312   BuildMI(*BB, I, dl, TII.get(Opcode), Result)
2313       .addReg(MI.getOperand(0).getReg())
2314       .add(MI.getOperand(2));
2315
2316   // Store the result.
2317   BuildMI(*BB, I, dl, TII.get(StoreOpcode))
2318       .add(MI.getOperand(1))
2319       .addReg(Result);
2320
2321   // Restore interrupts.
2322   BuildMI(*BB, I, dl, TII.get(AVR::OUTARr))
2323       .addImm(Subtarget.getIORegSREG())
2324       .addReg(Subtarget.getTmpRegister());
2325
2326   // Remove the pseudo instruction.
2327   MI.eraseFromParent();
2328   return BB;
2329 }
2330
2331 MachineBasicBlock *
2332 AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2333                                                MachineBasicBlock *MBB) const {
2334   int Opc = MI.getOpcode();
2335   const AVRSubtarget &STI = MBB->getParent()->getSubtarget<AVRSubtarget>();
2336
2337   // Pseudo shift instructions with a non constant shift amount are expanded
2338   // into a loop.
2339   switch (Opc) {
2340   case AVR::Lsl8:
2341   case AVR::Lsl16:
2342   case AVR::Lsr8:
2343   case AVR::Lsr16:
2344   case AVR::Rol8:
2345   case AVR::Rol16:
2346   case AVR::Ror8:
2347   case AVR::Ror16:
2348   case AVR::Asr8:
2349   case AVR::Asr16:
2350     return insertShift(MI, MBB, STI.hasTinyEncoding());
2351   case AVR::Lsl32:
2352   case AVR::Lsr32:
2353   case AVR::Asr32:
2354     return insertWideShift(MI, MBB);
2355   case AVR::MULRdRr:
2356   case AVR::MULSRdRr:
2357     return insertMul(MI, MBB);
2358   case AVR::CopyZero:
2359     return insertCopyZero(MI, MBB);
2360   case AVR::AtomicLoadAdd8:
2361     return insertAtomicArithmeticOp(MI, MBB, AVR::ADDRdRr, 8);
2362   case AVR::AtomicLoadAdd16:
2363     return insertAtomicArithmeticOp(MI, MBB, AVR::ADDWRdRr, 16);
2364   case AVR::AtomicLoadSub8:
2365     return insertAtomicArithmeticOp(MI, MBB, AVR::SUBRdRr, 8);
2366   case AVR::AtomicLoadSub16:
2367     return insertAtomicArithmeticOp(MI, MBB, AVR::SUBWRdRr, 16);
2368   case AVR::AtomicLoadAnd8:
2369     return insertAtomicArithmeticOp(MI, MBB, AVR::ANDRdRr, 8);
2370   case AVR::AtomicLoadAnd16:
2371     return insertAtomicArithmeticOp(MI, MBB, AVR::ANDWRdRr, 16);
2372   case AVR::AtomicLoadOr8:
2373     return insertAtomicArithmeticOp(MI, MBB, AVR::ORRdRr, 8);
2374   case AVR::AtomicLoadOr16:
2375     return insertAtomicArithmeticOp(MI, MBB, AVR::ORWRdRr, 16);
2376   case AVR::AtomicLoadXor8:
2377     return insertAtomicArithmeticOp(MI, MBB, AVR::EORRdRr, 8);
2378   case AVR::AtomicLoadXor16:
2379     return insertAtomicArithmeticOp(MI, MBB, AVR::EORWRdRr, 16);
2380   }
2381
2382   assert((Opc == AVR::Select16 || Opc == AVR::Select8) &&
2383          "Unexpected instr type to insert");
2384
2385   const AVRInstrInfo &TII = (const AVRInstrInfo &)*MI.getParent()
2386                                 ->getParent()
2387                                 ->getSubtarget()
2388                                 .getInstrInfo();
2389   DebugLoc dl = MI.getDebugLoc();
2390
2391   // To "insert" a SELECT instruction, we insert the diamond
2392   // control-flow pattern. The incoming instruction knows the
2393   // destination vreg to set, the condition code register to branch
2394   // on, the true/false values to select between, and a branch opcode
2395   // to use.
2396
2397   MachineFunction *MF = MBB->getParent();
2398   const BasicBlock *LLVM_BB = MBB->getBasicBlock();
2399   MachineBasicBlock *FallThrough = MBB->getFallThrough();
2400
2401   // If the current basic block falls through to another basic block,
2402   // we must insert an unconditional branch to the fallthrough destination
2403   // if we are to insert basic blocks at the prior fallthrough point.
2404   if (FallThrough != nullptr) {
2405     BuildMI(MBB, dl, TII.get(AVR::RJMPk)).addMBB(FallThrough);
2406   }
2407
2408   MachineBasicBlock *trueMBB = MF->CreateMachineBasicBlock(LLVM_BB);
2409   MachineBasicBlock *falseMBB = MF->CreateMachineBasicBlock(LLVM_BB);
2410
2411   MachineFunction::iterator I;
2412   for (I = MF->begin(); I != MF->end() && &(*I) != MBB; ++I)
2413     ;
2414   if (I != MF->end())
2415     ++I;
2416   MF->insert(I, trueMBB);
2417   MF->insert(I, falseMBB);
2418
2419   // Set the call frame size on entry to the new basic blocks.
2420   unsigned CallFrameSize = TII.getCallFrameSizeAt(MI);
2421   trueMBB->setCallFrameSize(CallFrameSize);
2422   falseMBB->setCallFrameSize(CallFrameSize);
2423
2424   // Transfer remaining instructions and all successors of the current
2425   // block to the block which will contain the Phi node for the
2426   // select.
2427   trueMBB->splice(trueMBB->begin(), MBB,
2428                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
2429   trueMBB->transferSuccessorsAndUpdatePHIs(MBB);
2430
2431   AVRCC::CondCodes CC = (AVRCC::CondCodes)MI.getOperand(3).getImm();
2432   BuildMI(MBB, dl, TII.getBrCond(CC)).addMBB(trueMBB);
2433   BuildMI(MBB, dl, TII.get(AVR::RJMPk)).addMBB(falseMBB);
2434   MBB->addSuccessor(falseMBB);
2435   MBB->addSuccessor(trueMBB);
2436
2437   // Unconditionally flow back to the true block
2438   BuildMI(falseMBB, dl, TII.get(AVR::RJMPk)).addMBB(trueMBB);
2439   falseMBB->addSuccessor(trueMBB);
2440
2441   // Set up the Phi node to determine where we came from
2442   BuildMI(*trueMBB, trueMBB->begin(), dl, TII.get(AVR::PHI),
2443           MI.getOperand(0).getReg())
2444       .addReg(MI.getOperand(1).getReg())
2445       .addMBB(MBB)
2446       .addReg(MI.getOperand(2).getReg())
2447       .addMBB(falseMBB);
2448
2449   MI.eraseFromParent(); // The pseudo instruction is gone now.
2450   return trueMBB;
2451 }
2452
2453 //===----------------------------------------------------------------------===//
2454 //  Inline Asm Support
2455 //===----------------------------------------------------------------------===//
2456
2457 AVRTargetLowering::ConstraintType
2458 AVRTargetLowering::getConstraintType(StringRef Constraint) const {
2459   if (Constraint.size() == 1) {
2460     // See http://www.nongnu.org/avr-libc/user-manual/inline_asm.html
2461     switch (Constraint[0]) {
2462     default:
2463       break;
2464     case 'a': // Simple upper registers
2465     case 'b': // Base pointer registers pairs
2466     case 'd': // Upper register
2467     case 'l': // Lower registers
2468     case 'e': // Pointer register pairs
2469     case 'q': // Stack pointer register
2470     case 'r': // Any register
2471     case 'w': // Special upper register pairs
2472       return C_RegisterClass;
2473     case 't': // Temporary register
2474     case 'x':
2475     case 'X': // Pointer register pair X
2476     case 'y':
2477     case 'Y': // Pointer register pair Y
2478     case 'z':
2479     case 'Z': // Pointer register pair Z
2480       return C_Register;
2481     case 'Q': // A memory address based on Y or Z pointer with displacement.
2482       return C_Memory;
2483     case 'G': // Floating point constant
2484     case 'I': // 6-bit positive integer constant
2485     case 'J': // 6-bit negative integer constant
2486     case 'K': // Integer constant (Range: 2)
2487     case 'L': // Integer constant (Range: 0)
2488     case 'M': // 8-bit integer constant
2489     case 'N': // Integer constant (Range: -1)
2490     case 'O': // Integer constant (Range: 8, 16, 24)
2491     case 'P': // Integer constant (Range: 1)
2492     case 'R': // Integer constant (Range: -6 to 5)x
2493       return C_Immediate;
2494     }
2495   }
2496
2497   return TargetLowering::getConstraintType(Constraint);
2498 }
2499
2500 InlineAsm::ConstraintCode
2501 AVRTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
2502   // Not sure if this is actually the right thing to do, but we got to do
2503   // *something* [agnat]
2504   switch (ConstraintCode[0]) {
2505   case 'Q':
2506     return InlineAsm::ConstraintCode::Q;
2507   }
2508   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
2509 }
2510
2511 AVRTargetLowering::ConstraintWeight
2512 AVRTargetLowering::getSingleConstraintMatchWeight(
2513     AsmOperandInfo &info, const char *constraint) const {
2514   ConstraintWeight weight = CW_Invalid;
2515   Value *CallOperandVal = info.CallOperandVal;
2516
2517   // If we don't have a value, we can't do a match,
2518   // but allow it at the lowest weight.
2519   // (this behaviour has been copied from the ARM backend)
2520   if (!CallOperandVal) {
2521     return CW_Default;
2522   }
2523
2524   // Look at the constraint type.
2525   switch (*constraint) {
2526   default:
2527     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
2528     break;
2529   case 'd':
2530   case 'r':
2531   case 'l':
2532     weight = CW_Register;
2533     break;
2534   case 'a':
2535   case 'b':
2536   case 'e':
2537   case 'q':
2538   case 't':
2539   case 'w':
2540   case 'x':
2541   case 'X':
2542   case 'y':
2543   case 'Y':
2544   case 'z':
2545   case 'Z':
2546     weight = CW_SpecificReg;
2547     break;
2548   case 'G':
2549     if (const ConstantFP *C = dyn_cast<ConstantFP>(CallOperandVal)) {
2550       if (C->isZero()) {
2551         weight = CW_Constant;
2552       }
2553     }
2554     break;
2555   case 'I':
2556     if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2557       if (isUInt<6>(C->getZExtValue())) {
2558         weight = CW_Constant;
2559       }
2560     }
2561     break;
2562   case 'J':
2563     if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2564       if ((C->getSExtValue() >= -63) && (C->getSExtValue() <= 0)) {
2565         weight = CW_Constant;
2566       }
2567     }
2568     break;
2569   case 'K':
2570     if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2571       if (C->getZExtValue() == 2) {
2572         weight = CW_Constant;
2573       }
2574     }
2575     break;
2576   case 'L':
2577     if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2578       if (C->getZExtValue() == 0) {
2579         weight = CW_Constant;
2580       }
2581     }
2582     break;
2583   case 'M':
2584     if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2585       if (isUInt<8>(C->getZExtValue())) {
2586         weight = CW_Constant;
2587       }
2588     }
2589     break;
2590   case 'N':
2591     if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2592       if (C->getSExtValue() == -1) {
2593         weight = CW_Constant;
2594       }
2595     }
2596     break;
2597   case 'O':
2598     if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2599       if ((C->getZExtValue() == 8) || (C->getZExtValue() == 16) ||
2600           (C->getZExtValue() == 24)) {
2601         weight = CW_Constant;
2602       }
2603     }
2604     break;
2605   case 'P':
2606     if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2607       if (C->getZExtValue() == 1) {
2608         weight = CW_Constant;
2609       }
2610     }
2611     break;
2612   case 'R':
2613     if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2614       if ((C->getSExtValue() >= -6) && (C->getSExtValue() <= 5)) {
2615         weight = CW_Constant;
2616       }
2617     }
2618     break;
2619   case 'Q':
2620     weight = CW_Memory;
2621     break;
2622   }
2623
2624   return weight;
2625 }
2626
2627 std::pair<unsigned, const TargetRegisterClass *>
2628 AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
2629                                                 StringRef Constraint,
2630                                                 MVT VT) const {
2631   if (Constraint.size() == 1) {
2632     switch (Constraint[0]) {
2633     case 'a': // Simple upper registers r16..r23.
2634       if (VT == MVT::i8)
2635         return std::make_pair(0U, &AVR::LD8loRegClass);
2636       else if (VT == MVT::i16)
2637         return std::make_pair(0U, &AVR::DREGSLD8loRegClass);
2638       break;
2639     case 'b': // Base pointer registers: y, z.
2640       if (VT == MVT::i8 || VT == MVT::i16)
2641         return std::make_pair(0U, &AVR::PTRDISPREGSRegClass);
2642       break;
2643     case 'd': // Upper registers r16..r31.
2644       if (VT == MVT::i8)
2645         return std::make_pair(0U, &AVR::LD8RegClass);
2646       else if (VT == MVT::i16)
2647         return std::make_pair(0U, &AVR::DLDREGSRegClass);
2648       break;
2649     case 'l': // Lower registers r0..r15.
2650       if (VT == MVT::i8)
2651         return std::make_pair(0U, &AVR::GPR8loRegClass);
2652       else if (VT == MVT::i16)
2653         return std::make_pair(0U, &AVR::DREGSloRegClass);
2654       break;
2655     case 'e': // Pointer register pairs: x, y, z.
2656       if (VT == MVT::i8 || VT == MVT::i16)
2657         return std::make_pair(0U, &AVR::PTRREGSRegClass);
2658       break;
2659     case 'q': // Stack pointer register: SPH:SPL.
2660       return std::make_pair(0U, &AVR::GPRSPRegClass);
2661     case 'r': // Any register: r0..r31.
2662       if (VT == MVT::i8)
2663         return std::make_pair(0U, &AVR::GPR8RegClass);
2664       else if (VT == MVT::i16)
2665         return std::make_pair(0U, &AVR::DREGSRegClass);
2666       break;
2667     case 't': // Temporary register: r0.
2668       if (VT == MVT::i8)
2669         return std::make_pair(unsigned(Subtarget.getTmpRegister()),
2670                               &AVR::GPR8RegClass);
2671       break;
2672     case 'w': // Special upper register pairs: r24, r26, r28, r30.
2673       if (VT == MVT::i8 || VT == MVT::i16)
2674         return std::make_pair(0U, &AVR::IWREGSRegClass);
2675       break;
2676     case 'x': // Pointer register pair X: r27:r26.
2677     case 'X':
2678       if (VT == MVT::i8 || VT == MVT::i16)
2679         return std::make_pair(unsigned(AVR::R27R26), &AVR::PTRREGSRegClass);
2680       break;
2681     case 'y': // Pointer register pair Y: r29:r28.
2682     case 'Y':
2683       if (VT == MVT::i8 || VT == MVT::i16)
2684         return std::make_pair(unsigned(AVR::R29R28), &AVR::PTRREGSRegClass);
2685       break;
2686     case 'z': // Pointer register pair Z: r31:r30.
2687     case 'Z':
2688       if (VT == MVT::i8 || VT == MVT::i16)
2689         return std::make_pair(unsigned(AVR::R31R30), &AVR::PTRREGSRegClass);
2690       break;
2691     default:
2692       break;
2693     }
2694   }
2695
2696   return TargetLowering::getRegForInlineAsmConstraint(
2697       Subtarget.getRegisterInfo(), Constraint, VT);
2698 }
2699
2700 void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
2701                                                      StringRef Constraint,
2702                                                      std::vector<SDValue> &Ops,
2703                                                      SelectionDAG &DAG) const {
2704   SDValue Result;
2705   SDLoc DL(Op);
2706   EVT Ty = Op.getValueType();
2707
2708   // Currently only support length 1 constraints.
2709   if (Constraint.size() != 1) {
2710     return;
2711   }
2712
2713   char ConstraintLetter = Constraint[0];
2714   switch (ConstraintLetter) {
2715   default:
2716     break;
2717   // Deal with integers first:
2718   case 'I':
2719   case 'J':
2720   case 'K':
2721   case 'L':
2722   case 'M':
2723   case 'N':
2724   case 'O':
2725   case 'P':
2726   case 'R': {
2727     const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
2728     if (!C) {
2729       return;
2730     }
2731
2732     int64_t CVal64 = C->getSExtValue();
2733     uint64_t CUVal64 = C->getZExtValue();
2734     switch (ConstraintLetter) {
2735     case 'I': // 0..63
2736       if (!isUInt<6>(CUVal64))
2737         return;
2738       Result = DAG.getTargetConstant(CUVal64, DL, Ty);
2739       break;
2740     case 'J': // -63..0
2741       if (CVal64 < -63 || CVal64 > 0)
2742         return;
2743       Result = DAG.getTargetConstant(CVal64, DL, Ty);
2744       break;
2745     case 'K': // 2
2746       if (CUVal64 != 2)
2747         return;
2748       Result = DAG.getTargetConstant(CUVal64, DL, Ty);
2749       break;
2750     case 'L': // 0
2751       if (CUVal64 != 0)
2752         return;
2753       Result = DAG.getTargetConstant(CUVal64, DL, Ty);
2754       break;
2755     case 'M': // 0..255
2756       if (!isUInt<8>(CUVal64))
2757         return;
2758       // i8 type may be printed as a negative number,
2759       // e.g. 254 would be printed as -2,
2760       // so we force it to i16 at least.
2761       if (Ty.getSimpleVT() == MVT::i8) {
2762         Ty = MVT::i16;
2763       }
2764       Result = DAG.getTargetConstant(CUVal64, DL, Ty);
2765       break;
2766     case 'N': // -1
2767       if (CVal64 != -1)
2768         return;
2769       Result = DAG.getTargetConstant(CVal64, DL, Ty);
2770       break;
2771     case 'O': // 8, 16, 24
2772       if (CUVal64 != 8 && CUVal64 != 16 && CUVal64 != 24)
2773         return;
2774       Result = DAG.getTargetConstant(CUVal64, DL, Ty);
2775       break;
2776     case 'P': // 1
2777       if (CUVal64 != 1)
2778         return;
2779       Result = DAG.getTargetConstant(CUVal64, DL, Ty);
2780       break;
2781     case 'R': // -6..5
2782       if (CVal64 < -6 || CVal64 > 5)
2783         return;
2784       Result = DAG.getTargetConstant(CVal64, DL, Ty);
2785       break;
2786     }
2787
2788     break;
2789   }
2790   case 'G':
2791     const ConstantFPSDNode *FC = dyn_cast<ConstantFPSDNode>(Op);
2792     if (!FC || !FC->isZero())
2793       return;
2794     // Soften float to i8 0
2795     Result = DAG.getTargetConstant(0, DL, MVT::i8);
2796     break;
2797   }
2798
2799   if (Result.getNode()) {
2800     Ops.push_back(Result);
2801     return;
2802   }
2803
2804   return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
2805 }
2806
2807 Register AVRTargetLowering::getRegisterByName(const char *RegName, LLT VT,
2808                                               const MachineFunction &MF) const {
2809   Register Reg;
2810
2811   if (VT == LLT::scalar(8)) {
2812     Reg = StringSwitch<unsigned>(RegName)
2813               .Case("r0", AVR::R0)
2814               .Case("r1", AVR::R1)
2815               .Default(0);
2816   } else {
2817     Reg = StringSwitch<unsigned>(RegName)
2818               .Case("r0", AVR::R1R0)
2819               .Case("sp", AVR::SP)
2820               .Default(0);
2821   }
2822
2823   if (Reg)
2824     return Reg;
2825
2826   report_fatal_error(
2827       Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
2828 }
2829
2830 } // end of namespace llvm