//===-- BPFISelLowering.cpp - BPF DAG Lowering Implementation ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that BPF uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "BPFISelLowering.h"
#include "BPF.h"
#include "BPFSubtarget.h"
#include "BPFTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "bpf-lower"

static cl::opt<bool> BPFExpandMemcpyInOrder("bpf-expand-memcpy-in-order",
  cl::Hidden, cl::init(false),
  cl::desc("Expand memcpy into load/store pairs in order"));

static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
}

static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg,
                 SDValue Val) {
  MachineFunction &MF = DAG.getMachineFunction();
  std::string Str;
  raw_string_ostream OS(Str);
  OS << Msg;
  Val->print(OS);
  OS << ' ';
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Str, DL.getDebugLoc()));
}

BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
                                     const BPFSubtarget &STI)
    : TargetLowering(TM) {

  // Set up the register classes.
  addRegisterClass(MVT::i64, &BPF::GPRRegClass);
  if (STI.getHasAlu32())
    addRegisterClass(MVT::i32, &BPF::GPR32RegClass);

  // Compute derived properties from the register classes
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(BPF::R11);

  setOperationAction(ISD::BR_CC, MVT::i64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  // Set unsupported atomic operations as Custom so
  // we can emit better error messages than fatal error
  // from selectiondag.
  for (auto VT : {MVT::i8, MVT::i16, MVT::i32}) {
    if (VT == MVT::i32) {
      if (STI.getHasAlu32())
        continue;
    } else {
      setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
    }

    setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
    setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
  }

  for (auto VT : { MVT::i32, MVT::i64 }) {
    if (VT == MVT::i32 && !STI.getHasAlu32())
      continue;

    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::SHL_PARTS, VT, Expand);
    setOperationAction(ISD::SRL_PARTS, VT, Expand);
    setOperationAction(ISD::SRA_PARTS, VT, Expand);
    setOperationAction(ISD::CTPOP, VT, Expand);

    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Custom);
  }

  if (STI.getHasAlu32()) {
    setOperationAction(ISD::BSWAP, MVT::i32, Promote);
    setOperationAction(ISD::BR_CC, MVT::i32,
                       STI.getHasJmp32() ? Custom : Promote);
  }

  setOperationAction(ISD::CTTZ, MVT::i64, Custom);
  setOperationAction(ISD::CTLZ, MVT::i64, Custom);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);

  // Extended load operations for i1 types must be promoted
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  // Function alignments
  setMinFunctionAlignment(Align(8));
  setPrefFunctionAlignment(Align(8));

  if (BPFExpandMemcpyInOrder) {
    // LLVM generic code will try to expand memcpy into load/store pairs at this
    // stage which is before quite a few IR optimization passes, therefore the
    // loads and stores could potentially be moved apart from each other which
    // will cause trouble to memcpy pattern matcher inside kernel eBPF JIT
    // compilers.
    //
    // When -bpf-expand-memcpy-in-order is specified, we want to defer the
    // expansion of memcpy to a later stage in the IR optimization pipeline so
    // those load/store pairs won't be touched and could be kept in order.
    // Hence, we set MaxStoresPerMem* to zero to disable the generic
    // getMemcpyLoadsAndStores code path, and ask LLVM to use the target
    // expander EmitTargetCodeForMemcpy.
    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0;
    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0;
    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0;
  } else {
    // inline memcpy() for kernel to see explicit copy
    unsigned CommonMaxStores =
      STI.getSelectionDAGInfo()->getCommonMaxStoresPerMemFunc();

    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores;
    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores;
    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores;
  }
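
  // Note: BPFExpandMemcpyInOrder is a hidden cl::opt, so it is normally
  // switched on from the tool command line rather than programmatically.
  // A minimal sketch of how it might be passed (assuming an llc invocation
  // on BPF IR; the input file name is only an example):
  //
  //   llc -march=bpfel -bpf-expand-memcpy-in-order prog.ll
  //
  // When driving the backend through clang, cl::opt flags are typically
  // forwarded with -mllvm, e.g. "-mllvm -bpf-expand-memcpy-in-order".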

  // CPU/Feature control
  HasAlu32 = STI.getHasAlu32();
  HasJmp32 = STI.getHasJmp32();
  HasJmpExt = STI.getHasJmpExt();
}

bool BPFTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  return false;
}

bool BPFTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 > NumBits2;
}

bool BPFTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 > NumBits2;
}

bool BPFTargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
  if (!getHasAlu32() || !Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 32 && NumBits2 == 64;
}

bool BPFTargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
  if (!getHasAlu32() || !VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 32 && NumBits2 == 64;
}
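
// Zero extension from i32 to i64 is reported as free above because a BPF
// ALU32 instruction that writes a 32-bit register clears the upper 32 bits
// of the enclosing 64-bit register, so no extra instruction is needed.
// A rough illustration (the C function and register names below are only an
// example, not taken from this file):
//
//   unsigned long long widen(unsigned int x) { return x; }
//   // with ALU32 this can lower to a single 32-bit move such as "w0 = w1",
//   // which already leaves the upper 32 bits of r0 zeroed.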

BPFTargetLowering::ConstraintType
BPFTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'w':
      return C_RegisterClass;
    }
  }

  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass *>
BPFTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1)
    // GCC Constraint Letters
    switch (Constraint[0]) {
    case 'r': // GENERAL_REGS
      return std::make_pair(0U, &BPF::GPRRegClass);
    case 'w':
      if (HasAlu32)
        return std::make_pair(0U, &BPF::GPR32RegClass);
      break;
    default:
      break;
    }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

void BPFTargetLowering::ReplaceNodeResults(
  SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  const char *err_msg;
  uint32_t Opcode = N->getOpcode();
  switch (Opcode) {
  default:
    report_fatal_error("Unhandled custom legalization");
  case ISD::ATOMIC_LOAD_ADD:
  case ISD::ATOMIC_LOAD_AND:
  case ISD::ATOMIC_LOAD_OR:
  case ISD::ATOMIC_LOAD_XOR:
  case ISD::ATOMIC_SWAP:
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
    if (HasAlu32 || Opcode == ISD::ATOMIC_LOAD_ADD)
      err_msg = "Unsupported atomic operations, please use 32/64 bit version";
    else
      err_msg = "Unsupported atomic operations, please use 64 bit version";
    break;
  }

  SDLoc DL(N);
  fail(DL, DAG, err_msg);
}

SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::BR_CC:
    return LowerBR_CC(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    report_fatal_error("Unsupported dynamic stack allocation");
  default:
    llvm_unreachable("unimplemented operand");
  }
}

// Calling Convention Implementation
#include "BPFGenCallingConv.inc"

SDValue BPFTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, getHasAlu32() ? CC_BPF32 : CC_BPF64);

  for (auto &VA : ArgLocs) {
    if (VA.isRegLoc()) {
      // Arguments passed in registers
      EVT RegVT = VA.getLocVT();
      MVT::SimpleValueType SimpleTy = RegVT.getSimpleVT().SimpleTy;
      switch (SimpleTy) {
      default: {
        errs() << "LowerFormalArguments Unhandled argument type: "
               << RegVT.getEVTString() << '\n';
        llvm_unreachable(nullptr);
      }
      case MVT::i32:
      case MVT::i64:
        Register VReg = RegInfo.createVirtualRegister(
            SimpleTy == MVT::i64 ? &BPF::GPRRegClass : &BPF::GPR32RegClass);
        RegInfo.addLiveIn(VA.getLocReg(), VReg);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT);

        // If this is a value that has been promoted to a wider type, insert an
        // assert[sz]ext to capture this, then truncate to the right size.
        if (VA.getLocInfo() == CCValAssign::SExt)
          ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue,
                                 DAG.getValueType(VA.getValVT()));
        else if (VA.getLocInfo() == CCValAssign::ZExt)
          ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue,
                                 DAG.getValueType(VA.getValVT()));

        if (VA.getLocInfo() != CCValAssign::Full)
          ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue);

        InVals.push_back(ArgValue);

        break;
      }
    } else {
      fail(DL, DAG, "defined with too many args");
      InVals.push_back(DAG.getConstant(0, DL, VA.getLocVT()));
    }
  }

  if (IsVarArg || MF.getFunction().hasStructRetAttr()) {
    fail(DL, DAG, "functions with VarArgs or StructRet are not supported");
  }

  return Chain;
}

const unsigned BPFTargetLowering::MaxArgs = 5;
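
// MaxArgs mirrors the BPF calling convention: a call may pass at most five
// arguments, in registers R1-R5, and there is no argument passing on the
// stack. The sketch below (purely illustrative C, not part of this file)
// shows a call shape that fits the limit and one that LowerCall will reject
// with "too many args":
//
//   long ok(long a, long b, long c, long d, long e);                // 5 args
//   long too_many(long a, long b, long c, long d, long e, long f);  // rejected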

SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  auto &Outs = CLI.Outs;
  auto &OutVals = CLI.OutVals;
  auto &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();

  // BPF target does not support tail call optimization.
  IsTailCall = false;

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::Fast:
  case CallingConv::C:
    break;
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  CCInfo.AnalyzeCallOperands(Outs, getHasAlu32() ? CC_BPF32 : CC_BPF64);

  unsigned NumBytes = CCInfo.getNextStackOffset();

  if (Outs.size() > MaxArgs)
    fail(CLI.DL, DAG, "too many args to ", Callee);

  for (auto &Arg : Outs) {
    ISD::ArgFlagsTy Flags = Arg.Flags;
    if (!Flags.isByVal())
      continue;

    fail(CLI.DL, DAG, "pass by value not supported ", Callee);
  }

  auto PtrVT = getPointerTy(MF.getDataLayout());
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  SmallVector<std::pair<unsigned, SDValue>, MaxArgs> RegsToPass;

  // Walk arg assignments
  for (unsigned i = 0,
                e = std::min(static_cast<unsigned>(ArgLocs.size()), MaxArgs);
       i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[i];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown loc info");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    }

    // Push arguments into RegsToPass vector
    if (VA.isRegLoc())
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    else
      llvm_unreachable("call arg pass bug");
  }

  SDValue InFlag;

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers. The InFlag
  // is necessary since all emitted instructions must be stuck together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, CLI.DL, Reg.first, Reg.second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  // Likewise ExternalSymbol -> TargetExternalSymbol.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT,
                                        G->getOffset(), 0);
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
    fail(CLI.DL, DAG, Twine("A call to built-in function '"
                            + StringRef(E->getSymbol())
                            + "' is not supported."));
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  Chain = DAG.getNode(BPFISD::CALL, CLI.DL, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(
      Chain, DAG.getConstant(NumBytes, CLI.DL, PtrVT, true),
      DAG.getConstant(0, CLI.DL, PtrVT, true), InFlag, CLI.DL);
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, CLI.DL, DAG,
                         InVals);
}

SDValue
BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool IsVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &DL, SelectionDAG &DAG) const {
  unsigned Opc = BPFISD::RET_FLAG;

  // CCValAssign - represent the assignment of the return value to a location
  SmallVector<CCValAssign, 16> RVLocs;
  MachineFunction &MF = DAG.getMachineFunction();

  // CCState - Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());

  if (MF.getFunction().getReturnType()->isAggregateType()) {
    fail(DL, DAG, "only integer returns supported");
    return DAG.getNode(Opc, DL, MVT::Other, Chain);
  }

  // Analyze return values.
  CCInfo.AnalyzeReturn(Outs, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Flag);

    // Guarantee that all emitted copies are stuck together,
    // avoiding something bad.
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(Opc, DL, MVT::Other, RetOps);
}

SDValue BPFTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());

  if (Ins.size() >= 2) {
    fail(DL, DAG, "only small returns supported");
    for (unsigned i = 0, e = Ins.size(); i != e; ++i)
      InVals.push_back(DAG.getConstant(0, DL, Ins[i].VT));
    return DAG.getCopyFromReg(Chain, DL, 1, Ins[0].VT, InFlag).getValue(1);
  }

  CCInfo.AnalyzeCallResult(Ins, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);

  // Copy all of the result registers out of their specified physreg.
  for (auto &Val : RVLocs) {
    Chain = DAG.getCopyFromReg(Chain, DL, Val.getLocReg(),
                               Val.getValVT(), InFlag).getValue(1);
    InFlag = Chain.getValue(2);
    InVals.push_back(Chain.getValue(0));
  }

  return Chain;
}
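
// Background for NegateCC below: before the "jmp-ext" ISA extension, eBPF had
// no JLT/JLE/JSLT/JSLE jump instructions, only the greater-than forms. On
// such targets a less-than style comparison has to be rewritten by swapping
// its operands, which is what NegateCC does. Purely as an illustration
// (the operand names are made up, not taken from this file): lowering
// "if (a < b)" without jmp-ext becomes the equivalent "if (b > a)".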

static void NegateCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETLT:
  case ISD::SETLE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

SDValue BPFTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc DL(Op);

  if (!getHasJmpExt())
    NegateCC(LHS, RHS, CC);

  return DAG.getNode(BPFISD::BR_CC, DL, Op.getValueType(), Chain, LHS, RHS,
                     DAG.getConstant(CC, DL, LHS.getValueType()), Dest);
}

SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue TrueV = Op.getOperand(2);
  SDValue FalseV = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDLoc DL(Op);

  if (!getHasJmpExt())
    NegateCC(LHS, RHS, CC);

  SDValue TargetCC = DAG.getConstant(CC, DL, LHS.getValueType());
  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};

  return DAG.getNode(BPFISD::SELECT_CC, DL, VTs, Ops);
}

const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((BPFISD::NodeType)Opcode) {
  case BPFISD::FIRST_NUMBER:
    break;
  case BPFISD::RET_FLAG:
    return "BPFISD::RET_FLAG";
  case BPFISD::CALL:
    return "BPFISD::CALL";
  case BPFISD::SELECT_CC:
    return "BPFISD::SELECT_CC";
  case BPFISD::BR_CC:
    return "BPFISD::BR_CC";
  case BPFISD::Wrapper:
    return "BPFISD::Wrapper";
  case BPFISD::MEMCPY:
    return "BPFISD::MEMCPY";
  }
  return nullptr;
}

SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  auto N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "Invalid offset for global address");

  SDLoc DL(Op);
  const GlobalValue *GV = N->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i64);

  return DAG.getNode(BPFISD::Wrapper, DL, MVT::i64, GA);
}

unsigned
BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
                                 unsigned Reg, bool isSigned) const {
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const TargetRegisterClass *RC = getRegClassFor(MVT::i64);
  int RShiftOp = isSigned ? BPF::SRA_ri : BPF::SRL_ri;
  MachineFunction *F = BB->getParent();
  DebugLoc DL = MI.getDebugLoc();

  MachineRegisterInfo &RegInfo = F->getRegInfo();

  if (!isSigned) {
    Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
    return PromotedReg0;
  }

  Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
  Register PromotedReg1 = RegInfo.createVirtualRegister(RC);
  Register PromotedReg2 = RegInfo.createVirtualRegister(RC);
  BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
  BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1)
    .addReg(PromotedReg0).addImm(32);
  BuildMI(BB, DL, TII.get(RShiftOp), PromotedReg2)
    .addReg(PromotedReg1).addImm(32);

  return PromotedReg2;
}
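
// For reference, the signed promotion emitted above corresponds roughly to
// the following eBPF assembly (register names are illustrative only):
//
//   r0 = w1        ; MOV_32_64: move the 32-bit subregister, upper bits zeroed
//   r0 <<= 32      ; SLL_ri
//   r0 s>>= 32     ; SRA_ri (the unsigned path would use SRL_ri instead)
//
// The unsigned case stops after the first move, since the 32-to-64-bit move
// already zero-extends.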

MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserterMemcpy(MachineInstr &MI,
                                                     MachineBasicBlock *BB)
    const {
  MachineFunction *MF = MI.getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineInstrBuilder MIB(*MF, MI);
  unsigned ScratchReg;

  // This function does custom insertion during lowering BPFISD::MEMCPY which
  // only has two register operands from memcpy semantics, the copy source
  // address and the copy destination address.
  //
  // Because we will expand BPFISD::MEMCPY into load/store pairs, we will need
  // a third scratch register to serve as the destination register of load and
  // source register of store.
  //
  // The scratch register here is with the Define | Dead | EarlyClobber flags.
  // The EarlyClobber flag has the semantic property that the operand it is
  // attached to is clobbered before the rest of the inputs are read. Hence it
  // must be unique among the operands to the instruction. The Define flag is
  // needed to convince the machine verifier that an Undef value isn't a
  // problem as we are loading memory into it anyway. The Dead flag is needed
  // as the value in scratch isn't supposed to be used by any other
  // instruction.
  ScratchReg = MRI.createVirtualRegister(&BPF::GPRRegClass);
  MIB.addReg(ScratchReg,
             RegState::Define | RegState::Dead | RegState::EarlyClobber);

  return BB;
}

MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Opc = MI.getOpcode();
  bool isSelectRROp = (Opc == BPF::Select ||
                       Opc == BPF::Select_64_32 ||
                       Opc == BPF::Select_32 ||
                       Opc == BPF::Select_32_64);

  bool isMemcpyOp = Opc == BPF::MEMCPY;

#ifndef NDEBUG
  bool isSelectRIOp = (Opc == BPF::Select_Ri ||
                       Opc == BPF::Select_Ri_64_32 ||
                       Opc == BPF::Select_Ri_32 ||
                       Opc == BPF::Select_Ri_32_64);

  assert((isSelectRROp || isSelectRIOp || isMemcpyOp) &&
         "Unexpected instr type to insert");
#endif

  if (isMemcpyOp)
    return EmitInstrWithCustomInserterMemcpy(MI, BB);

  bool is32BitCmp = (Opc == BPF::Select_32 ||
                     Opc == BPF::Select_32_64 ||
                     Opc == BPF::Select_Ri_32 ||
                     Opc == BPF::Select_Ri_32_64);

  // To "insert" a SELECT instruction, we actually have to insert the diamond
  // control-flow pattern. The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and a branch opcode to use.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator I = ++BB->getIterator();

  // ThisMBB:
  // ...
  //  TrueVal = ...
  //  jmp_XX r1, r2 goto Copy1MBB
  //  fallthrough --> Copy0MBB
  MachineBasicBlock *ThisMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *Copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, Copy0MBB);
  F->insert(I, Copy1MBB);
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi node for the select.
  Copy1MBB->splice(Copy1MBB->begin(), BB,
                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
  Copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
  // Next, add the true and fallthrough blocks as its successors.
  BB->addSuccessor(Copy0MBB);
  BB->addSuccessor(Copy1MBB);

  // Insert Branch if Flag
  int CC = MI.getOperand(3).getImm();
  int NewCC;
  switch (CC) {
#define SET_NEWCC(X, Y) \
  case ISD::X: \
    if (is32BitCmp && HasJmp32) \
      NewCC = isSelectRROp ? BPF::Y##_rr_32 : BPF::Y##_ri_32; \
    else \
      NewCC = isSelectRROp ? BPF::Y##_rr : BPF::Y##_ri; \
    break
  SET_NEWCC(SETGT, JSGT);
  SET_NEWCC(SETUGT, JUGT);
  SET_NEWCC(SETGE, JSGE);
  SET_NEWCC(SETUGE, JUGE);
  SET_NEWCC(SETEQ, JEQ);
  SET_NEWCC(SETNE, JNE);
  SET_NEWCC(SETLT, JSLT);
  SET_NEWCC(SETULT, JULT);
  SET_NEWCC(SETLE, JSLE);
  SET_NEWCC(SETULE, JULE);
  default:
    report_fatal_error("unimplemented select CondCode " + Twine(CC));
  }

  Register LHS = MI.getOperand(1).getReg();
  bool isSignedCmp = (CC == ISD::SETGT ||
                      CC == ISD::SETGE ||
                      CC == ISD::SETLT ||
                      CC == ISD::SETLE);

  // eBPF at the moment only has 64-bit comparisons. Any 32-bit comparison
  // needs to be promoted, however if the 32-bit comparison operands are
  // destination registers then they are implicitly zero-extended already, so
  // there is no need for an explicit zero-extend sequence for them.
  //
  // We simply do extension for all situations in this method, but we will
  // try to remove the unnecessary ones in the BPFMIPeephole pass.
  if (is32BitCmp && !HasJmp32)
    LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp);

  if (isSelectRROp) {
    Register RHS = MI.getOperand(2).getReg();

    if (is32BitCmp && !HasJmp32)
      RHS = EmitSubregExt(MI, BB, RHS, isSignedCmp);

    BuildMI(BB, DL, TII.get(NewCC)).addReg(LHS).addReg(RHS).addMBB(Copy1MBB);
  } else {
    int64_t imm32 = MI.getOperand(2).getImm();
    // Sanity check before we build the J*_ri instruction.
    assert(isInt<32>(imm32));
    BuildMI(BB, DL, TII.get(NewCC))
        .addReg(LHS).addImm(imm32).addMBB(Copy1MBB);
  }

  // Copy0MBB:
  //  %FalseValue = ...
  //  # fallthrough to Copy1MBB
  BB = Copy0MBB;

  // Update machine-CFG edges
  BB->addSuccessor(Copy1MBB);

  // Copy1MBB:
  //  %Result = phi [ %FalseValue, Copy0MBB ], [ %TrueValue, ThisMBB ]
  // ...
  BB = Copy1MBB;
  BuildMI(*BB, BB->begin(), DL, TII.get(BPF::PHI), MI.getOperand(0).getReg())
      .addReg(MI.getOperand(5).getReg())
      .addMBB(Copy0MBB)
      .addReg(MI.getOperand(4).getReg())
      .addMBB(ThisMBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
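
// To make the diamond above concrete, a select pseudo such as
// "Result = Select LHS, RHS, SETGT, TrueV, FalseV" is rewritten into roughly
// the following control flow (block names follow the comments above; the
// instruction spelling is only illustrative):
//
//   ThisMBB:   if LHS s> RHS goto Copy1MBB
//   Copy0MBB:  ; fallthrough, supplies FalseV
//   Copy1MBB:  Result = PHI [FalseV, Copy0MBB], [TrueV, ThisMBB]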

EVT BPFTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                          EVT VT) const {
  return getHasAlu32() ? MVT::i32 : MVT::i64;
}

MVT BPFTargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
                                              EVT VT) const {
  return (getHasAlu32() && VT == MVT::i32) ? MVT::i32 : MVT::i64;
}