//===-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the SystemZ target.
//
//===----------------------------------------------------------------------===//

#include "SystemZTargetMachine.h"
#include "SystemZISelLowering.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "systemz-isel"
#define PASS_NAME "SystemZ DAG->DAG Pattern Instruction Selection"

namespace {
// Used to build addressing modes.
struct SystemZAddressingMode {
  // The shape of the address.
  enum AddrForm {
    // base+displacement
    FormBD,

    // base+displacement+index for load and store operands
    FormBDXNormal,

    // base+displacement+index for load address operands
    FormBDXLA,

    // base+displacement+index+ADJDYNALLOC
    FormBDXDynAlloc
  };
  AddrForm Form;

  // The type of displacement.  The enum names here correspond directly
  // to the definitions in SystemZOperand.td.  We could split them into
  // flags -- single/pair, 128-bit, etc. -- but it hardly seems worth it.
  enum DispRange {
    Disp12Only,
    Disp12Pair,
    Disp20Only,
    Disp20Only128,
    Disp20Pair
  };
  DispRange DR;

  // The parts of the address.  The address is equivalent to:
  //
  //   Base + Disp + Index + (IncludesDynAlloc ? ADJDYNALLOC : 0)
  SDValue Base;
  int64_t Disp;
  SDValue Index;
  bool IncludesDynAlloc;

  SystemZAddressingMode(AddrForm form, DispRange dr)
      : Form(form), DR(dr), Disp(0), IncludesDynAlloc(false) {}

  // True if the address can have an index register.
  bool hasIndexField() { return Form != FormBD; }

  // True if the address can (and must) include ADJDYNALLOC.
  bool isDynAlloc() { return Form == FormBDXDynAlloc; }

  void dump(const llvm::SelectionDAG *DAG) {
    errs() << "SystemZAddressingMode " << this << '\n';

    errs() << " Base ";
    if (Base.getNode())
      Base.getNode()->dump(DAG);
    else
      errs() << "null";
    errs() << '\n';

    if (hasIndexField()) {
      errs() << " Index ";
      if (Index.getNode())
        Index.getNode()->dump(DAG);
      else
        errs() << "null";
      errs() << '\n';
    }

    errs() << " Disp " << Disp;
    if (IncludesDynAlloc)
      errs() << " + ADJDYNALLOC";
    errs() << '\n';
  }
};
// Return a mask with Count low bits set.
static uint64_t allOnes(unsigned int Count) {
  assert(Count <= 64);
  if (Count > 63)
    return UINT64_MAX;
  return (uint64_t(1) << Count) - 1;
}
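
// For example, allOnes(12) == 0xfff and allOnes(64) == ~0ULL; the Count > 63
// guard above avoids shifting a 64-bit value by 64, which would be undefined
// behavior.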
// Represents operands 2 to 5 of the ROTATE AND ... SELECTED BITS operation
// given by Opcode.  The operands are: Input (R2), Start (I3), End (I4) and
// Rotate (I5).  The combined operand value is effectively:
//
//   (or (rotl Input, Rotate), ~Mask)
//
// for RNSBG and:
//
//   (and (rotl Input, Rotate), Mask)
//
// otherwise.  The output value has BitSize bits, although Input may be
// narrower (in which case the upper bits are don't care), or wider (in which
// case the result will be truncated as part of the operation).
struct RxSBGOperands {
  RxSBGOperands(unsigned Op, SDValue N)
      : Opcode(Op), BitSize(N.getValueSizeInBits()),
        Mask(allOnes(BitSize)), Input(N), Start(64 - BitSize), End(63),
        Rotate(0) {}

  unsigned Opcode;
  unsigned BitSize;
  uint64_t Mask;
  SDValue Input;
  unsigned Start;
  unsigned End;
  unsigned Rotate;
};
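
// As a concrete reading of the comment above: a SystemZ::RISBG value with
// Rotate == 4, Start == 48 and End == 63 describes
// (and (rotl Input, 4), 0xffff), i.e. bits 48-63 (in the big-endian bit
// numbering that Start and End use) of the rotated input are selected and
// all other bits are cleared.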
class SystemZDAGToDAGISel : public SelectionDAGISel {
  const SystemZSubtarget *Subtarget;

  // Used by SystemZOperands.td to create integer constants.
  inline SDValue getImm(const SDNode *Node, uint64_t Imm) const {
    return CurDAG->getTargetConstant(Imm, SDLoc(Node), Node->getValueType(0));
  }

  const SystemZTargetMachine &getTargetMachine() const {
    return static_cast<const SystemZTargetMachine &>(TM);
  }

  const SystemZInstrInfo *getInstrInfo() const {
    return Subtarget->getInstrInfo();
  }

  // Try to fold more of the base or index of AM into AM, where IsBase
  // selects between the base and index.
  bool expandAddress(SystemZAddressingMode &AM, bool IsBase) const;

  // Try to describe N in AM, returning true on success.
  bool selectAddress(SDValue N, SystemZAddressingMode &AM) const;

  // Extract individual target operands from matched address AM.
  void getAddressOperands(const SystemZAddressingMode &AM, EVT VT,
                          SDValue &Base, SDValue &Disp) const;
  void getAddressOperands(const SystemZAddressingMode &AM, EVT VT,
                          SDValue &Base, SDValue &Disp, SDValue &Index) const;

  // Try to match Addr as a FormBD address with displacement type DR.
  // Return true on success, storing the base and displacement in
  // Base and Disp respectively.
  bool selectBDAddr(SystemZAddressingMode::DispRange DR, SDValue Addr,
                    SDValue &Base, SDValue &Disp) const;

  // Try to match Addr as a FormBDX address with displacement type DR.
  // Return true on success and if the result had no index.  Store the
  // base and displacement in Base and Disp respectively.
  bool selectMVIAddr(SystemZAddressingMode::DispRange DR, SDValue Addr,
                     SDValue &Base, SDValue &Disp) const;

  // Try to match Addr as a FormBDX* address of form Form with
  // displacement type DR.  Return true on success, storing the base,
  // displacement and index in Base, Disp and Index respectively.
  bool selectBDXAddr(SystemZAddressingMode::AddrForm Form,
                     SystemZAddressingMode::DispRange DR, SDValue Addr,
                     SDValue &Base, SDValue &Disp, SDValue &Index) const;

  // PC-relative address matching routines used by SystemZOperands.td.
  bool selectPCRelAddress(SDValue Addr, SDValue &Target) const {
    if (SystemZISD::isPCREL(Addr.getOpcode())) {
      Target = Addr.getOperand(0);
      return true;
    }
    return false;
  }

  // BD matching routines used by SystemZOperands.td.
  bool selectBDAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp) const {
    return selectBDAddr(SystemZAddressingMode::Disp12Only, Addr, Base, Disp);
  }
  bool selectBDAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
    return selectBDAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp);
  }
  bool selectBDAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp) const {
    return selectBDAddr(SystemZAddressingMode::Disp20Only, Addr, Base, Disp);
  }
  bool selectBDAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
    return selectBDAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp);
  }

  // MVI matching routines used by SystemZOperands.td.
  bool selectMVIAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
    return selectMVIAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp);
  }
  bool selectMVIAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
    return selectMVIAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp);
  }

  // BDX matching routines used by SystemZOperands.td.
  bool selectBDXAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp,
                           SDValue &Index) const {
    return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
                         SystemZAddressingMode::Disp12Only,
                         Addr, Base, Disp, Index);
  }
  bool selectBDXAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
                           SDValue &Index) const {
    return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
                         SystemZAddressingMode::Disp12Pair,
                         Addr, Base, Disp, Index);
  }
  bool selectDynAlloc12Only(SDValue Addr, SDValue &Base, SDValue &Disp,
                            SDValue &Index) const {
    return selectBDXAddr(SystemZAddressingMode::FormBDXDynAlloc,
                         SystemZAddressingMode::Disp12Only,
                         Addr, Base, Disp, Index);
  }
  bool selectBDXAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp,
                           SDValue &Index) const {
    return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
                         SystemZAddressingMode::Disp20Only,
                         Addr, Base, Disp, Index);
  }
  bool selectBDXAddr20Only128(SDValue Addr, SDValue &Base, SDValue &Disp,
                              SDValue &Index) const {
    return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
                         SystemZAddressingMode::Disp20Only128,
                         Addr, Base, Disp, Index);
  }
  bool selectBDXAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
                           SDValue &Index) const {
    return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
                         SystemZAddressingMode::Disp20Pair,
                         Addr, Base, Disp, Index);
  }
  bool selectLAAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
                          SDValue &Index) const {
    return selectBDXAddr(SystemZAddressingMode::FormBDXLA,
                         SystemZAddressingMode::Disp12Pair,
                         Addr, Base, Disp, Index);
  }
  bool selectLAAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
                          SDValue &Index) const {
    return selectBDXAddr(SystemZAddressingMode::FormBDXLA,
                         SystemZAddressingMode::Disp20Pair,
                         Addr, Base, Disp, Index);
  }

  // Try to match Addr as an address with a base, 12-bit displacement
  // and index, where the index is element Elem of a vector.
  // Return true on success, storing the base, displacement and vector
  // in Base, Disp and Index respectively.
  bool selectBDVAddr12Only(SDValue Addr, SDValue Elem, SDValue &Base,
                           SDValue &Disp, SDValue &Index) const;

  // Check whether (or Op (and X InsertMask)) is effectively an insertion
  // of X into bits InsertMask of some Y != Op.  Return true if so and
  // set Op to that Y.
  bool detectOrAndInsertion(SDValue &Op, uint64_t InsertMask) const;

  // Try to update RxSBG so that only the bits of RxSBG.Input in Mask are used.
  // Return true on success.
  bool refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask) const;

  // Try to fold some of RxSBG.Input into other fields of RxSBG.
  // Return true on success.
  bool expandRxSBG(RxSBGOperands &RxSBG) const;

  // Return an undefined value of type VT.
  SDValue getUNDEF(const SDLoc &DL, EVT VT) const;

  // Convert N to VT, if it isn't already.
  SDValue convertTo(const SDLoc &DL, EVT VT, SDValue N) const;

  // Try to implement AND or shift node N using RISBG with the zero flag set.
  // Return true on success, otherwise false.
  bool tryRISBGZero(SDNode *N);

  // Try to use RISBG or Opcode to implement OR or XOR node N.
  // Return true on success, otherwise false.
  bool tryRxSBG(SDNode *N, unsigned Opcode);

  // If Op0 is null, then Node is a constant that can be loaded using:
  //
  //   (Opcode UpperVal LowerVal)
  //
  // If Op0 is nonnull, then Node can be implemented using:
  //
  //   (Opcode (Opcode Op0 UpperVal) LowerVal)
  void splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
                           uint64_t UpperVal, uint64_t LowerVal);

  void loadVectorConstant(const SystemZVectorConstantInfo &VCI,
                          SDNode *Node);

  SDNode *loadPoolVectorConstant(APInt Val, EVT VT, SDLoc DL);

  // Try to use gather instruction Opcode to implement vector insertion N.
  bool tryGather(SDNode *N, unsigned Opcode);

  // Try to use scatter instruction Opcode to implement store Store.
  bool tryScatter(StoreSDNode *Store, unsigned Opcode);

  // Change a chain of {load; op; store} of the same value into a simple op
  // through memory of that value, if the uses of the modified value and its
  // address are suitable.
  bool tryFoldLoadStoreIntoMemOperand(SDNode *Node);

  // Return true if Load and Store are loads and stores of the same size
  // and are guaranteed not to overlap.  Such operations can be implemented
  // using block (SS-format) instructions.
  //
  // Partial overlap would lead to incorrect code, since the block operations
  // are logically bytewise, even though they have a fast path for the
  // non-overlapping case.  We also need to avoid full overlap (i.e. two
  // addresses that might be equal at run time) because although that case
  // would be handled correctly, it might be implemented by millicode.
  bool canUseBlockOperation(StoreSDNode *Store, LoadSDNode *Load) const;

  // N is a (store (load Y), X) pattern.  Return true if it can use an MVC
  // from Y to X.
  bool storeLoadCanUseMVC(SDNode *N) const;

  // N is a (store (op (load A[0]), (load A[1])), X) pattern.  Return true
  // if A[1 - I] == X and if N can use a block operation like NC from A[I]
  // to X.
  bool storeLoadCanUseBlockBinary(SDNode *N, unsigned I) const;

  // Return true if N (a load or a store) fulfills the alignment
  // requirements for a PC-relative access.
  bool storeLoadIsAligned(SDNode *N) const;

  // Return the load extension type of a load or atomic load.
  ISD::LoadExtType getLoadExtType(SDNode *N) const;

  // Try to expand a boolean SELECT_CCMASK using an IPM sequence.
  SDValue expandSelectBoolean(SDNode *Node);

  // Return true if the flags of N and the subtarget allow for
  // reassociation, in which case a reg/reg opcode is needed as input to the
  // MachineCombiner.
  bool shouldSelectForReassoc(SDNode *N) const;

public:
  SystemZDAGToDAGISel() = delete;

  SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOptLevel OptLevel)
      : SelectionDAGISel(TM, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    const Function &F = MF.getFunction();
    if (F.getFnAttribute("fentry-call").getValueAsString() != "true") {
      if (F.hasFnAttribute("mnop-mcount"))
        report_fatal_error("mnop-mcount only supported with fentry-call");
      if (F.hasFnAttribute("mrecord-mcount"))
        report_fatal_error("mrecord-mcount only supported with fentry-call");
    }

    Subtarget = &MF.getSubtarget<SystemZSubtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  // Override SelectionDAGISel.
  void Select(SDNode *Node) override;
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    InlineAsm::ConstraintCode ConstraintID,
                                    std::vector<SDValue> &OutOps) override;
  bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
  void PreprocessISelDAG() override;

  // Include the pieces autogenerated from the target description.
#include "SystemZGenDAGISel.inc"
};
class SystemZDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
public:
  static char ID;

  explicit SystemZDAGToDAGISelLegacy(SystemZTargetMachine &TM,
                                     CodeGenOptLevel OptLevel)
      : SelectionDAGISelLegacy(
            ID, std::make_unique<SystemZDAGToDAGISel>(TM, OptLevel)) {}
};
} // end anonymous namespace

char SystemZDAGToDAGISelLegacy::ID = 0;

INITIALIZE_PASS(SystemZDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)

FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM,
                                         CodeGenOptLevel OptLevel) {
  return new SystemZDAGToDAGISelLegacy(TM, OptLevel);
}
// Return true if Val should be selected as a displacement for an address
// with range DR.  Here we're interested in the range of both the instruction
// described by DR and of any pairing instruction.
static bool selectDisp(SystemZAddressingMode::DispRange DR, int64_t Val) {
  switch (DR) {
  case SystemZAddressingMode::Disp12Only:
    return isUInt<12>(Val);

  case SystemZAddressingMode::Disp12Pair:
  case SystemZAddressingMode::Disp20Only:
  case SystemZAddressingMode::Disp20Pair:
    return isInt<20>(Val);

  case SystemZAddressingMode::Disp20Only128:
    return isInt<20>(Val) && isInt<20>(Val + 8);
  }
  llvm_unreachable("Unhandled displacement range");
}
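
// For reference: isUInt<12> accepts 0..4095 (the short D12 displacement
// field) and isInt<20> accepts -524288..524287 (the long D20 field).
// Disp20Only128 additionally checks Val + 8, presumably because a 128-bit
// access is performed as two 8-byte accesses and the second access's
// displacement must be in range as well.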
// Change the base or index in AM to Value, where IsBase selects
// between the base and index.
static void changeComponent(SystemZAddressingMode &AM, bool IsBase,
                            SDValue Value) {
  if (IsBase)
    AM.Base = Value;
  else
    AM.Index = Value;
}
// The base or index of AM is equivalent to Value + ADJDYNALLOC,
// where IsBase selects between the base and index.  Try to fold the
// ADJDYNALLOC into AM.
static bool expandAdjDynAlloc(SystemZAddressingMode &AM, bool IsBase,
                              SDValue Value) {
  if (AM.isDynAlloc() && !AM.IncludesDynAlloc) {
    changeComponent(AM, IsBase, Value);
    AM.IncludesDynAlloc = true;
    return true;
  }
  return false;
}
// The base of AM is equivalent to Base + Index.  Try to use Index as
// the index register.
static bool expandIndex(SystemZAddressingMode &AM, SDValue Base,
                        SDValue Index) {
  if (AM.hasIndexField() && !AM.Index.getNode()) {
    AM.Base = Base;
    AM.Index = Index;
    return true;
  }
  return false;
}
// The base or index of AM is equivalent to Op0 + Op1, where IsBase selects
// between the base and index.  Try to fold Op1 into AM's displacement.
static bool expandDisp(SystemZAddressingMode &AM, bool IsBase,
                       SDValue Op0, uint64_t Op1) {
  // First try adjusting the displacement.
  int64_t TestDisp = AM.Disp + Op1;
  if (selectDisp(AM.DR, TestDisp)) {
    changeComponent(AM, IsBase, Op0);
    AM.Disp = TestDisp;
    return true;
  }

  // We could consider forcing the displacement into a register and
  // using it as an index, but it would need to be carefully tuned.
  return false;
}
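
// For example, given an AM whose base is (add %x, 100) with Disp == 0, a
// successful expansion via expandDisp leaves the base as %x and Disp as 100,
// provided 100 is representable in AM's displacement range.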
bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM,
                                        bool IsBase) const {
  SDValue N = IsBase ? AM.Base : AM.Index;
  unsigned Opcode = N.getOpcode();
  // Look through no-op truncations.
  if (Opcode == ISD::TRUNCATE && N.getOperand(0).getValueSizeInBits() <= 64) {
    N = N.getOperand(0);
    Opcode = N.getOpcode();
  }
  if (Opcode == ISD::ADD || CurDAG->isBaseWithConstantOffset(N)) {
    SDValue Op0 = N.getOperand(0);
    SDValue Op1 = N.getOperand(1);

    unsigned Op0Code = Op0->getOpcode();
    unsigned Op1Code = Op1->getOpcode();

    if (Op0Code == SystemZISD::ADJDYNALLOC)
      return expandAdjDynAlloc(AM, IsBase, Op1);
    if (Op1Code == SystemZISD::ADJDYNALLOC)
      return expandAdjDynAlloc(AM, IsBase, Op0);

    if (Op0Code == ISD::Constant)
      return expandDisp(AM, IsBase, Op1,
                        cast<ConstantSDNode>(Op0)->getSExtValue());
    if (Op1Code == ISD::Constant)
      return expandDisp(AM, IsBase, Op0,
                        cast<ConstantSDNode>(Op1)->getSExtValue());

    if (IsBase && expandIndex(AM, Op0, Op1))
      return true;
  }
  if (Opcode == SystemZISD::PCREL_OFFSET) {
    SDValue Full = N.getOperand(0);
    SDValue Base = N.getOperand(1);
    SDValue Anchor = Base.getOperand(0);
    uint64_t Offset = (cast<GlobalAddressSDNode>(Full)->getOffset() -
                       cast<GlobalAddressSDNode>(Anchor)->getOffset());
    return expandDisp(AM, IsBase, Base, Offset);
  }
  return false;
}
// Return true if an instruction with displacement range DR should be
// used for displacement value Val.  selectDisp(DR, Val) must already hold.
static bool isValidDisp(SystemZAddressingMode::DispRange DR, int64_t Val) {
  assert(selectDisp(DR, Val) && "Invalid displacement");
  switch (DR) {
  case SystemZAddressingMode::Disp12Only:
  case SystemZAddressingMode::Disp20Only:
  case SystemZAddressingMode::Disp20Only128:
    return true;

  case SystemZAddressingMode::Disp12Pair:
    // Use the other instruction if the displacement is too large.
    return isUInt<12>(Val);

  case SystemZAddressingMode::Disp20Pair:
    // Use the other instruction if the displacement is small enough.
    return !isUInt<12>(Val);
  }
  llvm_unreachable("Unhandled displacement range");
}
// Return true if Base + Disp + Index should be performed by LA(Y).
static bool shouldUseLA(SDNode *Base, int64_t Disp, SDNode *Index) {
  // Don't use LA(Y) for constants.
  if (!Base)
    return false;

  // Always use LA(Y) for frame addresses, since we know that the destination
  // register is almost always (perhaps always) going to be different from
  // the frame register.
  if (Base->getOpcode() == ISD::FrameIndex)
    return true;

  if (Disp) {
    // Always use LA(Y) if there is a base, displacement and index.
    if (Index)
      return true;

    // Always use LA if the displacement is small enough.  It should always
    // be no worse than AGHI (and better if it avoids a move).
    if (isUInt<12>(Disp))
      return true;

    // For similar reasons, always use LAY if the constant is too big for AGHI.
    // LAY should be no worse than AGFI.
    if (!isInt<16>(Disp))
      return true;
  } else {
    // Don't use LA for plain registers.
    if (!Index)
      return false;

    // Don't use LA for plain addition if the index operand is only used
    // once.  It should be a natural two-operand addition in that case.
    if (Index->hasOneUse())
      return false;

    // Prefer addition if the second operation is sign-extended, in the
    // hope of using AGF.
    unsigned IndexOpcode = Index->getOpcode();
    if (IndexOpcode == ISD::SIGN_EXTEND ||
        IndexOpcode == ISD::SIGN_EXTEND_INREG)
      return false;
  }

  // Don't use LA for two-operand addition if either operand is only
  // used once.  The addition instructions are better in that case.
  if (Base->hasOneUse())
    return false;

  return true;
}
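
// (z/Architecture note: LA takes a 12-bit unsigned displacement and LAY the
// long 20-bit signed one, while AGHI takes a 16-bit signed immediate; that is
// what the isUInt<12> and isInt<16> tests above distinguish.)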
// Return true if Addr is suitable for AM, updating AM if so.
bool SystemZDAGToDAGISel::selectAddress(SDValue Addr,
                                        SystemZAddressingMode &AM) const {
  // Start out assuming that the address will need to be loaded separately,
  // then try to extend it as much as we can.
  AM.Base = Addr;

  // First try treating the address as a constant.
  if (Addr.getOpcode() == ISD::Constant &&
      expandDisp(AM, true, SDValue(),
                 cast<ConstantSDNode>(Addr)->getSExtValue()))
    ;
  // Also see if it's a bare ADJDYNALLOC.
  else if (Addr.getOpcode() == SystemZISD::ADJDYNALLOC &&
           expandAdjDynAlloc(AM, true, SDValue()))
    ;
  else
    // Otherwise try expanding each component.
    while (expandAddress(AM, true) ||
           (AM.Index.getNode() && expandAddress(AM, false)))
      continue;

  // Reject cases where it isn't profitable to use LA(Y).
  if (AM.Form == SystemZAddressingMode::FormBDXLA &&
      !shouldUseLA(AM.Base.getNode(), AM.Disp, AM.Index.getNode()))
    return false;

  // Reject cases where the other instruction in a pair should be used.
  if (!isValidDisp(AM.DR, AM.Disp))
    return false;

  // Make sure that ADJDYNALLOC is included where necessary.
  if (AM.isDynAlloc() && !AM.IncludesDynAlloc)
    return false;

  LLVM_DEBUG(AM.dump(CurDAG));
  return true;
}
// Insert a node into the DAG at least before Pos.  This will reposition
// the node as needed, and will assign it a node ID that is <= Pos's ID.
// Note that this does *not* preserve the uniqueness of node IDs!
// The selection DAG must no longer depend on their uniqueness when this
// function is used.
static void insertDAGNode(SelectionDAG *DAG, SDNode *Pos, SDValue N) {
  if (N->getNodeId() == -1 ||
      (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >
       SelectionDAGISel::getUninvalidatedNodeId(Pos))) {
    DAG->RepositionNode(Pos->getIterator(), N.getNode());
    // Mark Node as invalid for pruning as after this it may be a successor to
    // a selected node but otherwise be in the same position as Pos.
    // Conservatively mark it with the same -abs(Id) to assure node id
    // invariant is preserved.
    N->setNodeId(Pos->getNodeId());
    SelectionDAGISel::InvalidateNodeId(N.getNode());
  }
}
void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
                                             EVT VT, SDValue &Base,
                                             SDValue &Disp) const {
  Base = AM.Base;
  if (!Base.getNode())
    // Register 0 means "no base".  This is mostly useful for shifts.
    Base = CurDAG->getRegister(0, VT);
  else if (Base.getOpcode() == ISD::FrameIndex) {
    // Lower a FrameIndex to a TargetFrameIndex.
    int64_t FrameIndex = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FrameIndex, VT);
  } else if (Base.getValueType() != VT) {
    // Truncate values from i64 to i32, for shifts.
    assert(VT == MVT::i32 && Base.getValueType() == MVT::i64 &&
           "Unexpected truncation");
    SDLoc DL(Base);
    SDValue Trunc = CurDAG->getNode(ISD::TRUNCATE, DL, VT, Base);
    insertDAGNode(CurDAG, Base.getNode(), Trunc);
    Base = Trunc;
  }

  // Lower the displacement to a TargetConstant.
  Disp = CurDAG->getSignedTargetConstant(AM.Disp, SDLoc(Base), VT);
}

void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
                                             EVT VT, SDValue &Base,
                                             SDValue &Disp,
                                             SDValue &Index) const {
  getAddressOperands(AM, VT, Base, Disp);

  Index = AM.Index;
  if (!Index.getNode())
    // Register 0 means "no index".
    Index = CurDAG->getRegister(0, VT);
}
bool SystemZDAGToDAGISel::selectBDAddr(SystemZAddressingMode::DispRange DR,
                                       SDValue Addr, SDValue &Base,
                                       SDValue &Disp) const {
  SystemZAddressingMode AM(SystemZAddressingMode::FormBD, DR);
  if (!selectAddress(Addr, AM))
    return false;

  getAddressOperands(AM, Addr.getValueType(), Base, Disp);
  return true;
}

bool SystemZDAGToDAGISel::selectMVIAddr(SystemZAddressingMode::DispRange DR,
                                        SDValue Addr, SDValue &Base,
                                        SDValue &Disp) const {
  SystemZAddressingMode AM(SystemZAddressingMode::FormBDXNormal, DR);
  if (!selectAddress(Addr, AM) || AM.Index.getNode())
    return false;

  getAddressOperands(AM, Addr.getValueType(), Base, Disp);
  return true;
}

bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form,
                                        SystemZAddressingMode::DispRange DR,
                                        SDValue Addr, SDValue &Base,
                                        SDValue &Disp, SDValue &Index) const {
  SystemZAddressingMode AM(Form, DR);
  if (!selectAddress(Addr, AM))
    return false;

  getAddressOperands(AM, Addr.getValueType(), Base, Disp, Index);
  return true;
}
bool SystemZDAGToDAGISel::selectBDVAddr12Only(SDValue Addr, SDValue Elem,
                                              SDValue &Base,
                                              SDValue &Disp,
                                              SDValue &Index) const {
  SDValue Regs[2];
  if (selectBDXAddr12Only(Addr, Regs[0], Disp, Regs[1]) &&
      Regs[0].getNode() && Regs[1].getNode()) {
    for (unsigned int I = 0; I < 2; ++I) {
      Base = Regs[I];
      Index = Regs[1 - I];
      // We can't tell here whether the index vector has the right type
      // for the access; the caller needs to do that instead.
      if (Index.getOpcode() == ISD::ZERO_EXTEND)
        Index = Index.getOperand(0);
      if (Index.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          Index.getOperand(1) == Elem) {
        Index = Index.getOperand(0);
        return true;
      }
    }
  }
  return false;
}
bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op,
                                               uint64_t InsertMask) const {
  // We're only interested in cases where the insertion is into some operand
  // of Op, rather than into Op itself.  The only useful case is an AND.
  if (Op.getOpcode() != ISD::AND)
    return false;

  // We need a constant mask.
  auto *MaskNode = dyn_cast<ConstantSDNode>(Op.getOperand(1).getNode());
  if (!MaskNode)
    return false;

  // It's not an insertion of Op.getOperand(0) if the two masks overlap.
  uint64_t AndMask = MaskNode->getZExtValue();
  if (InsertMask & AndMask)
    return false;

  // It's only an insertion if all bits are covered or are known to be zero.
  // The inner check covers all cases but is more expensive.
  uint64_t Used = allOnes(Op.getValueSizeInBits());
  if (Used != (AndMask | InsertMask)) {
    KnownBits Known = CurDAG->computeKnownBits(Op.getOperand(0));
    if (Used != (AndMask | InsertMask | Known.Zero.getZExtValue()))
      return false;
  }

  Op = Op.getOperand(0);
  return true;
}
bool SystemZDAGToDAGISel::refineRxSBGMask(RxSBGOperands &RxSBG,
                                          uint64_t Mask) const {
  const SystemZInstrInfo *TII = getInstrInfo();
  if (RxSBG.Rotate != 0)
    Mask = (Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate));
  Mask &= RxSBG.Mask;
  if (TII->isRxSBGMask(Mask, RxSBG.BitSize, RxSBG.Start, RxSBG.End)) {
    RxSBG.Mask = Mask;
    return true;
  }
  return false;
}
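
// Rotating Mask left by RxSBG.Rotate above keeps it aligned with the rotated
// input: rotation is a bit permutation, so
// (rotl X, R) & rotl(M, R) == rotl(X & M, R), and a mask expressed on the
// original input bits can be re-expressed on the rotated value.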
// Return true if any bits of (RxSBG.Input & Mask) are significant.
static bool maskMatters(RxSBGOperands &RxSBG, uint64_t Mask) {
  // Rotate the mask in the same way as RxSBG.Input is rotated.
  if (RxSBG.Rotate != 0)
    Mask = ((Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate)));
  return (Mask & RxSBG.Mask) != 0;
}
bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
  SDValue N = RxSBG.Input;
  unsigned Opcode = N.getOpcode();
  switch (Opcode) {
  case ISD::TRUNCATE: {
    if (RxSBG.Opcode == SystemZ::RNSBG)
      return false;
    if (N.getOperand(0).getValueSizeInBits() > 64)
      return false;
    uint64_t BitSize = N.getValueSizeInBits();
    uint64_t Mask = allOnes(BitSize);
    if (!refineRxSBGMask(RxSBG, Mask))
      return false;
    RxSBG.Input = N.getOperand(0);
    return true;
  }
  case ISD::AND: {
    if (RxSBG.Opcode == SystemZ::RNSBG)
      return false;

    auto *MaskNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
    if (!MaskNode)
      return false;

    SDValue Input = N.getOperand(0);
    uint64_t Mask = MaskNode->getZExtValue();
    if (!refineRxSBGMask(RxSBG, Mask)) {
      // If some bits of Input are already known zeros, those bits will have
      // been removed from the mask.  See if adding them back in makes the
      // mask suitable.
      KnownBits Known = CurDAG->computeKnownBits(Input);
      Mask |= Known.Zero.getZExtValue();
      if (!refineRxSBGMask(RxSBG, Mask))
        return false;
    }
    RxSBG.Input = Input;
    return true;
  }

  case ISD::OR: {
    if (RxSBG.Opcode != SystemZ::RNSBG)
      return false;

    auto *MaskNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
    if (!MaskNode)
      return false;

    SDValue Input = N.getOperand(0);
    uint64_t Mask = ~MaskNode->getZExtValue();
    if (!refineRxSBGMask(RxSBG, Mask)) {
      // If some bits of Input are already known ones, those bits will have
      // been removed from the mask.  See if adding them back in makes the
      // mask suitable.
      KnownBits Known = CurDAG->computeKnownBits(Input);
      Mask &= ~Known.One.getZExtValue();
      if (!refineRxSBGMask(RxSBG, Mask))
        return false;
    }
    RxSBG.Input = Input;
    return true;
  }

  case ISD::ROTL: {
    // Any 64-bit rotate left can be merged into the RxSBG.
    if (RxSBG.BitSize != 64 || N.getValueType() != MVT::i64)
      return false;
    auto *CountNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
    if (!CountNode)
      return false;

    RxSBG.Rotate = (RxSBG.Rotate + CountNode->getZExtValue()) & 63;
    RxSBG.Input = N.getOperand(0);
    return true;
  }

  case ISD::ANY_EXTEND:
    // Bits above the extended operand are don't-care.
    RxSBG.Input = N.getOperand(0);
    return true;

  case ISD::ZERO_EXTEND:
    if (RxSBG.Opcode != SystemZ::RNSBG) {
      // Restrict the mask to the extended operand.
      unsigned InnerBitSize = N.getOperand(0).getValueSizeInBits();
      if (!refineRxSBGMask(RxSBG, allOnes(InnerBitSize)))
        return false;

      RxSBG.Input = N.getOperand(0);
      return true;
    }
    [[fallthrough]];

  case ISD::SIGN_EXTEND: {
    // Check that the extension bits are don't-care (i.e. are masked out
    // by the final mask).
    unsigned BitSize = N.getValueSizeInBits();
    unsigned InnerBitSize = N.getOperand(0).getValueSizeInBits();
    if (maskMatters(RxSBG, allOnes(BitSize) - allOnes(InnerBitSize))) {
      // In the case where only the sign bit is active, increase Rotate with
      // the extension width.
      if (RxSBG.Mask == 1 && RxSBG.Rotate == 1)
        RxSBG.Rotate += (BitSize - InnerBitSize);
      else
        return false;
    }

    RxSBG.Input = N.getOperand(0);
    return true;
  }

  case ISD::SHL: {
    auto *CountNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
    if (!CountNode)
      return false;

    uint64_t Count = CountNode->getZExtValue();
    unsigned BitSize = N.getValueSizeInBits();
    if (Count < 1 || Count >= BitSize)
      return false;

    if (RxSBG.Opcode == SystemZ::RNSBG) {
      // Treat (shl X, count) as (rotl X, size-count) as long as the bottom
      // count bits from RxSBG.Input are ignored.
      if (maskMatters(RxSBG, allOnes(Count)))
        return false;
    } else {
      // Treat (shl X, count) as (and (rotl X, count), ~0<<count).
      if (!refineRxSBGMask(RxSBG, allOnes(BitSize - Count) << Count))
        return false;
    }

    RxSBG.Rotate = (RxSBG.Rotate + Count) & 63;
    RxSBG.Input = N.getOperand(0);
    return true;
  }

  case ISD::SRL:
  case ISD::SRA: {
    auto *CountNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
    if (!CountNode)
      return false;

    uint64_t Count = CountNode->getZExtValue();
    unsigned BitSize = N.getValueSizeInBits();
    if (Count < 1 || Count >= BitSize)
      return false;

    if (RxSBG.Opcode == SystemZ::RNSBG || Opcode == ISD::SRA) {
      // Treat (srl|sra X, count) as (rotl X, size-count) as long as the top
      // count bits from RxSBG.Input are ignored.
      if (maskMatters(RxSBG, allOnes(Count) << (BitSize - Count)))
        return false;
    } else {
      // Treat (srl X, count), mask) as (and (rotl X, size-count), ~0>>count),
      // which is similar to SLL above.
      if (!refineRxSBGMask(RxSBG, allOnes(BitSize - Count)))
        return false;
    }

    RxSBG.Rotate = (RxSBG.Rotate - Count) & 63;
    RxSBG.Input = N.getOperand(0);
    return true;
  }
  default:
    return false;
  }
}
SDValue SystemZDAGToDAGISel::getUNDEF(const SDLoc &DL, EVT VT) const {
  SDNode *N = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
  return SDValue(N, 0);
}

SDValue SystemZDAGToDAGISel::convertTo(const SDLoc &DL, EVT VT,
                                       SDValue N) const {
  if (N.getValueType() == MVT::i32 && VT == MVT::i64)
    return CurDAG->getTargetInsertSubreg(SystemZ::subreg_l32,
                                         DL, VT, getUNDEF(DL, MVT::i64), N);
  if (N.getValueType() == MVT::i64 && VT == MVT::i32)
    return CurDAG->getTargetExtractSubreg(SystemZ::subreg_l32, DL, VT, N);
  assert(N.getValueType() == VT && "Unexpected value types");
  return N;
}
bool SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  if (!VT.isInteger() || VT.getSizeInBits() > 64)
    return false;
  RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0));
  unsigned Count = 0;
  while (expandRxSBG(RISBG))
    // The widening or narrowing is expected to be free.
    // Counting widening or narrowing as a saved operation will result in
    // preferring an R*SBG over a simple shift/logical instruction.
    if (RISBG.Input.getOpcode() != ISD::ANY_EXTEND &&
        RISBG.Input.getOpcode() != ISD::TRUNCATE)
      Count += 1;
  if (Count == 0 || isa<ConstantSDNode>(RISBG.Input))
    return false;

  // Prefer to use normal shift instructions over RISBG, since they can handle
  // all cases and are sometimes shorter.
  if (Count == 1 && N->getOpcode() != ISD::AND)
    return false;

  // Prefer register extensions like LLC over RISBG.  Also prefer to start
  // out with normal ANDs if one instruction would be enough.  We can convert
  // these ANDs into an RISBG later if a three-address instruction is useful.
  if (RISBG.Rotate == 0) {
    bool PreferAnd = false;
    // Prefer AND for any 32-bit and-immediate operation.
    if (VT == MVT::i32)
      PreferAnd = true;
    // As well as for any 64-bit operation that can be implemented via LLC(R),
    // LLH(R), LLGT(R), or one of the and-immediate instructions.
    else if (RISBG.Mask == 0xff ||
             RISBG.Mask == 0xffff ||
             RISBG.Mask == 0x7fffffff ||
             SystemZ::isImmLF(~RISBG.Mask) ||
             SystemZ::isImmHF(~RISBG.Mask))
      PreferAnd = true;
    // And likewise for the LLZRGF instruction, which doesn't have a register
    // to register version.
    else if (auto *Load = dyn_cast<LoadSDNode>(RISBG.Input)) {
      if (Load->getMemoryVT() == MVT::i32 &&
          (Load->getExtensionType() == ISD::EXTLOAD ||
           Load->getExtensionType() == ISD::ZEXTLOAD) &&
          RISBG.Mask == 0xffffff00 &&
          Subtarget->hasLoadAndZeroRightmostByte())
        PreferAnd = true;
    }
    if (PreferAnd) {
      // Replace the current node with an AND.  Note that the current node
      // might already be that same AND, in which case it is already CSE'd
      // with it, and we must not call ReplaceNode.
      SDValue In = convertTo(DL, VT, RISBG.Input);
      SDValue Mask = CurDAG->getConstant(RISBG.Mask, DL, VT);
      SDValue New = CurDAG->getNode(ISD::AND, DL, VT, In, Mask);
      if (N != New.getNode()) {
        insertDAGNode(CurDAG, N, Mask);
        insertDAGNode(CurDAG, N, New);
        ReplaceNode(N, New.getNode());
        N = New.getNode();
      }
      // Now, select the machine opcode to implement this operation.
      if (!N->isMachineOpcode())
        SelectCode(N);
      return true;
    }
  }

  unsigned Opcode = SystemZ::RISBG;
  // Prefer RISBGN if available, since it does not clobber CC.
  if (Subtarget->hasMiscellaneousExtensions())
    Opcode = SystemZ::RISBGN;
  EVT OpcodeVT = MVT::i64;
  if (VT == MVT::i32 && Subtarget->hasHighWord() &&
      // We can only use the 32-bit instructions if all source bits are
      // in the low 32 bits without wrapping, both after rotation (because
      // of the smaller range for Start and End) and before rotation
      // (because the input value is truncated).
      RISBG.Start >= 32 && RISBG.End >= RISBG.Start &&
      ((RISBG.Start + RISBG.Rotate) & 63) >= 32 &&
      ((RISBG.End + RISBG.Rotate) & 63) >=
      ((RISBG.Start + RISBG.Rotate) & 63)) {
    Opcode = SystemZ::RISBMux;
    OpcodeVT = MVT::i32;
    RISBG.Start &= 31;
    RISBG.End &= 31;
  }
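  // The End operand below has bit 0x80 set: in RISBG's I4 field this is the
  // "zero remaining bits" flag, which clears all bits outside the selected
  // range instead of leaving them unchanged (hence "RISBG with the zero flag
  // set" in the comment on this function).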
  SDValue Ops[5] = {
    getUNDEF(DL, OpcodeVT),
    convertTo(DL, OpcodeVT, RISBG.Input),
    CurDAG->getTargetConstant(RISBG.Start, DL, MVT::i32),
    CurDAG->getTargetConstant(RISBG.End | 128, DL, MVT::i32),
    CurDAG->getTargetConstant(RISBG.Rotate, DL, MVT::i32)
  };
  SDValue New = convertTo(
      DL, VT, SDValue(CurDAG->getMachineNode(Opcode, DL, OpcodeVT, Ops), 0));
  ReplaceNode(N, New.getNode());
  return true;
}
bool SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  if (!VT.isInteger() || VT.getSizeInBits() > 64)
    return false;
  // Try treating each operand of N as the second operand of the RxSBG
  // and see which goes deepest.
  RxSBGOperands RxSBG[] = {
    RxSBGOperands(Opcode, N->getOperand(0)),
    RxSBGOperands(Opcode, N->getOperand(1))
  };
  unsigned Count[] = { 0, 0 };
  for (unsigned I = 0; I < 2; ++I)
    while (RxSBG[I].Input->hasOneUse() && expandRxSBG(RxSBG[I]))
      // In cases of multiple users it seems better to keep the simple
      // instruction as they are one cycle faster, and it also helps in cases
      // where both inputs share a common node.
      // The widening or narrowing is expected to be free.  Counting widening
      // or narrowing as a saved operation will result in preferring an R*SBG
      // over a simple shift/logical instruction.
      if (RxSBG[I].Input.getOpcode() != ISD::ANY_EXTEND &&
          RxSBG[I].Input.getOpcode() != ISD::TRUNCATE)
        Count[I] += 1;

  // Do nothing if neither operand is suitable.
  if (Count[0] == 0 && Count[1] == 0)
    return false;

  // Pick the deepest second operand.
  unsigned I = Count[0] > Count[1] ? 0 : 1;
  SDValue Op0 = N->getOperand(I ^ 1);

  // Prefer IC for character insertions from memory.
  if (Opcode == SystemZ::ROSBG && (RxSBG[I].Mask & 0xff) == 0)
    if (auto *Load = dyn_cast<LoadSDNode>(Op0.getNode()))
      if (Load->getMemoryVT() == MVT::i8)
        return false;

  // See whether we can avoid an AND in the first operand by converting
  // ROSBG to RISBG.
  if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) {
    Opcode = SystemZ::RISBG;
    // Prefer RISBGN if available, since it does not clobber CC.
    if (Subtarget->hasMiscellaneousExtensions())
      Opcode = SystemZ::RISBGN;
  }

  SDValue Ops[5] = {
    convertTo(DL, MVT::i64, Op0),
    convertTo(DL, MVT::i64, RxSBG[I].Input),
    CurDAG->getTargetConstant(RxSBG[I].Start, DL, MVT::i32),
    CurDAG->getTargetConstant(RxSBG[I].End, DL, MVT::i32),
    CurDAG->getTargetConstant(RxSBG[I].Rotate, DL, MVT::i32)
  };
  SDValue New = convertTo(
      DL, VT, SDValue(CurDAG->getMachineNode(Opcode, DL, MVT::i64, Ops), 0));
  ReplaceNode(N, New.getNode());
  return true;
}
void SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
                                              SDValue Op0, uint64_t UpperVal,
                                              uint64_t LowerVal) {
  EVT VT = Node->getValueType(0);
  SDLoc DL(Node);
  SDValue Upper = CurDAG->getConstant(UpperVal, DL, VT);
  if (Op0.getNode())
    Upper = CurDAG->getNode(Opcode, DL, VT, Op0, Upper);

  {
    // When we haven't passed in Op0, Upper will be a constant. In order to
    // prevent folding back to the large immediate in `Or = getNode(...)` we
    // run SelectCode first and end up with an opaque machine node. This means
    // that we need to use a handle to keep track of Upper in case it gets
    // CSE'd by SelectCode.
    //
    // Note that in the case where Op0 is passed in we could just call
    // SelectCode(Upper) later, along with the SelectCode(Or), and avoid
    // needing the handle at all, but it's fine to do it here.
    //
    // TODO: This is a pretty hacky way to do this. Can we do something that
    // doesn't require a two paragraph explanation?
    HandleSDNode Handle(Upper);
    SelectCode(Upper.getNode());
    Upper = Handle.getValue();
  }

  SDValue Lower = CurDAG->getConstant(LowerVal, DL, VT);
  SDValue Or = CurDAG->getNode(Opcode, DL, VT, Upper, Lower);

  ReplaceNode(Node, Or.getNode());

  SelectCode(Or.getNode());
}
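
// For example, splitting the 64-bit constant 0x123456789abcdef0 with
// Opcode == ISD::OR gives UpperVal == 0x1234567800000000 (Val - uint32_t(Val))
// and LowerVal == 0x9abcdef0, so the value is rebuilt as an OR of the two
// halves, typically selected to something like LLIHF followed by OILF.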
void SystemZDAGToDAGISel::loadVectorConstant(
    const SystemZVectorConstantInfo &VCI, SDNode *Node) {
  assert((VCI.Opcode == SystemZISD::BYTE_MASK ||
          VCI.Opcode == SystemZISD::REPLICATE ||
          VCI.Opcode == SystemZISD::ROTATE_MASK) &&
         "Bad opcode!");
  assert(VCI.VecVT.getSizeInBits() == 128 && "Expected a vector type");
  EVT VT = Node->getValueType(0);
  SDLoc DL(Node);
  SmallVector<SDValue, 2> Ops;
  for (unsigned OpVal : VCI.OpVals)
    Ops.push_back(CurDAG->getTargetConstant(OpVal, DL, MVT::i32));
  SDValue Op = CurDAG->getNode(VCI.Opcode, DL, VCI.VecVT, Ops);

  if (VCI.VecVT == VT.getSimpleVT())
    ReplaceNode(Node, Op.getNode());
  else if (VT.getSizeInBits() == 128) {
    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, DL, VT, Op);
    ReplaceNode(Node, BitCast.getNode());
    SelectCode(BitCast.getNode());
  } else { // float or double
    unsigned SubRegIdx =
        (VT.getSizeInBits() == 32 ? SystemZ::subreg_h32 : SystemZ::subreg_h64);
    ReplaceNode(
        Node, CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, Op).getNode());
  }
  SelectCode(Op.getNode());
}
SDNode *SystemZDAGToDAGISel::loadPoolVectorConstant(APInt Val, EVT VT,
                                                    SDLoc DL) {
  SDNode *ResNode;
  assert (VT.getSizeInBits() == 128);

  SDValue CP = CurDAG->getTargetConstantPool(
      ConstantInt::get(Type::getInt128Ty(*CurDAG->getContext()), Val),
      TLI->getPointerTy(CurDAG->getDataLayout()));

  EVT PtrVT = CP.getValueType();
  SDValue Ops[] = {
    SDValue(CurDAG->getMachineNode(SystemZ::LARL, DL, PtrVT, CP), 0),
    CurDAG->getTargetConstant(0, DL, PtrVT),
    CurDAG->getRegister(0, PtrVT),
    CurDAG->getEntryNode()
  };
  ResNode = CurDAG->getMachineNode(SystemZ::VL, DL, VT, MVT::Other, Ops);

  // Annotate ResNode with memory operand information so that MachineInstr
  // queries work properly. This e.g. gives the register allocation the
  // required information for rematerialization.
  MachineFunction& MF = CurDAG->getMachineFunction();
  MachineMemOperand *MemOp =
      MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
                              MachineMemOperand::MOLoad, 16, Align(8));

  CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
  return ResNode;
}
bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
  SDValue ElemV = N->getOperand(2);
  auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
  if (!ElemN)
    return false;

  unsigned Elem = ElemN->getZExtValue();
  EVT VT = N->getValueType(0);
  if (Elem >= VT.getVectorNumElements())
    return false;

  auto *Load = dyn_cast<LoadSDNode>(N->getOperand(1));
  if (!Load || !Load->hasNUsesOfValue(1, 0))
    return false;
  if (Load->getMemoryVT().getSizeInBits() !=
      Load->getValueType(0).getSizeInBits())
    return false;

  SDValue Base, Disp, Index;
  if (!selectBDVAddr12Only(Load->getBasePtr(), ElemV, Base, Disp, Index) ||
      Index.getValueType() != VT.changeVectorElementTypeToInteger())
    return false;

  SDLoc DL(Load);
  SDValue Ops[] = {
    N->getOperand(0), Base, Disp, Index,
    CurDAG->getTargetConstant(Elem, DL, MVT::i32), Load->getChain()
  };
  SDNode *Res = CurDAG->getMachineNode(Opcode, DL, VT, MVT::Other, Ops);
  ReplaceUses(SDValue(Load, 1), SDValue(Res, 1));
  ReplaceNode(N, Res);
  return true;
}
bool SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) {
  SDValue Value = Store->getValue();
  if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return false;
  if (Store->getMemoryVT().getSizeInBits() != Value.getValueSizeInBits())
    return false;

  SDValue ElemV = Value.getOperand(1);
  auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
  if (!ElemN)
    return false;

  SDValue Vec = Value.getOperand(0);
  EVT VT = Vec.getValueType();
  unsigned Elem = ElemN->getZExtValue();
  if (Elem >= VT.getVectorNumElements())
    return false;

  SDValue Base, Disp, Index;
  if (!selectBDVAddr12Only(Store->getBasePtr(), ElemV, Base, Disp, Index) ||
      Index.getValueType() != VT.changeVectorElementTypeToInteger())
    return false;

  SDLoc DL(Store);
  SDValue Ops[] = {
    Vec, Base, Disp, Index, CurDAG->getTargetConstant(Elem, DL, MVT::i32),
    Store->getChain()
  };
  ReplaceNode(Store, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
  return true;
}
// Check whether or not the chain ending in StoreNode is suitable for doing
// the {load; op; store} to modify transformation.
static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
                                        SDValue StoredVal, SelectionDAG *CurDAG,
                                        LoadSDNode *&LoadNode,
                                        SDValue &InputChain) {
  // Is the stored value result 0 of the operation?
  if (StoredVal.getResNo() != 0)
    return false;

  // Are there other uses of the loaded value than the operation?
  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0))
    return false;

  // Is the store non-extending and non-indexed?
  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
    return false;

  SDValue Load = StoredVal->getOperand(0);
  // Is the stored value a non-extending and non-indexed load?
  if (!ISD::isNormalLoad(Load.getNode()))
    return false;

  // Return LoadNode by reference.
  LoadNode = cast<LoadSDNode>(Load);

  // Is store the only read of the loaded value?
  if (!Load.hasOneUse())
    return false;

  // Is the address of the store the same as the load?
  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
      LoadNode->getOffset() != StoreNode->getOffset())
    return false;

  // Check if the chain is produced by the load or is a TokenFactor with
  // the load output chain as an operand. Return InputChain by reference.
  SDValue Chain = StoreNode->getChain();

  bool ChainCheck = false;
  if (Chain == Load.getValue(1)) {
    ChainCheck = true;
    InputChain = LoadNode->getChain();
  } else if (Chain.getOpcode() == ISD::TokenFactor) {
    SmallVector<SDValue, 4> ChainOps;
    SmallVector<const SDNode *, 4> LoopWorklist;
    SmallPtrSet<const SDNode *, 16> Visited;
    const unsigned int Max = 1024;
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
      SDValue Op = Chain.getOperand(i);
      if (Op == Load.getValue(1)) {
        ChainCheck = true;
        // Drop Load, but keep its chain. No cycle check necessary.
        ChainOps.push_back(Load.getOperand(0));
        continue;
      }
      LoopWorklist.push_back(Op.getNode());
      ChainOps.push_back(Op);
    }

    // Add the other operand of StoredVal to worklist.
    for (SDValue Op : StoredVal->ops())
      if (Op.getNode() != LoadNode)
        LoopWorklist.push_back(Op.getNode());

    // Check if Load is reachable from any of the nodes in the worklist.
    if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
                                     true))
      return false;

    // Make a new TokenFactor with all the other input chains except
    // for the load.
    InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain),
                                 MVT::Other, ChainOps);
  }
  if (!ChainCheck)
    return false;

  return true;
}
// Change a chain of {load; op; store} of the same value into a simple op
// through memory of that value, if the uses of the modified value and its
// address are suitable.
//
// The tablegen memory operand pattern is currently not able to match the
// case where the CC on the original operation is used.
//
// See the equivalent routine in X86ISelDAGToDAG for further comments.
bool SystemZDAGToDAGISel::tryFoldLoadStoreIntoMemOperand(SDNode *Node) {
  StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
  SDValue StoredVal = StoreNode->getOperand(1);
  unsigned Opc = StoredVal->getOpcode();
  SDLoc DL(StoreNode);

  // Before we try to select anything, make sure this is memory operand size
  // and opcode we can handle. Note that this must match the code below that
  // actually lowers the opcodes.
  EVT MemVT = StoreNode->getMemoryVT();
  unsigned NewOpc = 0;
  bool NegateOperand = false;
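  // z/Architecture note: ASI/AGSI add a signed 8-bit immediate directly to a
  // 32-bit/64-bit memory operand, and ALSI/ALGSI are the corresponding
  // logical (CC-setting) forms; subtraction is handled by negating the
  // immediate below.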
  switch (Opc) {
  default:
    return false;
  case SystemZISD::SSUBO:
    NegateOperand = true;
    [[fallthrough]];
  case SystemZISD::SADDO:
    if (MemVT == MVT::i32)
      NewOpc = SystemZ::ASI;
    else if (MemVT == MVT::i64)
      NewOpc = SystemZ::AGSI;
    else
      return false;
    break;
  case SystemZISD::USUBO:
    NegateOperand = true;
    [[fallthrough]];
  case SystemZISD::UADDO:
    if (MemVT == MVT::i32)
      NewOpc = SystemZ::ALSI;
    else if (MemVT == MVT::i64)
      NewOpc = SystemZ::ALGSI;
    else
      return false;
    break;
  }

  LoadSDNode *LoadNode = nullptr;
  SDValue InputChain;
  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadNode,
                                   InputChain))
    return false;

  SDValue Operand = StoredVal.getOperand(1);
  auto *OperandC = dyn_cast<ConstantSDNode>(Operand);
  if (!OperandC)
    return false;
  auto OperandV = OperandC->getAPIntValue();
  if (NegateOperand)
    OperandV = -OperandV;
  if (OperandV.getSignificantBits() > 8)
    return false;
  Operand = CurDAG->getTargetConstant(OperandV, DL, MemVT);

  SDValue Base, Disp;
  if (!selectBDAddr20Only(StoreNode->getBasePtr(), Base, Disp))
    return false;

  SDValue Ops[] = { Base, Disp, Operand, InputChain };
  MachineSDNode *Result =
      CurDAG->getMachineNode(NewOpc, DL, MVT::i32, MVT::Other, Ops);
  CurDAG->setNodeMemRefs(
      Result, {StoreNode->getMemOperand(), LoadNode->getMemOperand()});

  ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
  ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
  CurDAG->RemoveDeadNode(Node);
  return true;
}
bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store,
                                               LoadSDNode *Load) const {
  // Check that the two memory operands have the same size.
  if (Load->getMemoryVT() != Store->getMemoryVT())
    return false;

  // Volatility stops an access from being decomposed.
  if (Load->isVolatile() || Store->isVolatile())
    return false;

  // There's no chance of overlap if the load is invariant.
  if (Load->isInvariant() && Load->isDereferenceable())
    return true;

  // Otherwise we need to check whether there's an alias.
  const Value *V1 = Load->getMemOperand()->getValue();
  const Value *V2 = Store->getMemOperand()->getValue();
  if (!V1 || !V2)
    return false;

  // Reject equality.
  uint64_t Size = Load->getMemoryVT().getStoreSize();
  int64_t End1 = Load->getSrcValueOffset() + Size;
  int64_t End2 = Store->getSrcValueOffset() + Size;
  if (V1 == V2 && End1 == End2)
    return false;
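
  // Note that the query below passes the end offsets as the location sizes,
  // so each MemoryLocation conservatively covers everything from the start
  // of the underlying value up to the end of the access.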
  return AA->isNoAlias(MemoryLocation(V1, End1, Load->getAAInfo()),
                       MemoryLocation(V2, End2, Store->getAAInfo()));
}
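
// For context: MVC is the SS-format MOVE (character) instruction, a logically
// left-to-right bytewise copy of up to 256 bytes, which is why the overlap
// rules checked by canUseBlockOperation above apply to it.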
bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const {
  auto *Store = cast<StoreSDNode>(N);
  auto *Load = cast<LoadSDNode>(Store->getValue());

  // Prefer not to use MVC if either address can use ... RELATIVE LONG
  // instructions.
  uint64_t Size = Load->getMemoryVT().getStoreSize();
  if (Size > 1 && Size <= 8) {
    // Prefer LHRL, LRL and LGRL.
    if (SystemZISD::isPCREL(Load->getBasePtr().getOpcode()))
      return false;
    // Prefer STHRL, STRL and STGRL.
    if (SystemZISD::isPCREL(Store->getBasePtr().getOpcode()))
      return false;
  }

  return canUseBlockOperation(Store, Load);
}
bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N,
                                                     unsigned I) const {
  auto *StoreA = cast<StoreSDNode>(N);
  auto *LoadA = cast<LoadSDNode>(StoreA->getValue().getOperand(1 - I));
  auto *LoadB = cast<LoadSDNode>(StoreA->getValue().getOperand(I));
  return !LoadA->isVolatile() && LoadA->getMemoryVT() == LoadB->getMemoryVT() &&
         canUseBlockOperation(StoreA, LoadB);
}
bool SystemZDAGToDAGISel::storeLoadIsAligned(SDNode *N) const {
  auto *MemAccess = cast<MemSDNode>(N);
  auto *LdSt = dyn_cast<LSBaseSDNode>(MemAccess);
  TypeSize StoreSize = MemAccess->getMemoryVT().getStoreSize();
  SDValue BasePtr = MemAccess->getBasePtr();
  MachineMemOperand *MMO = MemAccess->getMemOperand();
  assert(MMO && "Expected a memory operand.");

  // The memory access must have a proper alignment and no index register.
  // Only load and store nodes have the offset operand (atomic loads do not).
  if (MemAccess->getAlign().value() < StoreSize ||
      (LdSt && !LdSt->getOffset().isUndef()))
    return false;

  // The MMO must not have an unaligned offset.
  if (MMO->getOffset() % StoreSize != 0)
    return false;

  // An access to GOT or the Constant Pool is aligned.
  if (const PseudoSourceValue *PSV = MMO->getPseudoValue())
    if ((PSV->isGOT() || PSV->isConstantPool()))
      return true;

  // Check the alignment of a Global Address.
  if (BasePtr.getNumOperands())
    if (GlobalAddressSDNode *GA =
        dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0))) {
      // The immediate offset must be aligned.
      if (GA->getOffset() % StoreSize != 0)
        return false;

      // The alignment of the symbol itself must be at least the store size.
      const GlobalValue *GV = GA->getGlobal();
      const DataLayout &DL = GV->getDataLayout();
      if (GV->getPointerAlignment(DL).value() < StoreSize)
        return false;
    }

  return true;
}
ISD::LoadExtType SystemZDAGToDAGISel::getLoadExtType(SDNode *N) const {
  ISD::LoadExtType ETy;
  if (auto *L = dyn_cast<LoadSDNode>(N))
    ETy = L->getExtensionType();
  else if (auto *AL = dyn_cast<AtomicSDNode>(N))
    ETy = AL->getExtensionType();
  else
    llvm_unreachable("Unknown load node type.");
  return ETy;
}
void SystemZDAGToDAGISel::Select(SDNode *Node) {
  // If we have a custom node, we already have selected!
  if (Node->isMachineOpcode()) {
    LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
    Node->setNodeId(-1);
    return;
  }

  unsigned Opcode = Node->getOpcode();
  switch (Opcode) {
  case ISD::OR:
    if (Node->getOperand(1).getOpcode() != ISD::Constant)
      if (tryRxSBG(Node, SystemZ::ROSBG))
        return;
    goto or_xor;

  case ISD::XOR:
    if (Node->getOperand(1).getOpcode() != ISD::Constant)
      if (tryRxSBG(Node, SystemZ::RXSBG))
        return;
    // Fall through.
  or_xor:
    // If this is a 64-bit operation in which both 32-bit halves are nonzero,
    // split the operation into two. If both operands here happen to be
    // constant, leave this to common code to optimize.
    if (Node->getValueType(0) == MVT::i64 &&
        Node->getOperand(0).getOpcode() != ISD::Constant)
      if (auto *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
        uint64_t Val = Op1->getZExtValue();
        // Don't split the operation if we can match one of the combined
        // logical operations provided by miscellaneous-extensions-3.
        if (Subtarget->hasMiscellaneousExtensions3()) {
          unsigned ChildOpcode = Node->getOperand(0).getOpcode();
          // Check whether this expression matches NAND/NOR/NXOR.
          if (Val == (uint64_t)-1 && Opcode == ISD::XOR)
            if (ChildOpcode == ISD::AND || ChildOpcode == ISD::OR ||
                ChildOpcode == ISD::XOR)
              break;
          // Check whether this expression matches OR-with-complement
          // (or matches an alternate pattern for NXOR).
          if (ChildOpcode == ISD::XOR) {
            auto Op0 = Node->getOperand(0);
            if (auto *Op0Op1 = dyn_cast<ConstantSDNode>(Op0->getOperand(1)))
              if (Op0Op1->getZExtValue() == (uint64_t)-1)
                break;
          }
        }
        // Don't split an XOR with -1 as LCGR/AGHI is more compact.
        if (Opcode == ISD::XOR && Op1->isAllOnes())
          break;
        if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) {
          splitLargeImmediate(Opcode, Node, Node->getOperand(0),
                              Val - uint32_t(Val), uint32_t(Val));
          return;
        }
      }
    break;

  case ISD::AND:
    if (Node->getOperand(1).getOpcode() != ISD::Constant)
      if (tryRxSBG(Node, SystemZ::RNSBG))
        return;
    [[fallthrough]];
  case ISD::ROTL:
  case ISD::SHL:
  case ISD::SRL:
  case ISD::ZERO_EXTEND:
    if (tryRISBGZero(Node))
      return;
    break;

  case ISD::BSWAP:
    if (Node->getValueType(0) == MVT::i128) {
      SDLoc DL(Node);
      SDValue Src = Node->getOperand(0);
      Src = CurDAG->getNode(ISD::BITCAST, DL, MVT::v16i8, Src);
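
      // The two words below spell the byte indices 0..15 in little-endian
      // word order; since the 128-bit pool constant is stored big-endian,
      // the bytes land in memory as 15, 14, ..., 0, so the VPERM selects the
      // source bytes in reverse order, i.e. performs the byte swap.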
1653 uint64_t Bytes
[2] = { 0x0706050403020100ULL
, 0x0f0e0d0c0b0a0908ULL
};
1654 SDNode
*Mask
= loadPoolVectorConstant(APInt(128, Bytes
), MVT::v16i8
, DL
);
1655 SDValue Ops
[] = { Src
, Src
, SDValue(Mask
, 0) };
1656 SDValue Res
= SDValue(CurDAG
->getMachineNode(SystemZ::VPERM
, DL
,
1657 MVT::v16i8
, Ops
), 0);
1659 Res
= CurDAG
->getNode(ISD::BITCAST
, DL
, MVT::i128
, Res
);
1660 SDNode
*ResNode
= Res
.getNode();
1661 ReplaceNode(Node
, ResNode
);
1662 SelectCode(Src
.getNode());
1663 SelectCode(ResNode
);
1669 // If this is a 64-bit constant that is out of the range of LLILF,
1670 // LLIHF and LGFI, split it into two 32-bit pieces.
1671 if (Node
->getValueType(0) == MVT::i64
) {
1672 uint64_t Val
= Node
->getAsZExtVal();
1673 if (!SystemZ::isImmLF(Val
) && !SystemZ::isImmHF(Val
) && !isInt
<32>(Val
)) {
1674 splitLargeImmediate(ISD::OR
, Node
, SDValue(), Val
- uint32_t(Val
),
1679 if (Node
->getValueType(0) == MVT::i128
) {
1680 const APInt
&Val
= Node
->getAsAPIntVal();
1681 SystemZVectorConstantInfo
VCI(Val
);
1682 if (VCI
.isVectorConstantLegal(*Subtarget
)) {
1683 loadVectorConstant(VCI
, Node
);
1686 // If we can't materialize the constant we need to use a literal pool.
1687 SDNode
*ResNode
= loadPoolVectorConstant(Val
, MVT::i128
, SDLoc(Node
));
1688 ReplaceNode(Node
, ResNode
);
1693 case SystemZISD::SELECT_CCMASK
: {
1694 SDValue Op0
= Node
->getOperand(0);
1695 SDValue Op1
= Node
->getOperand(1);
1696 // Prefer to put any load first, so that it can be matched as a
1697 // conditional load. Likewise for constants in range for LOCHI.
1698 if ((Op1
.getOpcode() == ISD::LOAD
&& Op0
.getOpcode() != ISD::LOAD
) ||
1699 (Subtarget
->hasLoadStoreOnCond2() &&
1700 Node
->getValueType(0).isInteger() &&
1701 Node
->getValueType(0).getSizeInBits() <= 64 &&
1702 Op1
.getOpcode() == ISD::Constant
&&
1703 isInt
<16>(cast
<ConstantSDNode
>(Op1
)->getSExtValue()) &&
1704 !(Op0
.getOpcode() == ISD::Constant
&&
1705 isInt
<16>(cast
<ConstantSDNode
>(Op0
)->getSExtValue())))) {
1706 SDValue CCValid
= Node
->getOperand(2);
1707 SDValue CCMask
= Node
->getOperand(3);
1708 uint64_t ConstCCValid
= CCValid
.getNode()->getAsZExtVal();
1709 uint64_t ConstCCMask
= CCMask
.getNode()->getAsZExtVal();
1710 // Invert the condition.
1711 CCMask
= CurDAG
->getTargetConstant(ConstCCValid
^ ConstCCMask
,
1712 SDLoc(Node
), CCMask
.getValueType());
1713 SDValue Op4
= Node
->getOperand(4);
1714 SDNode
*UpdatedNode
=
1715 CurDAG
->UpdateNodeOperands(Node
, Op1
, Op0
, CCValid
, CCMask
, Op4
);
1716 if (UpdatedNode
!= Node
) {
1717 // In case this node already exists then replace Node with it.
1718 ReplaceNode(Node
, UpdatedNode
);
1725 case ISD::INSERT_VECTOR_ELT
: {
1726 EVT VT
= Node
->getValueType(0);
1727 unsigned ElemBitSize
= VT
.getScalarSizeInBits();
1728 if (ElemBitSize
== 32) {
1729 if (tryGather(Node
, SystemZ::VGEF
))
1731 } else if (ElemBitSize
== 64) {
1732 if (tryGather(Node
, SystemZ::VGEG
))
1738 case ISD::BUILD_VECTOR
: {
1739 auto *BVN
= cast
<BuildVectorSDNode
>(Node
);
1740 SystemZVectorConstantInfo
VCI(BVN
);
1741 if (VCI
.isVectorConstantLegal(*Subtarget
)) {
1742 loadVectorConstant(VCI
, Node
);
1748 case ISD::ConstantFP
: {
1749 APFloat Imm
= cast
<ConstantFPSDNode
>(Node
)->getValueAPF();
1750 if (Imm
.isZero() || Imm
.isNegZero())
    SystemZVectorConstantInfo VCI(Imm);
    bool Success = VCI.isVectorConstantLegal(*Subtarget); (void)Success;
    assert(Success && "Expected legal FP immediate");
    loadVectorConstant(VCI, Node);
    return;
  }
  case ISD::STORE: {
    if (tryFoldLoadStoreIntoMemOperand(Node))
      return;
    auto *Store = cast<StoreSDNode>(Node);
    unsigned ElemBitSize = Store->getValue().getValueSizeInBits();
    if (ElemBitSize == 32) {
      if (tryScatter(Store, SystemZ::VSCEF))
        return;
    } else if (ElemBitSize == 64) {
      if (tryScatter(Store, SystemZ::VSCEG))
        return;
    }
    break;
  }
  case ISD::ATOMIC_STORE: {
    auto *AtomOp = cast<AtomicSDNode>(Node);
    // Replace the atomic_store with a regular store and select it. This is
    // ok since we know all store instructions <= 8 bytes are atomic, and the
    // 16 byte case is already handled during lowering.
    StoreSDNode *St = cast<StoreSDNode>(CurDAG->getTruncStore(
        AtomOp->getChain(), SDLoc(AtomOp), AtomOp->getVal(),
        AtomOp->getBasePtr(), AtomOp->getMemoryVT(), AtomOp->getMemOperand()));
    assert(St->getMemOperand()->isAtomic() && "Broken MMO.");
    SDNode *Chain = St;
    // We have to enforce sequential consistency by performing a
    // serialization operation after the store.
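    // (SystemZ::Serialize expands to a serializing BCR, e.g. "bcr 14, 0"
    //  with the fast-serialization facility, ensuring the store is visible
    //  before any subsequent memory access.)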
    if (AtomOp->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent)
      Chain = CurDAG->getMachineNode(SystemZ::Serialize, SDLoc(AtomOp),
                                     MVT::Other, SDValue(Chain, 0));
    ReplaceNode(Node, Chain);
    SelectCode(St);
    return;
  }
  }

  SelectCode(Node);
}
bool SystemZDAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) {
  SystemZAddressingMode::AddrForm Form;
  SystemZAddressingMode::DispRange DispRange;
  SDValue Base, Disp, Index;
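  // Displacement ranges, for reference: a 12-bit displacement is unsigned
  // (0..4095, e.g. 4095(%r3,%r2)), while a 20-bit displacement is signed
  // (-524288..524287, e.g. -4(%r15)).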
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::ConstraintCode::i:
  case InlineAsm::ConstraintCode::Q:
  case InlineAsm::ConstraintCode::ZQ:
    // Accept an address with a short displacement, but no index.
    Form = SystemZAddressingMode::FormBD;
    DispRange = SystemZAddressingMode::Disp12Only;
    break;
  case InlineAsm::ConstraintCode::R:
  case InlineAsm::ConstraintCode::ZR:
    // Accept an address with a short displacement and an index.
    Form = SystemZAddressingMode::FormBDXNormal;
    DispRange = SystemZAddressingMode::Disp12Only;
    break;
  case InlineAsm::ConstraintCode::S:
  case InlineAsm::ConstraintCode::ZS:
    // Accept an address with a long displacement, but no index.
    Form = SystemZAddressingMode::FormBD;
    DispRange = SystemZAddressingMode::Disp20Only;
    break;
  case InlineAsm::ConstraintCode::T:
  case InlineAsm::ConstraintCode::m:
  case InlineAsm::ConstraintCode::o:
  case InlineAsm::ConstraintCode::p:
  case InlineAsm::ConstraintCode::ZT:
    // Accept an address with a long displacement and an index.
    // m works the same as T, as this is the most general case.
    // We don't really have any special handling of "offsettable"
    // memory addresses, so just treat o the same as m.
    Form = SystemZAddressingMode::FormBDXNormal;
    DispRange = SystemZAddressingMode::Disp20Only;
    break;
  }

  if (selectBDXAddr(Form, DispRange, Op, Base, Disp, Index)) {
    const TargetRegisterClass *TRC =
        Subtarget->getRegisterInfo()->getPointerRegClass(*MF);
    SDLoc DL(Base);
    SDValue RC = CurDAG->getTargetConstant(TRC->getID(), DL, MVT::i32);

    // Make sure that the base address doesn't go into %r0.
    // If it's a TargetFrameIndex or a fixed register, we shouldn't do anything.
    if (Base.getOpcode() != ISD::TargetFrameIndex &&
        Base.getOpcode() != ISD::Register) {
      Base =
          SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                         DL, Base.getValueType(),
                                         Base, RC), 0);
    }

    // Make sure that the index register isn't assigned to %r0 either.
    if (Index.getOpcode() != ISD::Register) {
      Index =
          SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                         DL, Index.getValueType(),
                                         Index, RC), 0);
    }

    OutOps.push_back(Base);
    OutOps.push_back(Disp);
    OutOps.push_back(Index);
    return false;
  }

  return true;
}
// IsProfitableToFold - Returns true if it is profitable to fold the specific
// operand node N of U during instruction selection that starts at Root.
bool
SystemZDAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
                                        SDNode *Root) const {
  // We want to avoid folding a LOAD into an ICMP node if as a result
  // we would be forced to spill the condition code into a GPR.
  if (N.getOpcode() == ISD::LOAD && U->getOpcode() == SystemZISD::ICMP) {
    if (!N.hasOneUse() || !U->hasOneUse())
      return false;

    // The user of the CC value will usually be a CopyToReg into the
    // physical CC register, which in turn is glued and chained to the
    // actual instruction that uses the CC value. Bail out if we have
    // anything else than that.
    SDNode *CCUser = *U->user_begin();
    SDNode *CCRegUser = nullptr;
    if (CCUser->getOpcode() == ISD::CopyToReg &&
        cast<RegisterSDNode>(CCUser->getOperand(1))->getReg() == SystemZ::CC) {
      for (auto *U : CCUser->users()) {
        if (CCRegUser == nullptr)
          CCRegUser = U;
        else if (CCRegUser != U)
          return false;
      }
    }
    if (CCRegUser == nullptr)
      return false;

    // If the actual instruction is a branch, the only thing that remains to be
    // checked is whether the CCUser chain is a predecessor of the load.
    if (CCRegUser->isMachineOpcode() &&
        CCRegUser->getMachineOpcode() == SystemZ::BRC)
      return !N->isPredecessorOf(CCUser->getOperand(0).getNode());

    // Otherwise, the instruction may have multiple operands, and we need to
    // verify that none of them are a predecessor of the load. This is exactly
    // the same check that would be done by common code if the CC setter were
    // glued to the CC user, so simply invoke that check here.
    if (!IsLegalToFold(N, U, CCRegUser, OptLevel, false))
      return false;
  }

  return true;
}
namespace {
// Represents a sequence for extracting a 0/1 value from an IPM result:
// (((X ^ XORValue) + AddValue) >> Bit)
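// Worked example (with SystemZ::IPM_CC == 28, i.e. CC in bits 29:28 of the
// IPM result): XORValue == 0, AddValue == -(1 << 28), Bit == 31 tests for
// CC == 0, since only CC == 0 makes the sum negative and so sets bit 31.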
struct IPMConversion {
  IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit)
    : XORValue(xorValue), AddValue(addValue), Bit(bit) {}

  int64_t XORValue;
  int64_t AddValue;
  unsigned Bit;
};
} // end anonymous namespace
// Return a sequence for getting a 1 from an IPM result when CC has a
// value in CCMask and a 0 when CC has a value in CCValid & ~CCMask.
// The handling of CC values outside CCValid doesn't matter.
static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {
  // Deal with cases where the result can be taken directly from a bit
  // of the IPM result.
  if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3)))
    return IPMConversion(0, 0, SystemZ::IPM_CC);
  if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3)))
    return IPMConversion(0, 0, SystemZ::IPM_CC + 1);

  // Deal with cases where we can add a value to force the sign bit
  // to contain the right value.  Putting the bit in 31 means we can
  // use SRL rather than RISBG(L), and also makes it easier to get a
  // 0/-1 value, so it has priority over the other tests below.
  //
  // These sequences rely on the fact that the upper two bits of the
  // IPM result are zero.
  uint64_t TopBit = uint64_t(1) << 31;
  if (CCMask == (CCValid & SystemZ::CCMASK_0))
    return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1)))
    return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0
                            | SystemZ::CCMASK_1
                            | SystemZ::CCMASK_2)))
    return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & SystemZ::CCMASK_3))
    return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_1
                            | SystemZ::CCMASK_2
                            | SystemZ::CCMASK_3)))
    return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31);

  // Next try inverting the value and testing a bit.  0/1 could be
  // handled this way too, but we dealt with that case above.
  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2)))
    return IPMConversion(-1, 0, SystemZ::IPM_CC);

  // Handle cases where adding a value forces a non-sign bit to contain
  // the right value.
  if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2)))
    return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3)))
    return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1);

  // The remaining cases are 1, 2, 0/1/3 and 0/2/3.  All these can be
  // handled by inverting the low CC bit and applying one of the
  // sign-based extractions above.
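  // (E.g. for CCMask == CCMASK_1: XORing the IPM result with
  //  1 << SystemZ::IPM_CC maps CC 1 to CC 0, after which the CC == 0
  //  extraction above yields the desired 0/1 value.)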
  if (CCMask == (CCValid & SystemZ::CCMASK_1))
    return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & SystemZ::CCMASK_2))
    return IPMConversion(1 << SystemZ::IPM_CC,
                         TopBit - (3 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0
                            | SystemZ::CCMASK_1
                            | SystemZ::CCMASK_3)))
    return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0
                            | SystemZ::CCMASK_2
                            | SystemZ::CCMASK_3)))
    return IPMConversion(1 << SystemZ::IPM_CC,
                         TopBit - (1 << SystemZ::IPM_CC), 31);

  llvm_unreachable("Unexpected CC combination");
}
SDValue SystemZDAGToDAGISel::expandSelectBoolean(SDNode *Node) {
  auto *TrueOp = dyn_cast<ConstantSDNode>(Node->getOperand(0));
  auto *FalseOp = dyn_cast<ConstantSDNode>(Node->getOperand(1));
  if (!TrueOp || !FalseOp)
    return SDValue();
  if (FalseOp->getZExtValue() != 0)
    return SDValue();
  if (TrueOp->getSExtValue() != 1 && TrueOp->getSExtValue() != -1)
    return SDValue();

  auto *CCValidOp = dyn_cast<ConstantSDNode>(Node->getOperand(2));
  auto *CCMaskOp = dyn_cast<ConstantSDNode>(Node->getOperand(3));
  if (!CCValidOp || !CCMaskOp)
    return SDValue();
  int CCValid = CCValidOp->getZExtValue();
  int CCMask = CCMaskOp->getZExtValue();

  SDLoc DL(Node);
  SDValue CCReg = Node->getOperand(4);
  IPMConversion IPM = getIPMConversion(CCValid, CCMask);
  SDValue Result = CurDAG->getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);

  if (IPM.XORValue)
    Result = CurDAG->getNode(ISD::XOR, DL, MVT::i32, Result,
                             CurDAG->getConstant(IPM.XORValue, DL, MVT::i32));

  if (IPM.AddValue)
    Result =
        CurDAG->getNode(ISD::ADD, DL, MVT::i32, Result,
                        CurDAG->getSignedConstant(IPM.AddValue, DL, MVT::i32));

  EVT VT = Node->getValueType(0);
  if (VT == MVT::i32 && IPM.Bit == 31) {
    unsigned ShiftOp = TrueOp->getSExtValue() == 1 ? ISD::SRL : ISD::SRA;
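    // (A logical shift isolates the selected bit as 0/1; an arithmetic
    //  shift replicates it through the sign position, giving 0/-1.)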
    Result = CurDAG->getNode(ShiftOp, DL, MVT::i32, Result,
                             CurDAG->getConstant(IPM.Bit, DL, MVT::i32));
  } else {
    if (VT != MVT::i32)
      Result = CurDAG->getNode(ISD::ANY_EXTEND, DL, VT, Result);

    if (TrueOp->getSExtValue() == 1) {
      // The SHR/AND sequence should get optimized to an RISBG.
      Result = CurDAG->getNode(ISD::SRL, DL, VT, Result,
                               CurDAG->getConstant(IPM.Bit, DL, MVT::i32));
      Result = CurDAG->getNode(ISD::AND, DL, VT, Result,
                               CurDAG->getConstant(1, DL, VT));
    } else {
      // Sign-extend from IPM.Bit using a pair of shifts.
      int ShlAmt = VT.getSizeInBits() - 1 - IPM.Bit;
      int SraAmt = VT.getSizeInBits() - 1;
      Result = CurDAG->getNode(ISD::SHL, DL, VT, Result,
                               CurDAG->getConstant(ShlAmt, DL, MVT::i32));
      Result = CurDAG->getNode(ISD::SRA, DL, VT, Result,
                               CurDAG->getConstant(SraAmt, DL, MVT::i32));
    }
  }

  return Result;
}
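// Decide whether an FP operation should be selected into the patterns used
// for reassociation. The MVT::f32 gate below reflects that single-precision
// arithmetic on vector registers (e.g. WFASB) is only available from the
// vector-enhancements facility 1 on.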
bool SystemZDAGToDAGISel::shouldSelectForReassoc(SDNode *N) const {
  EVT VT = N->getValueType(0);
  assert(VT.isFloatingPoint() && "Expected FP SDNode");
  return N->getFlags().hasAllowReassociation() &&
         N->getFlags().hasNoSignedZeros() && Subtarget->hasVector() &&
         (VT != MVT::f32 || Subtarget->hasVectorEnhancements1()) &&
         !N->isStrictFPOpcode();
}
void SystemZDAGToDAGISel::PreprocessISelDAG() {
  // If we have conditional immediate loads, we always prefer
  // using those over an IPM sequence.
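  // (With the load/store-on-condition-2 facility, a boolean SELECT_CCMASK of
  //  0/1 constants can be selected directly to LOCHI and friends, which is
  //  cheaper than materializing the IPM result and shifting it into place.)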
  if (Subtarget->hasLoadStoreOnCond2())
    return;

  bool MadeChange = false;

  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
                                       E = CurDAG->allnodes_end();
       I != E;) {
    SDNode *N = &*I++;

    SDValue Res;
    switch (N->getOpcode()) {
    default:
      break;
    case SystemZISD::SELECT_CCMASK:
      Res = expandSelectBoolean(N);
      break;
    }

    if (Res) {
      LLVM_DEBUG(dbgs() << "SystemZ DAG preprocessing replacing:\nOld:    ");
      LLVM_DEBUG(N->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(Res.getNode()->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");
      MadeChange = true;
      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
    }
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();