1 //===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass looks for instructions that can be replaced by a Test Data Class
10 // instruction, and replaces them when profitable.
12 // Roughly, the following rules are recognized:
14 // 1: fcmp pred X, 0 -> tdc X, mask
15 // 2: fcmp pred X, +-inf -> tdc X, mask
16 // 3: fcmp pred X, +-minnorm -> tdc X, mask
17 // 4: tdc (fabs X), mask -> tdc X, newmask
18 // 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit]
19 // 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
20 // 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask
21 // 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
22 // 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
23 // 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
25 // The pass works in 4 steps:
27 // 1. All fcmp and icmp instructions in a function are checked for a match
28 // with rules 1-3 and 5-7. Their TDC equivalents are stored in
29 // the ConvertedInsts mapping. If the operand of a fcmp instruction is
30 // a fabs, it's also folded according to rule 4.
31 // 2. All and/or/xor i1 instructions whose both operands have been already
32 // mapped are mapped according to rules 8-10. LogicOpsWorklist is used
33 // as a queue of instructions to check.
34 // 3. All mapped instructions that are considered worthy of conversion (ie.
35 // replacing them will actually simplify the final code) are replaced
36 // with a call to the s390.tdc intrinsic.
37 // 4. All intermediate results of replaced instructions are removed if unused.
39 // Instructions that match rules 1-3 are considered unworthy of conversion
40 // on their own (since a comparison instruction is superior), but are mapped
41 // in the hopes of folding the result using rules 4 and 8-10 (likely removing
42 // the original comparison in the process).
44 //===----------------------------------------------------------------------===//
47 #include "SystemZSubtarget.h"
48 #include "llvm/ADT/MapVector.h"
49 #include "llvm/CodeGen/TargetPassConfig.h"
50 #include "llvm/IR/Constants.h"
51 #include "llvm/IR/IRBuilder.h"
52 #include "llvm/IR/InstIterator.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicsS390.h"
55 #include "llvm/IR/LegacyPassManager.h"
56 #include "llvm/IR/Module.h"
57 #include "llvm/Target/TargetMachine.h"
64 class SystemZTDCPass
: public FunctionPass
{
67 SystemZTDCPass() : FunctionPass(ID
) {
68 initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry());
71 bool runOnFunction(Function
&F
) override
;
73 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
74 AU
.addRequired
<TargetPassConfig
>();
78 // Maps seen instructions that can be mapped to a TDC, values are
79 // (TDC operand, TDC mask, worthy flag) triples.
80 MapVector
<Instruction
*, std::tuple
<Value
*, int, bool>> ConvertedInsts
;
81 // The queue of and/or/xor i1 instructions to be potentially folded.
82 std::vector
<BinaryOperator
*> LogicOpsWorklist
;
83 // Instructions matched while folding, to be removed at the end if unused.
84 std::set
<Instruction
*> PossibleJunk
;
86 // Tries to convert a fcmp instruction.
87 void convertFCmp(CmpInst
&I
);
89 // Tries to convert an icmp instruction.
90 void convertICmp(CmpInst
&I
);
92 // Tries to convert an i1 and/or/xor instruction, whose both operands
93 // have been already converted.
94 void convertLogicOp(BinaryOperator
&I
);
96 // Marks an instruction as converted - adds it to ConvertedInsts and adds
97 // any and/or/xor i1 users to the queue.
98 void converted(Instruction
*I
, Value
*V
, int Mask
, bool Worthy
) {
99 ConvertedInsts
[I
] = std::make_tuple(V
, Mask
, Worthy
);
100 auto &M
= *I
->getFunction()->getParent();
101 auto &Ctx
= M
.getContext();
102 for (auto *U
: I
->users()) {
103 auto *LI
= dyn_cast
<BinaryOperator
>(U
);
104 if (LI
&& LI
->getType() == Type::getInt1Ty(Ctx
) &&
105 (LI
->getOpcode() == Instruction::And
||
106 LI
->getOpcode() == Instruction::Or
||
107 LI
->getOpcode() == Instruction::Xor
)) {
108 LogicOpsWorklist
.push_back(LI
);
114 } // end anonymous namespace
116 char SystemZTDCPass::ID
= 0;
117 INITIALIZE_PASS(SystemZTDCPass
, "systemz-tdc",
118 "SystemZ Test Data Class optimization", false, false)
120 FunctionPass
*llvm::createSystemZTDCPass() {
121 return new SystemZTDCPass();
124 void SystemZTDCPass::convertFCmp(CmpInst
&I
) {
125 Value
*Op0
= I
.getOperand(0);
126 auto *Const
= dyn_cast
<ConstantFP
>(I
.getOperand(1));
127 auto Pred
= I
.getPredicate();
128 // Only comparisons with consts are interesting.
131 // Compute the smallest normal number (and its negation).
132 auto &Sem
= Op0
->getType()->getFltSemantics();
133 APFloat Smallest
= APFloat::getSmallestNormalized(Sem
);
134 APFloat NegSmallest
= Smallest
;
135 NegSmallest
.changeSign();
136 // Check if Const is one of our recognized consts.
138 if (Const
->isZero()) {
139 // All comparisons with 0 can be converted.
141 } else if (Const
->isInfinity()) {
142 // Likewise for infinities.
143 WhichConst
= Const
->isNegative() ? 2 : 1;
144 } else if (Const
->isExactlyValue(Smallest
)) {
145 // For Smallest, we cannot do EQ separately from GT.
146 if ((Pred
& CmpInst::FCMP_OGE
) != CmpInst::FCMP_OGE
&&
147 (Pred
& CmpInst::FCMP_OGE
) != 0)
150 } else if (Const
->isExactlyValue(NegSmallest
)) {
151 // Likewise for NegSmallest, we cannot do EQ separately from LT.
152 if ((Pred
& CmpInst::FCMP_OLE
) != CmpInst::FCMP_OLE
&&
153 (Pred
& CmpInst::FCMP_OLE
) != 0)
157 // Not one of our special constants.
160 // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
161 static const int Masks
[][4] = {
163 SystemZ::TDCMASK_ZERO
, // eq
164 SystemZ::TDCMASK_POSITIVE
, // gt
165 SystemZ::TDCMASK_NEGATIVE
, // lt
166 SystemZ::TDCMASK_NAN
, // un
169 SystemZ::TDCMASK_INFINITY_PLUS
, // eq
171 (SystemZ::TDCMASK_ZERO
|
172 SystemZ::TDCMASK_NEGATIVE
|
173 SystemZ::TDCMASK_NORMAL_PLUS
|
174 SystemZ::TDCMASK_SUBNORMAL_PLUS
), // lt
175 SystemZ::TDCMASK_NAN
, // un
178 SystemZ::TDCMASK_INFINITY_MINUS
, // eq
179 (SystemZ::TDCMASK_ZERO
|
180 SystemZ::TDCMASK_POSITIVE
|
181 SystemZ::TDCMASK_NORMAL_MINUS
|
182 SystemZ::TDCMASK_SUBNORMAL_MINUS
), // gt
184 SystemZ::TDCMASK_NAN
, // un
187 0, // eq (unsupported)
188 (SystemZ::TDCMASK_NORMAL_PLUS
|
189 SystemZ::TDCMASK_INFINITY_PLUS
), // gt (actually ge)
190 (SystemZ::TDCMASK_ZERO
|
191 SystemZ::TDCMASK_NEGATIVE
|
192 SystemZ::TDCMASK_SUBNORMAL_PLUS
), // lt
193 SystemZ::TDCMASK_NAN
, // un
196 0, // eq (unsupported)
197 (SystemZ::TDCMASK_ZERO
|
198 SystemZ::TDCMASK_POSITIVE
|
199 SystemZ::TDCMASK_SUBNORMAL_MINUS
), // gt
200 (SystemZ::TDCMASK_NORMAL_MINUS
|
201 SystemZ::TDCMASK_INFINITY_MINUS
), // lt (actually le)
202 SystemZ::TDCMASK_NAN
, // un
205 // Construct the mask as a combination of the partial masks.
207 if (Pred
& CmpInst::FCMP_OEQ
)
208 Mask
|= Masks
[WhichConst
][0];
209 if (Pred
& CmpInst::FCMP_OGT
)
210 Mask
|= Masks
[WhichConst
][1];
211 if (Pred
& CmpInst::FCMP_OLT
)
212 Mask
|= Masks
[WhichConst
][2];
213 if (Pred
& CmpInst::FCMP_UNO
)
214 Mask
|= Masks
[WhichConst
][3];
215 // A lone fcmp is unworthy of tdc conversion on its own, but may become
216 // worthy if combined with fabs.
218 if (CallInst
*CI
= dyn_cast
<CallInst
>(Op0
)) {
219 Function
*F
= CI
->getCalledFunction();
220 if (F
&& F
->getIntrinsicID() == Intrinsic::fabs
) {
221 // Fold with fabs - adjust the mask appropriately.
222 Mask
&= SystemZ::TDCMASK_PLUS
;
224 Op0
= CI
->getArgOperand(0);
225 // A combination of fcmp with fabs is a win, unless the constant
226 // involved is 0 (which is handled by later passes).
227 Worthy
= WhichConst
!= 0;
228 PossibleJunk
.insert(CI
);
231 converted(&I
, Op0
, Mask
, Worthy
);
234 void SystemZTDCPass::convertICmp(CmpInst
&I
) {
235 Value
*Op0
= I
.getOperand(0);
236 auto *Const
= dyn_cast
<ConstantInt
>(I
.getOperand(1));
237 auto Pred
= I
.getPredicate();
238 // All our icmp rules involve comparisons with consts.
241 if (auto *Cast
= dyn_cast
<BitCastInst
>(Op0
)) {
242 // Check for icmp+bitcast used for signbit.
243 if (!Cast
->getSrcTy()->isFloatTy() &&
244 !Cast
->getSrcTy()->isDoubleTy() &&
245 !Cast
->getSrcTy()->isFP128Ty())
247 Value
*V
= Cast
->getOperand(0);
249 if (Pred
== CmpInst::ICMP_SLT
&& Const
->isZero()) {
250 // icmp slt (bitcast X), 0 - set if sign bit true
251 Mask
= SystemZ::TDCMASK_MINUS
;
252 } else if (Pred
== CmpInst::ICMP_SGT
&& Const
->isMinusOne()) {
253 // icmp sgt (bitcast X), -1 - set if sign bit false
254 Mask
= SystemZ::TDCMASK_PLUS
;
256 // Not a sign bit check.
259 PossibleJunk
.insert(Cast
);
260 converted(&I
, V
, Mask
, true);
261 } else if (auto *CI
= dyn_cast
<CallInst
>(Op0
)) {
262 // Check if this is a pre-existing call of our tdc intrinsic.
263 Function
*F
= CI
->getCalledFunction();
264 if (!F
|| F
->getIntrinsicID() != Intrinsic::s390_tdc
)
266 if (!Const
->isZero())
268 Value
*V
= CI
->getArgOperand(0);
269 auto *MaskC
= dyn_cast
<ConstantInt
>(CI
->getArgOperand(1));
270 // Bail if the mask is not a constant.
273 int Mask
= MaskC
->getZExtValue();
274 Mask
&= SystemZ::TDCMASK_ALL
;
275 if (Pred
== CmpInst::ICMP_NE
) {
276 // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC
277 } else if (Pred
== CmpInst::ICMP_EQ
) {
278 // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask
279 Mask
^= SystemZ::TDCMASK_ALL
;
281 // An unknown comparison - ignore.
284 PossibleJunk
.insert(CI
);
285 converted(&I
, V
, Mask
, false);
289 void SystemZTDCPass::convertLogicOp(BinaryOperator
&I
) {
292 bool Worthy0
, Worthy1
;
293 std::tie(Op0
, Mask0
, Worthy0
) = ConvertedInsts
[cast
<Instruction
>(I
.getOperand(0))];
294 std::tie(Op1
, Mask1
, Worthy1
) = ConvertedInsts
[cast
<Instruction
>(I
.getOperand(1))];
298 switch (I
.getOpcode()) {
299 case Instruction::And
:
300 Mask
= Mask0
& Mask1
;
302 case Instruction::Or
:
303 Mask
= Mask0
| Mask1
;
305 case Instruction::Xor
:
306 Mask
= Mask0
^ Mask1
;
309 llvm_unreachable("Unknown op in convertLogicOp");
311 converted(&I
, Op0
, Mask
, true);
314 bool SystemZTDCPass::runOnFunction(Function
&F
) {
315 auto &TPC
= getAnalysis
<TargetPassConfig
>();
316 if (TPC
.getTM
<TargetMachine
>()
317 .getSubtarget
<SystemZSubtarget
>(F
)
321 ConvertedInsts
.clear();
322 LogicOpsWorklist
.clear();
323 PossibleJunk
.clear();
325 // Look for icmp+fcmp instructions.
326 for (auto &I
: instructions(F
)) {
327 if (I
.getOpcode() == Instruction::FCmp
)
328 convertFCmp(cast
<CmpInst
>(I
));
329 else if (I
.getOpcode() == Instruction::ICmp
)
330 convertICmp(cast
<CmpInst
>(I
));
333 // If none found, bail already.
334 if (ConvertedInsts
.empty())
337 // Process the queue of logic instructions.
338 while (!LogicOpsWorklist
.empty()) {
339 BinaryOperator
*Op
= LogicOpsWorklist
.back();
340 LogicOpsWorklist
.pop_back();
341 // If both operands mapped, and the instruction itself not yet mapped,
343 if (ConvertedInsts
.count(dyn_cast
<Instruction
>(Op
->getOperand(0))) &&
344 ConvertedInsts
.count(dyn_cast
<Instruction
>(Op
->getOperand(1))) &&
345 !ConvertedInsts
.count(Op
))
349 // Time to actually replace the instructions. Do it in the reverse order
350 // of finding them, since there's a good chance the earlier ones will be
351 // unused (due to being folded into later ones).
352 Module
&M
= *F
.getParent();
353 auto &Ctx
= M
.getContext();
354 Value
*Zero32
= ConstantInt::get(Type::getInt32Ty(Ctx
), 0);
355 bool MadeChange
= false;
356 for (auto &It
: reverse(ConvertedInsts
)) {
357 Instruction
*I
= It
.first
;
361 std::tie(V
, Mask
, Worthy
) = It
.second
;
362 if (!I
->user_empty()) {
363 // If used and unworthy of conversion, skip it.
366 // Call the intrinsic, compare result with 0.
368 Value
*MaskVal
= ConstantInt::get(Type::getInt64Ty(Ctx
), Mask
);
370 IRB
.CreateIntrinsic(Intrinsic::s390_tdc
, V
->getType(), {V
, MaskVal
});
371 Value
*ICmp
= IRB
.CreateICmp(CmpInst::ICMP_NE
, TDC
, Zero32
);
372 I
->replaceAllUsesWith(ICmp
);
374 // If unused, or used and converted, remove it.
375 I
->eraseFromParent();
382 // We've actually done something - now clear misc accumulated junk (fabs,
384 for (auto *I
: PossibleJunk
)
386 I
->eraseFromParent();