//===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass looks for instructions that can be replaced by a Test Data Class
// instruction, and replaces them when profitable.
//
// Roughly, the following rules are recognized:
//
//  1: fcmp pred X, 0 -> tdc X, mask
//  2: fcmp pred X, +-inf -> tdc X, mask
//  3: fcmp pred X, +-minnorm -> tdc X, mask
//  4: tdc (fabs X), mask -> tdc X, newmask
//  5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [i.e. signbit]
//  6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
//  7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)), 0 -> tdc X, mask/~mask
//  8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
//  9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
// 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
//
// The pass works in 4 steps:
//
// 1. All fcmp and icmp instructions in a function are checked for a match
//    with rules 1-3 and 5-7.  Their TDC equivalents are stored in
//    the ConvertedInsts mapping.  If the operand of a fcmp instruction is
//    a fabs, it's also folded according to rule 4.
// 2. All and/or/xor i1 instructions both of whose operands have already been
//    mapped are mapped according to rules 8-10.  LogicOpsWorklist is used
//    as a queue of instructions to check.
// 3. All mapped instructions that are considered worthy of conversion (i.e.
//    replacing them will actually simplify the final code) are replaced
//    with a call to the s390.tdc intrinsic.
// 4. All intermediate results of replaced instructions are removed if unused.
//
// Instructions that match rules 1-3 are considered unworthy of conversion
// on their own (since a comparison instruction is superior), but are mapped
// in the hopes of folding the result using rules 4 and 8-10 (likely removing
// the original comparison in the process).
//
//===----------------------------------------------------------------------===//
// NOTE(review): the SystemZ::TDCMASK_* constants and the declaration of
// llvm::createSystemZTDCPass() used below must come from a target header that
// is not visible in this listing - confirm it is included here.
#include "llvm/ADT/MapVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include <set>
#include <tuple>
#include <vector>

using namespace llvm;
61 void initializeSystemZTDCPassPass(PassRegistry
&);
66 class SystemZTDCPass
: public FunctionPass
{
69 SystemZTDCPass() : FunctionPass(ID
) {
70 initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry());
73 bool runOnFunction(Function
&F
) override
;
75 // Maps seen instructions that can be mapped to a TDC, values are
76 // (TDC operand, TDC mask, worthy flag) triples.
77 MapVector
<Instruction
*, std::tuple
<Value
*, int, bool>> ConvertedInsts
;
78 // The queue of and/or/xor i1 instructions to be potentially folded.
79 std::vector
<BinaryOperator
*> LogicOpsWorklist
;
80 // Instructions matched while folding, to be removed at the end if unused.
81 std::set
<Instruction
*> PossibleJunk
;
83 // Tries to convert a fcmp instruction.
84 void convertFCmp(CmpInst
&I
);
86 // Tries to convert an icmp instruction.
87 void convertICmp(CmpInst
&I
);
89 // Tries to convert an i1 and/or/xor instruction, whose both operands
90 // have been already converted.
91 void convertLogicOp(BinaryOperator
&I
);
93 // Marks an instruction as converted - adds it to ConvertedInsts and adds
94 // any and/or/xor i1 users to the queue.
95 void converted(Instruction
*I
, Value
*V
, int Mask
, bool Worthy
) {
96 ConvertedInsts
[I
] = std::make_tuple(V
, Mask
, Worthy
);
97 auto &M
= *I
->getFunction()->getParent();
98 auto &Ctx
= M
.getContext();
99 for (auto *U
: I
->users()) {
100 auto *LI
= dyn_cast
<BinaryOperator
>(U
);
101 if (LI
&& LI
->getType() == Type::getInt1Ty(Ctx
) &&
102 (LI
->getOpcode() == Instruction::And
||
103 LI
->getOpcode() == Instruction::Or
||
104 LI
->getOpcode() == Instruction::Xor
)) {
105 LogicOpsWorklist
.push_back(LI
);
111 } // end anonymous namespace
113 char SystemZTDCPass::ID
= 0;
114 INITIALIZE_PASS(SystemZTDCPass
, "systemz-tdc",
115 "SystemZ Test Data Class optimization", false, false)
117 FunctionPass
*llvm::createSystemZTDCPass() {
118 return new SystemZTDCPass();
121 void SystemZTDCPass::convertFCmp(CmpInst
&I
) {
122 Value
*Op0
= I
.getOperand(0);
123 auto *Const
= dyn_cast
<ConstantFP
>(I
.getOperand(1));
124 auto Pred
= I
.getPredicate();
125 // Only comparisons with consts are interesting.
128 // Compute the smallest normal number (and its negation).
129 auto &Sem
= Op0
->getType()->getFltSemantics();
130 APFloat Smallest
= APFloat::getSmallestNormalized(Sem
);
131 APFloat NegSmallest
= Smallest
;
132 NegSmallest
.changeSign();
133 // Check if Const is one of our recognized consts.
135 if (Const
->isZero()) {
136 // All comparisons with 0 can be converted.
138 } else if (Const
->isInfinity()) {
139 // Likewise for infinities.
140 WhichConst
= Const
->isNegative() ? 2 : 1;
141 } else if (Const
->isExactlyValue(Smallest
)) {
142 // For Smallest, we cannot do EQ separately from GT.
143 if ((Pred
& CmpInst::FCMP_OGE
) != CmpInst::FCMP_OGE
&&
144 (Pred
& CmpInst::FCMP_OGE
) != 0)
147 } else if (Const
->isExactlyValue(NegSmallest
)) {
148 // Likewise for NegSmallest, we cannot do EQ separately from LT.
149 if ((Pred
& CmpInst::FCMP_OLE
) != CmpInst::FCMP_OLE
&&
150 (Pred
& CmpInst::FCMP_OLE
) != 0)
154 // Not one of our special constants.
157 // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
158 static const int Masks
[][4] = {
160 SystemZ::TDCMASK_ZERO
, // eq
161 SystemZ::TDCMASK_POSITIVE
, // gt
162 SystemZ::TDCMASK_NEGATIVE
, // lt
163 SystemZ::TDCMASK_NAN
, // un
166 SystemZ::TDCMASK_INFINITY_PLUS
, // eq
168 (SystemZ::TDCMASK_ZERO
|
169 SystemZ::TDCMASK_NEGATIVE
|
170 SystemZ::TDCMASK_NORMAL_PLUS
|
171 SystemZ::TDCMASK_SUBNORMAL_PLUS
), // lt
172 SystemZ::TDCMASK_NAN
, // un
175 SystemZ::TDCMASK_INFINITY_MINUS
, // eq
176 (SystemZ::TDCMASK_ZERO
|
177 SystemZ::TDCMASK_POSITIVE
|
178 SystemZ::TDCMASK_NORMAL_MINUS
|
179 SystemZ::TDCMASK_SUBNORMAL_MINUS
), // gt
181 SystemZ::TDCMASK_NAN
, // un
184 0, // eq (unsupported)
185 (SystemZ::TDCMASK_NORMAL_PLUS
|
186 SystemZ::TDCMASK_INFINITY_PLUS
), // gt (actually ge)
187 (SystemZ::TDCMASK_ZERO
|
188 SystemZ::TDCMASK_NEGATIVE
|
189 SystemZ::TDCMASK_SUBNORMAL_PLUS
), // lt
190 SystemZ::TDCMASK_NAN
, // un
193 0, // eq (unsupported)
194 (SystemZ::TDCMASK_ZERO
|
195 SystemZ::TDCMASK_POSITIVE
|
196 SystemZ::TDCMASK_SUBNORMAL_MINUS
), // gt
197 (SystemZ::TDCMASK_NORMAL_MINUS
|
198 SystemZ::TDCMASK_INFINITY_MINUS
), // lt (actually le)
199 SystemZ::TDCMASK_NAN
, // un
202 // Construct the mask as a combination of the partial masks.
204 if (Pred
& CmpInst::FCMP_OEQ
)
205 Mask
|= Masks
[WhichConst
][0];
206 if (Pred
& CmpInst::FCMP_OGT
)
207 Mask
|= Masks
[WhichConst
][1];
208 if (Pred
& CmpInst::FCMP_OLT
)
209 Mask
|= Masks
[WhichConst
][2];
210 if (Pred
& CmpInst::FCMP_UNO
)
211 Mask
|= Masks
[WhichConst
][3];
212 // A lone fcmp is unworthy of tdc conversion on its own, but may become
213 // worthy if combined with fabs.
215 if (CallInst
*CI
= dyn_cast
<CallInst
>(Op0
)) {
216 Function
*F
= CI
->getCalledFunction();
217 if (F
&& F
->getIntrinsicID() == Intrinsic::fabs
) {
218 // Fold with fabs - adjust the mask appropriately.
219 Mask
&= SystemZ::TDCMASK_PLUS
;
221 Op0
= CI
->getArgOperand(0);
222 // A combination of fcmp with fabs is a win, unless the constant
223 // involved is 0 (which is handled by later passes).
224 Worthy
= WhichConst
!= 0;
225 PossibleJunk
.insert(CI
);
228 converted(&I
, Op0
, Mask
, Worthy
);
231 void SystemZTDCPass::convertICmp(CmpInst
&I
) {
232 Value
*Op0
= I
.getOperand(0);
233 auto *Const
= dyn_cast
<ConstantInt
>(I
.getOperand(1));
234 auto Pred
= I
.getPredicate();
235 // All our icmp rules involve comparisons with consts.
238 if (auto *Cast
= dyn_cast
<BitCastInst
>(Op0
)) {
239 // Check for icmp+bitcast used for signbit.
240 if (!Cast
->getSrcTy()->isFloatTy() &&
241 !Cast
->getSrcTy()->isDoubleTy() &&
242 !Cast
->getSrcTy()->isFP128Ty())
244 Value
*V
= Cast
->getOperand(0);
246 if (Pred
== CmpInst::ICMP_SLT
&& Const
->isZero()) {
247 // icmp slt (bitcast X), 0 - set if sign bit true
248 Mask
= SystemZ::TDCMASK_MINUS
;
249 } else if (Pred
== CmpInst::ICMP_SGT
&& Const
->isMinusOne()) {
250 // icmp sgt (bitcast X), -1 - set if sign bit false
251 Mask
= SystemZ::TDCMASK_PLUS
;
253 // Not a sign bit check.
256 PossibleJunk
.insert(Cast
);
257 converted(&I
, V
, Mask
, true);
258 } else if (auto *CI
= dyn_cast
<CallInst
>(Op0
)) {
259 // Check if this is a pre-existing call of our tdc intrinsic.
260 Function
*F
= CI
->getCalledFunction();
261 if (!F
|| F
->getIntrinsicID() != Intrinsic::s390_tdc
)
263 if (!Const
->isZero())
265 Value
*V
= CI
->getArgOperand(0);
266 auto *MaskC
= dyn_cast
<ConstantInt
>(CI
->getArgOperand(1));
267 // Bail if the mask is not a constant.
270 int Mask
= MaskC
->getZExtValue();
271 Mask
&= SystemZ::TDCMASK_ALL
;
272 if (Pred
== CmpInst::ICMP_NE
) {
273 // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC
274 } else if (Pred
== CmpInst::ICMP_EQ
) {
275 // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask
276 Mask
^= SystemZ::TDCMASK_ALL
;
278 // An unknown comparison - ignore.
281 PossibleJunk
.insert(CI
);
282 converted(&I
, V
, Mask
, false);
286 void SystemZTDCPass::convertLogicOp(BinaryOperator
&I
) {
289 bool Worthy0
, Worthy1
;
290 std::tie(Op0
, Mask0
, Worthy0
) = ConvertedInsts
[cast
<Instruction
>(I
.getOperand(0))];
291 std::tie(Op1
, Mask1
, Worthy1
) = ConvertedInsts
[cast
<Instruction
>(I
.getOperand(1))];
295 switch (I
.getOpcode()) {
296 case Instruction::And
:
297 Mask
= Mask0
& Mask1
;
299 case Instruction::Or
:
300 Mask
= Mask0
| Mask1
;
302 case Instruction::Xor
:
303 Mask
= Mask0
^ Mask1
;
306 llvm_unreachable("Unknown op in convertLogicOp");
308 converted(&I
, Op0
, Mask
, true);
311 bool SystemZTDCPass::runOnFunction(Function
&F
) {
312 ConvertedInsts
.clear();
313 LogicOpsWorklist
.clear();
314 PossibleJunk
.clear();
316 // Look for icmp+fcmp instructions.
317 for (auto &I
: instructions(F
)) {
318 if (I
.getOpcode() == Instruction::FCmp
)
319 convertFCmp(cast
<CmpInst
>(I
));
320 else if (I
.getOpcode() == Instruction::ICmp
)
321 convertICmp(cast
<CmpInst
>(I
));
324 // If none found, bail already.
325 if (ConvertedInsts
.empty())
328 // Process the queue of logic instructions.
329 while (!LogicOpsWorklist
.empty()) {
330 BinaryOperator
*Op
= LogicOpsWorklist
.back();
331 LogicOpsWorklist
.pop_back();
332 // If both operands mapped, and the instruction itself not yet mapped,
334 if (ConvertedInsts
.count(dyn_cast
<Instruction
>(Op
->getOperand(0))) &&
335 ConvertedInsts
.count(dyn_cast
<Instruction
>(Op
->getOperand(1))) &&
336 !ConvertedInsts
.count(Op
))
340 // Time to actually replace the instructions. Do it in the reverse order
341 // of finding them, since there's a good chance the earlier ones will be
342 // unused (due to being folded into later ones).
343 Module
&M
= *F
.getParent();
344 auto &Ctx
= M
.getContext();
345 Value
*Zero32
= ConstantInt::get(Type::getInt32Ty(Ctx
), 0);
346 bool MadeChange
= false;
347 for (auto &It
: reverse(ConvertedInsts
)) {
348 Instruction
*I
= It
.first
;
352 std::tie(V
, Mask
, Worthy
) = It
.second
;
353 if (!I
->user_empty()) {
354 // If used and unworthy of conversion, skip it.
357 // Call the intrinsic, compare result with 0.
359 Intrinsic::getDeclaration(&M
, Intrinsic::s390_tdc
, V
->getType());
361 Value
*MaskVal
= ConstantInt::get(Type::getInt64Ty(Ctx
), Mask
);
362 Instruction
*TDC
= IRB
.CreateCall(TDCFunc
, {V
, MaskVal
});
363 Value
*ICmp
= IRB
.CreateICmp(CmpInst::ICMP_NE
, TDC
, Zero32
);
364 I
->replaceAllUsesWith(ICmp
);
366 // If unused, or used and converted, remove it.
367 I
->eraseFromParent();
374 // We've actually done something - now clear misc accumulated junk (fabs,
376 for (auto *I
: PossibleJunk
)
378 I
->eraseFromParent();