1 //===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass looks for instructions that can be replaced by a Test Data Class
10 // instruction, and replaces them when profitable.
12 // Roughly, the following rules are recognized:
14 // 1: fcmp pred X, 0 -> tdc X, mask
15 // 2: fcmp pred X, +-inf -> tdc X, mask
16 // 3: fcmp pred X, +-minnorm -> tdc X, mask
17 // 4: tdc (fabs X), mask -> tdc X, newmask
18 // 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit]
19 // 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
20 // 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask
21 // 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
22 // 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
23 // 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
25 // The pass works in 4 steps:
27 // 1. All fcmp and icmp instructions in a function are checked for a match
28 // with rules 1-3 and 5-7. Their TDC equivalents are stored in
29 // the ConvertedInsts mapping. If the operand of a fcmp instruction is
30 // a fabs, it's also folded according to rule 4.
31 // 2. All and/or/xor i1 instructions whose both operands have been already
32 // mapped are mapped according to rules 8-10. LogicOpsWorklist is used
33 // as a queue of instructions to check.
34 // 3. All mapped instructions that are considered worthy of conversion (ie.
35 // replacing them will actually simplify the final code) are replaced
36 // with a call to the s390.tdc intrinsic.
37 // 4. All intermediate results of replaced instructions are removed if unused.
39 // Instructions that match rules 1-3 are considered unworthy of conversion
40 // on their own (since a comparison instruction is superior), but are mapped
41 // in the hopes of folding the result using rules 4 and 8-10 (likely removing
42 // the original comparison in the process).
44 //===----------------------------------------------------------------------===//
47 #include "llvm/ADT/MapVector.h"
48 #include "llvm/IR/Constants.h"
49 #include "llvm/IR/IRBuilder.h"
50 #include "llvm/IR/InstIterator.h"
51 #include "llvm/IR/Instructions.h"
52 #include "llvm/IR/IntrinsicInst.h"
53 #include "llvm/IR/IntrinsicsS390.h"
54 #include "llvm/IR/LegacyPassManager.h"
55 #include "llvm/IR/Module.h"
62 void initializeSystemZTDCPassPass(PassRegistry
&);
67 class SystemZTDCPass
: public FunctionPass
{
70 SystemZTDCPass() : FunctionPass(ID
) {
71 initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry());
74 bool runOnFunction(Function
&F
) override
;
76 // Maps seen instructions that can be mapped to a TDC, values are
77 // (TDC operand, TDC mask, worthy flag) triples.
78 MapVector
<Instruction
*, std::tuple
<Value
*, int, bool>> ConvertedInsts
;
79 // The queue of and/or/xor i1 instructions to be potentially folded.
80 std::vector
<BinaryOperator
*> LogicOpsWorklist
;
81 // Instructions matched while folding, to be removed at the end if unused.
82 std::set
<Instruction
*> PossibleJunk
;
84 // Tries to convert a fcmp instruction.
85 void convertFCmp(CmpInst
&I
);
87 // Tries to convert an icmp instruction.
88 void convertICmp(CmpInst
&I
);
90 // Tries to convert an i1 and/or/xor instruction, whose both operands
91 // have been already converted.
92 void convertLogicOp(BinaryOperator
&I
);
94 // Marks an instruction as converted - adds it to ConvertedInsts and adds
95 // any and/or/xor i1 users to the queue.
96 void converted(Instruction
*I
, Value
*V
, int Mask
, bool Worthy
) {
97 ConvertedInsts
[I
] = std::make_tuple(V
, Mask
, Worthy
);
98 auto &M
= *I
->getFunction()->getParent();
99 auto &Ctx
= M
.getContext();
100 for (auto *U
: I
->users()) {
101 auto *LI
= dyn_cast
<BinaryOperator
>(U
);
102 if (LI
&& LI
->getType() == Type::getInt1Ty(Ctx
) &&
103 (LI
->getOpcode() == Instruction::And
||
104 LI
->getOpcode() == Instruction::Or
||
105 LI
->getOpcode() == Instruction::Xor
)) {
106 LogicOpsWorklist
.push_back(LI
);
112 } // end anonymous namespace
114 char SystemZTDCPass::ID
= 0;
115 INITIALIZE_PASS(SystemZTDCPass
, "systemz-tdc",
116 "SystemZ Test Data Class optimization", false, false)
118 FunctionPass
*llvm::createSystemZTDCPass() {
119 return new SystemZTDCPass();
122 void SystemZTDCPass::convertFCmp(CmpInst
&I
) {
123 Value
*Op0
= I
.getOperand(0);
124 auto *Const
= dyn_cast
<ConstantFP
>(I
.getOperand(1));
125 auto Pred
= I
.getPredicate();
126 // Only comparisons with consts are interesting.
129 // Compute the smallest normal number (and its negation).
130 auto &Sem
= Op0
->getType()->getFltSemantics();
131 APFloat Smallest
= APFloat::getSmallestNormalized(Sem
);
132 APFloat NegSmallest
= Smallest
;
133 NegSmallest
.changeSign();
134 // Check if Const is one of our recognized consts.
136 if (Const
->isZero()) {
137 // All comparisons with 0 can be converted.
139 } else if (Const
->isInfinity()) {
140 // Likewise for infinities.
141 WhichConst
= Const
->isNegative() ? 2 : 1;
142 } else if (Const
->isExactlyValue(Smallest
)) {
143 // For Smallest, we cannot do EQ separately from GT.
144 if ((Pred
& CmpInst::FCMP_OGE
) != CmpInst::FCMP_OGE
&&
145 (Pred
& CmpInst::FCMP_OGE
) != 0)
148 } else if (Const
->isExactlyValue(NegSmallest
)) {
149 // Likewise for NegSmallest, we cannot do EQ separately from LT.
150 if ((Pred
& CmpInst::FCMP_OLE
) != CmpInst::FCMP_OLE
&&
151 (Pred
& CmpInst::FCMP_OLE
) != 0)
155 // Not one of our special constants.
158 // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
159 static const int Masks
[][4] = {
161 SystemZ::TDCMASK_ZERO
, // eq
162 SystemZ::TDCMASK_POSITIVE
, // gt
163 SystemZ::TDCMASK_NEGATIVE
, // lt
164 SystemZ::TDCMASK_NAN
, // un
167 SystemZ::TDCMASK_INFINITY_PLUS
, // eq
169 (SystemZ::TDCMASK_ZERO
|
170 SystemZ::TDCMASK_NEGATIVE
|
171 SystemZ::TDCMASK_NORMAL_PLUS
|
172 SystemZ::TDCMASK_SUBNORMAL_PLUS
), // lt
173 SystemZ::TDCMASK_NAN
, // un
176 SystemZ::TDCMASK_INFINITY_MINUS
, // eq
177 (SystemZ::TDCMASK_ZERO
|
178 SystemZ::TDCMASK_POSITIVE
|
179 SystemZ::TDCMASK_NORMAL_MINUS
|
180 SystemZ::TDCMASK_SUBNORMAL_MINUS
), // gt
182 SystemZ::TDCMASK_NAN
, // un
185 0, // eq (unsupported)
186 (SystemZ::TDCMASK_NORMAL_PLUS
|
187 SystemZ::TDCMASK_INFINITY_PLUS
), // gt (actually ge)
188 (SystemZ::TDCMASK_ZERO
|
189 SystemZ::TDCMASK_NEGATIVE
|
190 SystemZ::TDCMASK_SUBNORMAL_PLUS
), // lt
191 SystemZ::TDCMASK_NAN
, // un
194 0, // eq (unsupported)
195 (SystemZ::TDCMASK_ZERO
|
196 SystemZ::TDCMASK_POSITIVE
|
197 SystemZ::TDCMASK_SUBNORMAL_MINUS
), // gt
198 (SystemZ::TDCMASK_NORMAL_MINUS
|
199 SystemZ::TDCMASK_INFINITY_MINUS
), // lt (actually le)
200 SystemZ::TDCMASK_NAN
, // un
203 // Construct the mask as a combination of the partial masks.
205 if (Pred
& CmpInst::FCMP_OEQ
)
206 Mask
|= Masks
[WhichConst
][0];
207 if (Pred
& CmpInst::FCMP_OGT
)
208 Mask
|= Masks
[WhichConst
][1];
209 if (Pred
& CmpInst::FCMP_OLT
)
210 Mask
|= Masks
[WhichConst
][2];
211 if (Pred
& CmpInst::FCMP_UNO
)
212 Mask
|= Masks
[WhichConst
][3];
213 // A lone fcmp is unworthy of tdc conversion on its own, but may become
214 // worthy if combined with fabs.
216 if (CallInst
*CI
= dyn_cast
<CallInst
>(Op0
)) {
217 Function
*F
= CI
->getCalledFunction();
218 if (F
&& F
->getIntrinsicID() == Intrinsic::fabs
) {
219 // Fold with fabs - adjust the mask appropriately.
220 Mask
&= SystemZ::TDCMASK_PLUS
;
222 Op0
= CI
->getArgOperand(0);
223 // A combination of fcmp with fabs is a win, unless the constant
224 // involved is 0 (which is handled by later passes).
225 Worthy
= WhichConst
!= 0;
226 PossibleJunk
.insert(CI
);
229 converted(&I
, Op0
, Mask
, Worthy
);
232 void SystemZTDCPass::convertICmp(CmpInst
&I
) {
233 Value
*Op0
= I
.getOperand(0);
234 auto *Const
= dyn_cast
<ConstantInt
>(I
.getOperand(1));
235 auto Pred
= I
.getPredicate();
236 // All our icmp rules involve comparisons with consts.
239 if (auto *Cast
= dyn_cast
<BitCastInst
>(Op0
)) {
240 // Check for icmp+bitcast used for signbit.
241 if (!Cast
->getSrcTy()->isFloatTy() &&
242 !Cast
->getSrcTy()->isDoubleTy() &&
243 !Cast
->getSrcTy()->isFP128Ty())
245 Value
*V
= Cast
->getOperand(0);
247 if (Pred
== CmpInst::ICMP_SLT
&& Const
->isZero()) {
248 // icmp slt (bitcast X), 0 - set if sign bit true
249 Mask
= SystemZ::TDCMASK_MINUS
;
250 } else if (Pred
== CmpInst::ICMP_SGT
&& Const
->isMinusOne()) {
251 // icmp sgt (bitcast X), -1 - set if sign bit false
252 Mask
= SystemZ::TDCMASK_PLUS
;
254 // Not a sign bit check.
257 PossibleJunk
.insert(Cast
);
258 converted(&I
, V
, Mask
, true);
259 } else if (auto *CI
= dyn_cast
<CallInst
>(Op0
)) {
260 // Check if this is a pre-existing call of our tdc intrinsic.
261 Function
*F
= CI
->getCalledFunction();
262 if (!F
|| F
->getIntrinsicID() != Intrinsic::s390_tdc
)
264 if (!Const
->isZero())
266 Value
*V
= CI
->getArgOperand(0);
267 auto *MaskC
= dyn_cast
<ConstantInt
>(CI
->getArgOperand(1));
268 // Bail if the mask is not a constant.
271 int Mask
= MaskC
->getZExtValue();
272 Mask
&= SystemZ::TDCMASK_ALL
;
273 if (Pred
== CmpInst::ICMP_NE
) {
274 // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC
275 } else if (Pred
== CmpInst::ICMP_EQ
) {
276 // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask
277 Mask
^= SystemZ::TDCMASK_ALL
;
279 // An unknown comparison - ignore.
282 PossibleJunk
.insert(CI
);
283 converted(&I
, V
, Mask
, false);
287 void SystemZTDCPass::convertLogicOp(BinaryOperator
&I
) {
290 bool Worthy0
, Worthy1
;
291 std::tie(Op0
, Mask0
, Worthy0
) = ConvertedInsts
[cast
<Instruction
>(I
.getOperand(0))];
292 std::tie(Op1
, Mask1
, Worthy1
) = ConvertedInsts
[cast
<Instruction
>(I
.getOperand(1))];
296 switch (I
.getOpcode()) {
297 case Instruction::And
:
298 Mask
= Mask0
& Mask1
;
300 case Instruction::Or
:
301 Mask
= Mask0
| Mask1
;
303 case Instruction::Xor
:
304 Mask
= Mask0
^ Mask1
;
307 llvm_unreachable("Unknown op in convertLogicOp");
309 converted(&I
, Op0
, Mask
, true);
312 bool SystemZTDCPass::runOnFunction(Function
&F
) {
313 ConvertedInsts
.clear();
314 LogicOpsWorklist
.clear();
315 PossibleJunk
.clear();
317 // Look for icmp+fcmp instructions.
318 for (auto &I
: instructions(F
)) {
319 if (I
.getOpcode() == Instruction::FCmp
)
320 convertFCmp(cast
<CmpInst
>(I
));
321 else if (I
.getOpcode() == Instruction::ICmp
)
322 convertICmp(cast
<CmpInst
>(I
));
325 // If none found, bail already.
326 if (ConvertedInsts
.empty())
329 // Process the queue of logic instructions.
330 while (!LogicOpsWorklist
.empty()) {
331 BinaryOperator
*Op
= LogicOpsWorklist
.back();
332 LogicOpsWorklist
.pop_back();
333 // If both operands mapped, and the instruction itself not yet mapped,
335 if (ConvertedInsts
.count(dyn_cast
<Instruction
>(Op
->getOperand(0))) &&
336 ConvertedInsts
.count(dyn_cast
<Instruction
>(Op
->getOperand(1))) &&
337 !ConvertedInsts
.count(Op
))
341 // Time to actually replace the instructions. Do it in the reverse order
342 // of finding them, since there's a good chance the earlier ones will be
343 // unused (due to being folded into later ones).
344 Module
&M
= *F
.getParent();
345 auto &Ctx
= M
.getContext();
346 Value
*Zero32
= ConstantInt::get(Type::getInt32Ty(Ctx
), 0);
347 bool MadeChange
= false;
348 for (auto &It
: reverse(ConvertedInsts
)) {
349 Instruction
*I
= It
.first
;
353 std::tie(V
, Mask
, Worthy
) = It
.second
;
354 if (!I
->user_empty()) {
355 // If used and unworthy of conversion, skip it.
358 // Call the intrinsic, compare result with 0.
360 Intrinsic::getDeclaration(&M
, Intrinsic::s390_tdc
, V
->getType());
362 Value
*MaskVal
= ConstantInt::get(Type::getInt64Ty(Ctx
), Mask
);
363 Instruction
*TDC
= IRB
.CreateCall(TDCFunc
, {V
, MaskVal
});
364 Value
*ICmp
= IRB
.CreateICmp(CmpInst::ICMP_NE
, TDC
, Zero32
);
365 I
->replaceAllUsesWith(ICmp
);
367 // If unused, or used and converted, remove it.
368 I
->eraseFromParent();
375 // We've actually done something - now clear misc accumulated junk (fabs,
377 for (auto *I
: PossibleJunk
)
379 I
->eraseFromParent();