//===- RISCVMatInt.cpp - Immediate materialisation -------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "RISCVMatInt.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;

static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) {
  if (!HasRVC)
    return Res.size();

  int Cost = 0;
  for (auto Instr : Res) {
    // Assume instructions that aren't listed aren't compressible.
    bool Compressed = false;
    switch (Instr.getOpcode()) {
    case RISCV::SLLI:
    case RISCV::SRLI:
      Compressed = true;
      break;
    case RISCV::ADDI:
    case RISCV::ADDIW:
    case RISCV::LUI:
      Compressed = isInt<6>(Instr.getImm());
      break;
    }
    // Two RVC instructions take the same space as one RVI instruction, but
    // can take longer to execute than the single RVI instruction. Thus, we
    // consider that two RVC instructions are slightly more costly than one
    // RVI instruction. For longer sequences of RVC instructions the space
    // savings can be worth it, though. The costs below try to model that.
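    //
    // Illustrative example (not from the original comments): two compressible
    // instructions cost 70 + 70 = 140, slightly more than the 100 charged for
    // a single uncompressed RVI instruction, so shorter uncompressed sequences
    // still win on this metric.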
    if (!Compressed)
      Cost += 100; // Baseline cost of one RVI instruction: 100%.
    else
      Cost += 70; // 70% cost of baseline.
  }
  return Cost;
}

// Recursively generate a sequence for materializing an integer.
static void generateInstSeqImpl(int64_t Val,
                                const FeatureBitset &ActiveFeatures,
                                RISCVMatInt::InstSeq &Res) {
  bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];

  // Use BSETI for a single bit that can't be expressed by a single LUI or ADDI.
  if (ActiveFeatures[RISCV::FeatureStdExtZbs] && isPowerOf2_64(Val) &&
      (!isInt<32>(Val) || Val == 0x800)) {
    Res.emplace_back(RISCV::BSETI, Log2_64(Val));
    return;
  }

  if (isInt<32>(Val)) {
    // Depending on the active bits in the immediate Value v, the following
    // instruction sequences are emitted:
    //
    // v == 0                        : ADDI
    // v[0,12) != 0 && v[12,32) == 0 : ADDI
    // v[0,12) == 0 && v[12,32) != 0 : LUI
    // v[0,32) != 0                  : LUI+ADDI(W)
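    //
    // Illustrative example (not from the original comments): Val = 0x1FFF
    // gives Hi20 = ((0x1FFF + 0x800) >> 12) = 2 and Lo12 = -1, so the sequence
    // is LUI 2 (materializing 0x2000) followed by ADDI(W) -1. The +0x800
    // rounds Hi20 up so that the sign-extended Lo12 can subtract back down.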
    int64_t Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF;
    int64_t Lo12 = SignExtend64<12>(Val);

    if (Hi20)
      Res.emplace_back(RISCV::LUI, Hi20);

    if (Lo12 || Hi20 == 0) {
      unsigned AddiOpc = (IsRV64 && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
      Res.emplace_back(AddiOpc, Lo12);
    }
    return;
  }

  assert(IsRV64 && "Can't emit >32-bit imm for non-RV64 target");

  // In the worst case, for a full 64-bit constant, a sequence of 8 instructions
  // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emitted. Note
  // that the first two instructions (LUI+ADDIW) can contribute up to 32 bits
  // while the following ADDI instructions contribute up to 12 bits each.
  //
  // At first glance, implementing this seems possible by simply emitting the
  // most significant 32 bits (LUI+ADDIW) followed by as many left shifts (SLLI)
  // and immediate additions (ADDI) as needed. However, because ADDI performs a
  // sign-extended addition, doing it that way would only be possible when at
  // most 11 bits of the ADDI immediates are used. Using all 12 bits of the
  // ADDI immediates, as GAS does, actually requires that the constant is
  // processed starting with the least significant bit.
  //
  // In the following, constants are processed from LSB to MSB but instruction
  // emission is performed from MSB to LSB by recursively calling
  // generateInstSeq. In each recursion, first the lowest 12 bits are removed
  // from the constant and the optimal shift amount, which can be greater than
  // 12 bits if the constant is sparse, is determined. Then, the shifted
  // remaining constant is processed recursively and gets emitted as soon as it
  // fits into 32 bits. The emission of the shifts and additions is subsequently
  // performed when the recursion returns.
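  //
  // Illustrative trace (not from the original comments): for Val = 0x100000005
  // the low 12 bits (5) are peeled off, leaving 0x100000000 with 32 trailing
  // zeros. The shifted remainder 1 fits in 32 bits and is emitted first, so
  // the recursion produces ADDI 1; SLLI 32; ADDI 5. Later optimizations in
  // generateInstSeq may still shorten such a sequence.
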
  int64_t Lo12 = SignExtend64<12>(Val);
  Val = (uint64_t)Val - (uint64_t)Lo12;

  int ShiftAmount = 0;
  bool Unsigned = false;

  // Val might now be valid for LUI without needing a shift.
  if (!isInt<32>(Val)) {
    ShiftAmount = llvm::countr_zero((uint64_t)Val);
    Val >>= ShiftAmount;

    // If the remaining bits don't fit in 12 bits, we might be able to reduce
    // the shift amount in order to use LUI which will zero the lower 12 bits.
    if (ShiftAmount > 12 && !isInt<12>(Val)) {
      if (isInt<32>((uint64_t)Val << 12)) {
        // Reduce the shift amount and add zeros to the LSBs so it will match LUI.
        ShiftAmount -= 12;
        Val = (uint64_t)Val << 12;
      } else if (isUInt<32>((uint64_t)Val << 12) &&
                 ActiveFeatures[RISCV::FeatureStdExtZba]) {
        // Reduce the shift amount and add zeros to the LSBs so it will match
        // LUI, then shift left with SLLI.UW to clear the upper 32 set bits.
        ShiftAmount -= 12;
        Val = ((uint64_t)Val << 12) | (0xffffffffull << 32);
        Unsigned = true;
      }
    }
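
    // Illustrative example (not from the original comments): if the shifted
    // remainder is 0x80001 with ShiftAmount = 20, then 0x80001 << 12 =
    // 0x80001000 is a uint32 but not an int32. With Zba the shift is reduced
    // to 8 and the upper bits are filled with ones, so the candidate sequence
    // is LUI 0x80001 followed by SLLI.UW 8, which materializes 0x8000100000.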

    // Try to use SLLI_UW for Val when it is uint32 but not int32.
    if (isUInt<32>((uint64_t)Val) && !isInt<32>((uint64_t)Val) &&
        ActiveFeatures[RISCV::FeatureStdExtZba]) {
      // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with
      // SLLI_UW.
      Val = ((uint64_t)Val) | (0xffffffffull << 32);
      Unsigned = true;
    }
  }

  generateInstSeqImpl(Val, ActiveFeatures, Res);

  // Skip shift if we were able to use LUI directly.
  if (ShiftAmount) {
    unsigned Opc = Unsigned ? RISCV::SLLI_UW : RISCV::SLLI;
    Res.emplace_back(Opc, ShiftAmount);
  }

  if (Lo12)
    Res.emplace_back(RISCV::ADDI, Lo12);
}

static unsigned extractRotateInfo(int64_t Val) {
  // for case: 0b111..1..xxxxxx1..1..
  unsigned LeadingOnes = llvm::countl_one((uint64_t)Val);
  unsigned TrailingOnes = llvm::countr_one((uint64_t)Val);
  if (TrailingOnes > 0 && TrailingOnes < 64 &&
      (LeadingOnes + TrailingOnes) > (64 - 12))
    return 64 - TrailingOnes;
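  // Illustrative example (not from the original comments): 0xFFFFFFFFFFFFF00F
  // has 52 leading ones and 4 trailing ones, so this returns 60; rotating the
  // value left by 60 yields 0xFFFFFFFFFFFFFF00, which fits in a signed 12-bit
  // immediate.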

  // for case: 0bxxx1..1..1...xxx
  unsigned UpperTrailingOnes = llvm::countr_one(Hi_32(Val));
  unsigned LowerLeadingOnes = llvm::countl_one(Lo_32(Val));
  if (UpperTrailingOnes < 32 &&
      (UpperTrailingOnes + LowerLeadingOnes) > (64 - 12))
    return 32 - UpperTrailingOnes;

  return 0;
}

static void generateInstSeqLeadingZeros(int64_t Val,
                                        const FeatureBitset &ActiveFeatures,
                                        RISCVMatInt::InstSeq &Res) {
  assert(Val > 0 && "Expected positive val");

  unsigned LeadingZeros = llvm::countl_zero((uint64_t)Val);
  uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
  // Fill in the bits that will be shifted out with 1s. An example where this
  // helps is trailing one masks with 32 or more ones. This will generate
  // ADDI -1 and an SRLI.
  ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);

  RISCVMatInt::InstSeq TmpSeq;
  generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);

  // Keep the new sequence if it is an improvement or the original is empty.
  if ((TmpSeq.size() + 1) < Res.size() ||
      (Res.empty() && TmpSeq.size() < 8)) {
    TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
    Res = TmpSeq;
  }

  // Some cases can benefit from filling the lower bits with zeros instead.
  ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
  TmpSeq.clear();
  generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);

  // Keep the new sequence if it is an improvement or the original is empty.
  if ((TmpSeq.size() + 1) < Res.size() ||
      (Res.empty() && TmpSeq.size() < 8)) {
    TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
    Res = TmpSeq;
  }

  // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
  // the end of the sequence.
  if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
    // Try replacing upper bits with 1.
    uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
    TmpSeq.clear();
    generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);

    // Keep the new sequence if it is an improvement.
    if ((TmpSeq.size() + 1) < Res.size() ||
        (Res.empty() && TmpSeq.size() < 8)) {
      TmpSeq.emplace_back(RISCV::ADD_UW, 0);
      Res = TmpSeq;
    }
  }
}

namespace llvm::RISCVMatInt {
InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
  RISCVMatInt::InstSeq Res;
  generateInstSeqImpl(Val, ActiveFeatures, Res);

  // If the low 12 bits are non-zero, the first expansion may end with an ADDI
  // or ADDIW. If there are trailing zeros, try generating a sign extended
  // constant with no trailing zeros and use a final SLLI to restore them.
  if ((Val & 0xfff) != 0 && (Val & 1) == 0 && Res.size() >= 2) {
    unsigned TrailingZeros = llvm::countr_zero((uint64_t)Val);
    int64_t ShiftedVal = Val >> TrailingZeros;
    // If we can use C.LI+C.SLLI instead of LUI+ADDI(W), prefer that since
    // it's more compressible. But only do so if LUI+ADDI(W) isn't fusable.
    // NOTE: We don't check for C extension to minimize differences in
    // generated code.
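    //
    // Illustrative example (not from the original comments): Val = 0x1800 =
    // 3 << 11: the shifted value 3 fits in 6 bits, so ADDI 3; SLLI 11
    // (compressible to C.LI/C.SLLI) is preferred over LUI 2; ADDI(W) -2048.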
    bool IsShiftedCompressible =
        isInt<6>(ShiftedVal) && !ActiveFeatures[RISCV::TuneLUIADDIFusion];
    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);

    // Keep the new sequence if it is an improvement.
    if ((TmpSeq.size() + 1) < Res.size() || IsShiftedCompressible) {
      TmpSeq.emplace_back(RISCV::SLLI, TrailingZeros);
      Res = TmpSeq;
    }
  }

  // If we have a 1 or 2 instruction sequence this is the best we can do. This
  // will always be true for RV32 and will often be true for RV64.
  if (Res.size() <= 2)
    return Res;

  assert(ActiveFeatures[RISCV::Feature64Bit] &&
         "Expected RV32 to only need 2 instructions");

  // If the lower 13 bits are something like 0x17ff, try to add 1 to change the
  // lower 13 bits to 0x1800. We can restore this with an ADDI of -1 at the end
  // of the sequence. Call generateInstSeqImpl on the new constant which may
  // subtract 0xfffffffffffff800 to create another ADDI. This will leave a
  // constant with more than 12 trailing zeros for the next recursive step.
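  //
  // Illustrative example (not from the original comments): if the low 13 bits
  // are 0x17ff, then Imm12 = -(0x800 - 0x7ff) = -1 and AdjustedVal ends in
  // 0x1800; generateInstSeqImpl then peels off Lo12 = -0x800, leaving a value
  // ending in 0x2000 (at least 13 trailing zeros). The final ADDI -1 restores
  // the original constant.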
  if ((Val & 0xfff) != 0 && (Val & 0x1800) == 0x1000) {
    int64_t Imm12 = -(0x800 - (Val & 0xfff));
    int64_t AdjustedVal = Val - Imm12;
    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqImpl(AdjustedVal, ActiveFeatures, TmpSeq);

    // Keep the new sequence if it is an improvement.
    if ((TmpSeq.size() + 1) < Res.size()) {
      TmpSeq.emplace_back(RISCV::ADDI, Imm12);
      Res = TmpSeq;
    }
  }

  // If the constant is positive we might be able to generate a shifted constant
  // with no leading zeros and use a final SRLI to restore them.
  if (Val > 0 && Res.size() > 2) {
    generateInstSeqLeadingZeros(Val, ActiveFeatures, Res);
  }

  // If the constant is negative, try inverting it and using our trailing zero
  // optimizations. Use an XORI to invert the final value.
  if (Val < 0 && Res.size() > 3) {
    uint64_t InvertedVal = ~(uint64_t)Val;
    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqLeadingZeros(InvertedVal, ActiveFeatures, TmpSeq);

    // Keep it if we found a sequence that is smaller after inverting.
    if (!TmpSeq.empty() && (TmpSeq.size() + 1) < Res.size()) {
      TmpSeq.emplace_back(RISCV::XORI, -1);
      Res = TmpSeq;
    }
  }

  // If the Low and High halves are the same, use pack. The pack instruction
  // packs the XLEN/2-bit lower halves of rs1 and rs2 into rd, with rs1 in the
  // lower half and rs2 in the upper half.
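  //
  // Illustrative example (not from the original comments): Val =
  // 0x1234567812345678 has identical 32-bit halves, so one candidate is to
  // materialize 0x12345678 once (LUI 0x12345; ADDIW 0x678) and then PACK it
  // with itself.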
  if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbkb]) {
    int64_t LoVal = SignExtend64<32>(Val);
    int64_t HiVal = SignExtend64<32>(Val >> 32);
    if (LoVal == HiVal) {
      RISCVMatInt::InstSeq TmpSeq;
      generateInstSeqImpl(LoVal, ActiveFeatures, TmpSeq);
      if ((TmpSeq.size() + 1) < Res.size()) {
        TmpSeq.emplace_back(RISCV::PACK, 0);
        Res = TmpSeq;
      }
    }
  }

  // Perform optimization with BCLRI/BSETI in the Zbs extension.
  if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbs]) {
    // 1. For values in range 0xffffffff 7fffffff ~ 0xffffffff 00000000,
    //    call generateInstSeqImpl with Val|0x80000000 (which is expected to be
    //    an int32), then emit (BCLRI r, 31).
    // 2. For values in range 0x80000000 ~ 0xffffffff, call generateInstSeqImpl
    //    with Val&~0x80000000 (which is expected to be an int32), then
    //    emit (BSETI r, 31).
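    //
    // Illustrative example (not from the original comments): Val =
    // 0xffffffff00001234 becomes 0xffffffff80001234 after setting bit 31,
    // which fits in an int32 (LUI 0x80001; ADDIW 0x234); BCLRI 31 then clears
    // bit 31 to recover the original value.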
    int64_t NewVal;
    unsigned Opc;
    if (Val < 0) {
      Opc = RISCV::BCLRI;
      NewVal = Val | 0x80000000ll;
    } else {
      Opc = RISCV::BSETI;
      NewVal = Val & ~0x80000000ll;
    }
    if (isInt<32>(NewVal)) {
      RISCVMatInt::InstSeq TmpSeq;
      generateInstSeqImpl(NewVal, ActiveFeatures, TmpSeq);
      if ((TmpSeq.size() + 1) < Res.size()) {
        TmpSeq.emplace_back(Opc, 31);
        Res = TmpSeq;
      }
    }

    // Try to use BCLRI for upper 32 bits if the original lower 32 bits are
    // negative int32, or use BSETI for upper 32 bits if the original lower
    // 32 bits are positive int32.
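    //
    // Illustrative example (not from the original comments): Val =
    // 0x400000001 has Lo = 1 and a single bit set in the upper half, so
    // ADDI 1 followed by BSETI 34 is a two-instruction candidate.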
    int32_t Lo = Lo_32(Val);
    uint32_t Hi = Hi_32(Val);
    Opc = 0;
    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqImpl(Lo, ActiveFeatures, TmpSeq);
    // Check if it is profitable to use BCLRI/BSETI.
    if (Lo > 0 && TmpSeq.size() + llvm::popcount(Hi) < Res.size()) {
      Opc = RISCV::BSETI;
    } else if (Lo < 0 && TmpSeq.size() + llvm::popcount(~Hi) < Res.size()) {
      Opc = RISCV::BCLRI;
      Hi = ~Hi;
    }
    // Search for each bit and build corresponding BCLRI/BSETI.
    if (Opc > 0) {
      while (Hi != 0) {
        unsigned Bit = llvm::countr_zero(Hi);
        TmpSeq.emplace_back(Opc, Bit + 32);
        Hi &= (Hi - 1); // Clear lowest set bit.
      }
      if (TmpSeq.size() < Res.size())
        Res = TmpSeq;
    }
  }

  // Perform optimization with SH*ADD in the Zba extension.
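  //
  // Illustrative example (not from the original comments): 0xa3d70a41 =
  // 3 * 0x369d036b, so one candidate is LUI 0x369d0; ADDIW 0x36b; SH1ADD
  // (x*2 + x) instead of a four-instruction LUI/ADDIW/SLLI/ADDI chain.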
  if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
    int64_t Div = 0;
    unsigned Opc = 0;
    RISCVMatInt::InstSeq TmpSeq;
    // Select the opcode and divisor.
    if ((Val % 3) == 0 && isInt<32>(Val / 3)) {
      Div = 3;
      Opc = RISCV::SH1ADD;
    } else if ((Val % 5) == 0 && isInt<32>(Val / 5)) {
      Div = 5;
      Opc = RISCV::SH2ADD;
    } else if ((Val % 9) == 0 && isInt<32>(Val / 9)) {
      Div = 9;
      Opc = RISCV::SH3ADD;
    }
    // Build the new instruction sequence.
    if (Div > 0) {
      generateInstSeqImpl(Val / Div, ActiveFeatures, TmpSeq);
      if ((TmpSeq.size() + 1) < Res.size()) {
        TmpSeq.emplace_back(Opc, 0);
        Res = TmpSeq;
      }
    } else {
      // Try to use LUI+SH*ADD+ADDI.
      int64_t Hi52 = ((uint64_t)Val + 0x800ull) & ~0xfffull;
      int64_t Lo12 = SignExtend64<12>(Val);
      Div = 0;
      if (isInt<32>(Hi52 / 3) && (Hi52 % 3) == 0) {
        Div = 3;
        Opc = RISCV::SH1ADD;
      } else if (isInt<32>(Hi52 / 5) && (Hi52 % 5) == 0) {
        Div = 5;
        Opc = RISCV::SH2ADD;
      } else if (isInt<32>(Hi52 / 9) && (Hi52 % 9) == 0) {
        Div = 9;
        Opc = RISCV::SH3ADD;
      }
      // Build the new instruction sequence.
      if (Div > 0) {
        // A Val with zero Lo12 (which implies Val equals Hi52) should already
        // have been processed into LUI+SH*ADD by the previous optimization.
        assert(Lo12 != 0 &&
               "unexpected instruction sequence for immediate materialisation");
        assert(TmpSeq.empty() && "Expected empty TmpSeq");
        generateInstSeqImpl(Hi52 / Div, ActiveFeatures, TmpSeq);
        if ((TmpSeq.size() + 2) < Res.size()) {
          TmpSeq.emplace_back(Opc, 0);
          TmpSeq.emplace_back(RISCV::ADDI, Lo12);
          Res = TmpSeq;
        }
      }
    }
  }

  // Perform optimization with rori in the Zbb and th.srri in the XTheadBb
  // extension.
  if (Res.size() > 2 && (ActiveFeatures[RISCV::FeatureStdExtZbb] ||
                         ActiveFeatures[RISCV::FeatureVendorXTHeadBb])) {
    if (unsigned Rotate = extractRotateInfo(Val)) {
      RISCVMatInt::InstSeq TmpSeq;
      uint64_t NegImm12 = llvm::rotl<uint64_t>(Val, Rotate);
      assert(isInt<12>(NegImm12));
      TmpSeq.emplace_back(RISCV::ADDI, NegImm12);
      TmpSeq.emplace_back(ActiveFeatures[RISCV::FeatureStdExtZbb]
                              ? RISCV::RORI
                              : RISCV::TH_SRRI,
                          Rotate);
      Res = TmpSeq;
    }
  }
  return Res;
}

InstSeq generateTwoRegInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures,
                              unsigned &ShiftAmt, unsigned &AddOpc) {
  int64_t LoVal = SignExtend64<32>(Val);
  if (LoVal == 0)
    return RISCVMatInt::InstSeq();

  // Subtract the LoVal to emulate the effect of the final ADD.
  uint64_t Tmp = (uint64_t)Val - (uint64_t)LoVal;
  assert(Tmp != 0);

  // Use trailing zero counts to figure how far we need to shift LoVal to line
  // up with the remaining constant.
  // TODO: This algorithm assumes all non-zero bits in the low 32 bits of the
  // final constant come from LoVal.
  unsigned TzLo = llvm::countr_zero((uint64_t)LoVal);
  unsigned TzHi = llvm::countr_zero(Tmp);
  assert(TzLo < 32 && TzHi >= 32);
  ShiftAmt = TzHi - TzLo;
  AddOpc = RISCV::ADD;

  if (Tmp == ((uint64_t)LoVal << ShiftAmt))
    return RISCVMatInt::generateInstSeq(LoVal, ActiveFeatures);

  // If we have Zba, we can use (ADD_UW X, (SLLI X, 32)).
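  // Illustrative example (not from the original comments): Val =
  // 0x8765432187654321 has identical 32-bit halves; the sign-extended low
  // half is materialized once, the caller shifts it left by 32, and ADD_UW
  // adds the zero-extended low 32 bits back in.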
  if (ActiveFeatures[RISCV::FeatureStdExtZba] && Lo_32(Val) == Hi_32(Val)) {
    ShiftAmt = 32;
    AddOpc = RISCV::ADD_UW;
    return RISCVMatInt::generateInstSeq(LoVal, ActiveFeatures);
  }

  return RISCVMatInt::InstSeq();
}

int getIntMatCost(const APInt &Val, unsigned Size,
                  const FeatureBitset &ActiveFeatures, bool CompressionCost) {
  bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
  bool HasRVC = CompressionCost && (ActiveFeatures[RISCV::FeatureStdExtC] ||
                                    ActiveFeatures[RISCV::FeatureStdExtZca]);
  int PlatRegSize = IsRV64 ? 64 : 32;

  // Split the constant into platform register sized chunks, and calculate cost
  // for each chunk separately.
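  //
  // Illustrative example (not from the original comments): a 128-bit constant
  // on RV64 is split into two 64-bit chunks, and each chunk is materialized
  // and costed independently.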
  int Cost = 0;
  for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) {
    APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize);
    InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), ActiveFeatures);
    Cost += getInstSeqCost(MatSeq, HasRVC);
  }
  return std::max(1, Cost);
}

OpndKind Inst::getOpndKind() const {
  switch (Opc) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case RISCV::LUI:
    return RISCVMatInt::Imm;
  case RISCV::ADD_UW:
    return RISCVMatInt::RegX0;
  case RISCV::SH1ADD:
  case RISCV::SH2ADD:
  case RISCV::SH3ADD:
  case RISCV::PACK:
    return RISCVMatInt::RegReg;
  case RISCV::ADDI:
  case RISCV::ADDIW:
  case RISCV::XORI:
  case RISCV::SLLI:
  case RISCV::SRLI:
  case RISCV::SLLI_UW:
  case RISCV::RORI:
  case RISCV::BSETI:
  case RISCV::BCLRI:
  case RISCV::TH_SRRI:
    return RISCVMatInt::RegImm;
  }
}
} // namespace llvm::RISCVMatInt