1 //===-- ARMFixCortexA57AES1742098Pass.cpp ---------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 // This pass works around a Cortex Core Fused AES erratum:
9 // - Cortex-A57 Erratum 1742098
10 // - Cortex-A72 Erratum 1655431
12 // The erratum may be triggered if an input vector register to AESE or AESD was
13 // last written by an instruction that only updated 32 bits of it. This can
14 // occur for either of the input registers.
16 // The workaround chosen is to update the input register using `r = VORRq r, r`,
17 // as this updates all 128 bits of the register unconditionally, but does not
18 // change the values observed in `r`, making the input safe.
20 // This pass has to be conservative in a few cases:
// - an input vector register to the AES instruction is defined outside the
//   current function, where we have to assume the register was updated in an
//   unsafe way; and
24 // - an input vector register to the AES instruction is updated along multiple
25 // different control-flow paths, where we have to ensure all the register
26 // updating instructions are safe.
// Both of these cases may apply to an input vector register. In either case, we
// need to ensure that, when the pass is finished, there exists a safe
// instruction between every unsafe register updating instruction and the AES
// instruction.
33 //===----------------------------------------------------------------------===//
36 #include "ARMBaseInstrInfo.h"
37 #include "ARMBaseRegisterInfo.h"
38 #include "ARMSubtarget.h"
39 #include "Utils/ARMBaseInfo.h"
40 #include "llvm/ADT/STLExtras.h"
41 #include "llvm/ADT/SmallPtrSet.h"
42 #include "llvm/ADT/SmallVector.h"
43 #include "llvm/ADT/StringRef.h"
44 #include "llvm/CodeGen/MachineBasicBlock.h"
45 #include "llvm/CodeGen/MachineFunction.h"
46 #include "llvm/CodeGen/MachineFunctionPass.h"
47 #include "llvm/CodeGen/MachineInstr.h"
48 #include "llvm/CodeGen/MachineInstrBuilder.h"
49 #include "llvm/CodeGen/MachineInstrBundleIterator.h"
50 #include "llvm/CodeGen/MachineOperand.h"
51 #include "llvm/CodeGen/ReachingDefAnalysis.h"
52 #include "llvm/CodeGen/Register.h"
53 #include "llvm/CodeGen/TargetRegisterInfo.h"
54 #include "llvm/IR/DebugLoc.h"
55 #include "llvm/InitializePasses.h"
56 #include "llvm/Pass.h"
57 #include "llvm/PassRegistry.h"
58 #include "llvm/Support/Debug.h"
59 #include "llvm/Support/raw_ostream.h"
65 #define DEBUG_TYPE "arm-fix-cortex-a57-aes-1742098"
67 //===----------------------------------------------------------------------===//
70 class ARMFixCortexA57AES1742098
: public MachineFunctionPass
{
73 explicit ARMFixCortexA57AES1742098() : MachineFunctionPass(ID
) {
74 initializeARMFixCortexA57AES1742098Pass(*PassRegistry::getPassRegistry());
77 bool runOnMachineFunction(MachineFunction
&F
) override
;
79 MachineFunctionProperties
getRequiredProperties() const override
{
80 return MachineFunctionProperties().set(
81 MachineFunctionProperties::Property::NoVRegs
);
84 StringRef
getPassName() const override
{
85 return "ARM fix for Cortex-A57 AES Erratum 1742098";
88 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
89 AU
.addRequired
<ReachingDefAnalysis
>();
91 MachineFunctionPass::getAnalysisUsage(AU
);
95 // This is the information needed to insert the fixup in the right place.
96 struct AESFixupLocation
{
97 MachineBasicBlock
*Block
;
98 // The fixup instruction will be inserted *before* InsertionPt.
99 MachineInstr
*InsertionPt
;
103 void analyzeMF(MachineFunction
&MF
, ReachingDefAnalysis
&RDA
,
104 const ARMBaseRegisterInfo
*TRI
,
105 SmallVectorImpl
<AESFixupLocation
> &FixupLocsForFn
) const;
107 void insertAESFixup(AESFixupLocation
&FixupLoc
, const ARMBaseInstrInfo
*TII
,
108 const ARMBaseRegisterInfo
*TRI
) const;
110 static bool isFirstAESPairInstr(MachineInstr
&MI
);
111 static bool isSafeAESInput(MachineInstr
&MI
);
113 char ARMFixCortexA57AES1742098::ID
= 0;
115 } // end anonymous namespace
117 INITIALIZE_PASS_BEGIN(ARMFixCortexA57AES1742098
, DEBUG_TYPE
,
118 "ARM fix for Cortex-A57 AES Erratum 1742098", false,
120 INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis
);
121 INITIALIZE_PASS_END(ARMFixCortexA57AES1742098
, DEBUG_TYPE
,
122 "ARM fix for Cortex-A57 AES Erratum 1742098", false, false)
124 //===----------------------------------------------------------------------===//
126 bool ARMFixCortexA57AES1742098::isFirstAESPairInstr(MachineInstr
&MI
) {
127 unsigned Opc
= MI
.getOpcode();
128 return Opc
== ARM::AESD
|| Opc
== ARM::AESE
;
131 bool ARMFixCortexA57AES1742098::isSafeAESInput(MachineInstr
&MI
) {
132 auto CondCodeIsAL
= [](MachineInstr
&MI
) -> bool {
133 int CCIdx
= MI
.findFirstPredOperandIdx();
136 return MI
.getOperand(CCIdx
).getImm() == (int64_t)ARMCC::AL
;
139 switch (MI
.getOpcode()) {
140 // Unknown: Assume not safe.
143 // 128-bit wide AES instructions
150 // 128-bit and 64-bit wide bitwise ops (when condition = al)
159 // VMOV of 64-bit value between D registers (when condition = al)
161 // VMOV of 64 bit value from GPRs (when condition = al)
163 // VMOV of immediate into D or Q registers (when condition = al)
174 // Loads (when condition = al)
175 // VLD Dn, [Rn, #imm]
178 case ARM::VLDMDDB_UPD
:
179 case ARM::VLDMDIA_UPD
:
181 // VLDn to all lanes.
210 // VLD1 (single element to one lane)
212 case ARM::VLD1LNd32_UPD
:
214 case ARM::VLD1LNd8_UPD
:
216 case ARM::VLD1LNd16_UPD
:
217 // VLD1 (single element to all lanes)
218 case ARM::VLD1DUPd32
:
219 case ARM::VLD1DUPd32wb_fixed
:
220 case ARM::VLD1DUPd32wb_register
:
221 case ARM::VLD1DUPd16
:
222 case ARM::VLD1DUPd16wb_fixed
:
223 case ARM::VLD1DUPd16wb_register
:
225 case ARM::VLD1DUPd8wb_fixed
:
226 case ARM::VLD1DUPd8wb_register
:
227 case ARM::VLD1DUPq32
:
228 case ARM::VLD1DUPq32wb_fixed
:
229 case ARM::VLD1DUPq32wb_register
:
230 case ARM::VLD1DUPq16
:
231 case ARM::VLD1DUPq16wb_fixed
:
232 case ARM::VLD1DUPq16wb_register
:
234 case ARM::VLD1DUPq8wb_fixed
:
235 case ARM::VLD1DUPq8wb_register
:
240 return CondCodeIsAL(MI
);
246 bool ARMFixCortexA57AES1742098::runOnMachineFunction(MachineFunction
&F
) {
247 LLVM_DEBUG(dbgs() << "***** ARMFixCortexA57AES1742098 *****\n");
248 auto &STI
= F
.getSubtarget
<ARMSubtarget
>();
250 // Fix not requested or AES instructions not present: skip pass.
251 if (!STI
.hasAES() || !STI
.fixCortexA57AES1742098())
254 const ARMBaseRegisterInfo
*TRI
= STI
.getRegisterInfo();
255 const ARMBaseInstrInfo
*TII
= STI
.getInstrInfo();
257 auto &RDA
= getAnalysis
<ReachingDefAnalysis
>();
259 // Analyze whole function to find instructions which need fixing up...
260 SmallVector
<AESFixupLocation
> FixupLocsForFn
{};
261 analyzeMF(F
, RDA
, TRI
, FixupLocsForFn
);
263 // ... and fix the instructions up all at the same time.
264 bool Changed
= false;
265 LLVM_DEBUG(dbgs() << "Inserting " << FixupLocsForFn
.size() << " fixup(s)\n");
266 for (AESFixupLocation
&FixupLoc
: FixupLocsForFn
) {
267 insertAESFixup(FixupLoc
, TII
, TRI
);
274 void ARMFixCortexA57AES1742098::analyzeMF(
275 MachineFunction
&MF
, ReachingDefAnalysis
&RDA
,
276 const ARMBaseRegisterInfo
*TRI
,
277 SmallVectorImpl
<AESFixupLocation
> &FixupLocsForFn
) const {
278 unsigned MaxAllowedFixups
= 0;
280 for (MachineBasicBlock
&MBB
: MF
) {
281 for (MachineInstr
&MI
: MBB
) {
282 if (!isFirstAESPairInstr(MI
))
285 // Found an instruction to check the operands of.
286 LLVM_DEBUG(dbgs() << "Found AES Pair starting: " << MI
);
287 assert(MI
.getNumExplicitOperands() == 3 && MI
.getNumExplicitDefs() == 1 &&
288 "Unknown AES Instruction Format. Expected 1 def, 2 uses.");
290 // A maximum of two fixups should be inserted for each AES pair (one per
292 MaxAllowedFixups
+= 2;
294 // Inspect all operands, choosing whether to insert a fixup.
295 for (MachineOperand
&MOp
: MI
.uses()) {
296 SmallPtrSet
<MachineInstr
*, 1> AllDefs
{};
297 RDA
.getGlobalReachingDefs(&MI
, MOp
.getReg(), AllDefs
);
299 // Planned Fixup: This should be added to FixupLocsForFn at most once.
300 AESFixupLocation NewLoc
{&MBB
, &MI
, &MOp
};
302 // In small functions with loops, this operand may be both a live-in and
303 // have definitions within the function itself. These will need a fixup.
304 bool IsLiveIn
= MF
.front().isLiveIn(MOp
.getReg());
306 // If the register doesn't have defining instructions, and is not a
307 // live-in, then something is wrong and the fixup must always be
308 // inserted to be safe.
309 if (!IsLiveIn
&& AllDefs
.size() == 0) {
311 << "Fixup Planned: No Defining Instrs found, not live-in: "
312 << printReg(MOp
.getReg(), TRI
) << "\n");
313 FixupLocsForFn
.emplace_back(NewLoc
);
317 auto IsUnsafe
= [](MachineInstr
*MI
) -> bool {
318 return !isSafeAESInput(*MI
);
320 size_t UnsafeCount
= llvm::count_if(AllDefs
, IsUnsafe
);
322 // If there are no unsafe definitions...
323 if (UnsafeCount
== 0) {
324 // ... and the register is not live-in ...
326 // ... then skip the fixup.
327 LLVM_DEBUG(dbgs() << "No Fixup: Defining instrs are all safe: "
328 << printReg(MOp
.getReg(), TRI
) << "\n");
332 // Otherwise, the only unsafe "definition" is a live-in, so insert the
333 // fixup at the start of the function.
335 << "Fixup Planned: Live-In (with safe defining instrs): "
336 << printReg(MOp
.getReg(), TRI
) << "\n");
337 NewLoc
.Block
= &MF
.front();
338 NewLoc
.InsertionPt
= &*NewLoc
.Block
->begin();
339 LLVM_DEBUG(dbgs() << "Moving Fixup for Live-In to immediately before "
340 << *NewLoc
.InsertionPt
);
341 FixupLocsForFn
.emplace_back(NewLoc
);
345 // If a fixup is needed in more than one place, then the best place to
346 // insert it is adjacent to the use rather than introducing a fixup
347 // adjacent to each def.
349 // FIXME: It might be better to hoist this to the start of the BB, if
351 if (IsLiveIn
|| UnsafeCount
> 1) {
352 LLVM_DEBUG(dbgs() << "Fixup Planned: Multiple unsafe defining instrs "
353 "(including live-ins): "
354 << printReg(MOp
.getReg(), TRI
) << "\n");
355 FixupLocsForFn
.emplace_back(NewLoc
);
359 assert(UnsafeCount
== 1 && !IsLiveIn
&&
360 "At this point, there should be one unsafe defining instrs "
361 "and the defined register should not be a live-in.");
362 SmallPtrSetIterator
<MachineInstr
*> It
=
363 llvm::find_if(AllDefs
, IsUnsafe
);
364 assert(It
!= AllDefs
.end() &&
365 "UnsafeCount == 1 but No Unsafe MachineInstr found.");
366 MachineInstr
*DefMI
= *It
;
369 dbgs() << "Fixup Planned: Found single unsafe defining instrs for "
370 << printReg(MOp
.getReg(), TRI
) << ": " << *DefMI
);
372 // There is one unsafe defining instruction, which needs a fixup. It is
373 // generally good to hoist the fixup to be adjacent to the defining
374 // instruction rather than the using instruction, as the using
375 // instruction may be inside a loop when the defining instruction is
377 MachineBasicBlock::iterator DefIt
= DefMI
;
379 if (DefIt
!= DefMI
->getParent()->end()) {
380 LLVM_DEBUG(dbgs() << "Moving Fixup to immediately after " << *DefMI
381 << "And immediately before " << *DefIt
);
382 NewLoc
.Block
= DefIt
->getParent();
383 NewLoc
.InsertionPt
= &*DefIt
;
386 FixupLocsForFn
.emplace_back(NewLoc
);
391 assert(FixupLocsForFn
.size() <= MaxAllowedFixups
&&
392 "Inserted too many fixups for this function.");
393 (void)MaxAllowedFixups
;
396 void ARMFixCortexA57AES1742098::insertAESFixup(
397 AESFixupLocation
&FixupLoc
, const ARMBaseInstrInfo
*TII
,
398 const ARMBaseRegisterInfo
*TRI
) const {
399 MachineOperand
*OperandToFixup
= FixupLoc
.MOp
;
401 assert(OperandToFixup
->isReg() && "OperandToFixup must be a register");
402 Register RegToFixup
= OperandToFixup
->getReg();
404 LLVM_DEBUG(dbgs() << "Inserting VORRq of " << printReg(RegToFixup
, TRI
)
405 << " before: " << *FixupLoc
.InsertionPt
);
407 // Insert the new `VORRq qN, qN, qN`. There are a few details here:
409 // The uses are marked as killed, even if the original use of OperandToFixup
410 // is not killed, as the new instruction is clobbering the register. This is
411 // safe even if there are other uses of `qN`, as the VORRq value-wise a no-op
412 // (it is inserted for microarchitectural reasons).
414 // The def and the uses are still marked as Renamable if the original register
415 // was, to avoid having to rummage through all the other uses and defs and
416 // unset their renamable bits.
417 unsigned Renamable
= OperandToFixup
->isRenamable() ? RegState::Renamable
: 0;
418 BuildMI(*FixupLoc
.Block
, FixupLoc
.InsertionPt
, DebugLoc(),
419 TII
->get(ARM::VORRq
))
420 .addReg(RegToFixup
, RegState::Define
| Renamable
)
421 .addReg(RegToFixup
, RegState::Kill
| Renamable
)
422 .addReg(RegToFixup
, RegState::Kill
| Renamable
)
423 .addImm((uint64_t)ARMCC::AL
)
424 .addReg(ARM::NoRegister
);
// Factory function used by ARMTargetMachine to add the pass to the pass
// manager.
429 FunctionPass
*llvm::createARMFixCortexA57AES1742098Pass() {
430 return new ARMFixCortexA57AES1742098();