//===-- ARMFixCortexA57AES1742098Pass.cpp ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass works around a Cortex Core Fused AES erratum:
// - Cortex-A57 Erratum 1742098
// - Cortex-A72 Erratum 1655431
//
// The erratum may be triggered if an input vector register to AESE or AESD was
// last written by an instruction that only updated 32 bits of it. This can
// occur for either of the input registers.
//
// The workaround chosen is to update the input register using `r = VORRq r, r`,
// as this updates all 128 bits of the register unconditionally, but does not
// change the values observed in `r`, making the input safe.
//
// This pass has to be conservative in a few cases:
// - an input vector register to the AES instruction is defined outside the
//   current function, where we have to assume the register was updated in an
//   unsafe way; and
// - an input vector register to the AES instruction is updated along multiple
//   different control-flow paths, where we have to ensure all the register
//   updating instructions are safe.
//
// Both of these cases may apply to an input vector register. In either case, we
// need to ensure that, when the pass is finished, there exists a safe
// instruction between every unsafe register updating instruction and the AES
// instruction.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMSubtarget.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundleIterator.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/ReachingDefAnalysis.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <assert.h>
#include <stdint.h>

using namespace llvm;

#define DEBUG_TYPE "arm-fix-cortex-a57-aes-1742098"

//===----------------------------------------------------------------------===//

namespace {
class ARMFixCortexA57AES1742098 : public MachineFunctionPass {
public:
  static char ID;
  explicit ARMFixCortexA57AES1742098() : MachineFunctionPass(ID) {
    initializeARMFixCortexA57AES1742098Pass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &F) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override {
    return "ARM fix for Cortex-A57 AES Erratum 1742098";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<ReachingDefAnalysis>();
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  // This is the information needed to insert the fixup in the right place.
  struct AESFixupLocation {
    MachineBasicBlock *Block;
    // The fixup instruction will be inserted *before* InsertionPt.
    MachineInstr *InsertionPt;
    MachineOperand *MOp;
  };

  void analyzeMF(MachineFunction &MF, ReachingDefAnalysis &RDA,
                 const ARMBaseRegisterInfo *TRI,
                 SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const;

  void insertAESFixup(AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
                      const ARMBaseRegisterInfo *TRI) const;

  static bool isFirstAESPairInstr(MachineInstr &MI);
  static bool isSafeAESInput(MachineInstr &MI);
};
char ARMFixCortexA57AES1742098::ID = 0;

} // end anonymous namespace
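
// Register the pass with the legacy pass manager, recording its dependency on
// ReachingDefAnalysis, which analyzeMF uses below to find the instructions
// that last defined each AES input register.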
INITIALIZE_PASS_BEGIN(ARMFixCortexA57AES1742098, DEBUG_TYPE,
                      "ARM fix for Cortex-A57 AES Erratum 1742098", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis);
INITIALIZE_PASS_END(ARMFixCortexA57AES1742098, DEBUG_TYPE,
                    "ARM fix for Cortex-A57 AES Erratum 1742098", false, false)

//===----------------------------------------------------------------------===//
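
// Only AESD and AESE, the first instructions of the fused AES pairs, read the
// input registers that can trigger the erratum, so these are the instructions
// whose operands need checking.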
bool ARMFixCortexA57AES1742098::isFirstAESPairInstr(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  return Opc == ARM::AESD || Opc == ARM::AESE;
}
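
// An instruction is a safe producer of an AES input if it unconditionally
// writes all 64 or 128 bits of its destination vector register, so a
// following AESE/AESD never observes a partial, 32-bit-only update.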
bool ARMFixCortexA57AES1742098::isSafeAESInput(MachineInstr &MI) {
  auto CondCodeIsAL = [](MachineInstr &MI) -> bool {
    int CCIdx = MI.findFirstPredOperandIdx();
    if (CCIdx == -1)
      return false;
    return MI.getOperand(CCIdx).getImm() == (int64_t)ARMCC::AL;
  };

  switch (MI.getOpcode()) {
  // Unknown: Assume not safe.
  default:
    return false;
  // 128-bit wide AES instructions
  case ARM::AESD:
  case ARM::AESE:
  case ARM::AESMC:
  case ARM::AESIMC:
    // No CondCode.
    return true;
  // 128-bit and 64-bit wide bitwise ops (when condition = al)
  case ARM::VANDd:
  case ARM::VANDq:
  case ARM::VORRd:
  case ARM::VORRq:
  case ARM::VEORd:
  case ARM::VEORq:
  case ARM::VMVNd:
  case ARM::VMVNq:
  // VMOV of 64-bit value between D registers (when condition = al)
  case ARM::VMOVD:
  // VMOV of 64 bit value from GPRs (when condition = al)
  case ARM::VMOVDRR:
  // VMOV of immediate into D or Q registers (when condition = al)
  case ARM::VMOVv2i64:
  case ARM::VMOVv1i64:
  case ARM::VMOVv2f32:
  case ARM::VMOVv4f32:
  case ARM::VMOVv2i32:
  case ARM::VMOVv4i32:
  case ARM::VMOVv4i16:
  case ARM::VMOVv8i16:
  case ARM::VMOVv8i8:
  case ARM::VMOVv16i8:
  // Loads (when condition = al)
  // VLD Dn, [Rn, #imm]
  case ARM::VLDRD:
  // VLDM
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDIA:
  // VLDn to all lanes.
  case ARM::VLD1d64:
  case ARM::VLD1q64:
  case ARM::VLD1d32:
  case ARM::VLD1q32:
  case ARM::VLD2b32:
  case ARM::VLD2d32:
  case ARM::VLD2q32:
  case ARM::VLD1d16:
  case ARM::VLD1q16:
  case ARM::VLD2d16:
  case ARM::VLD2q16:
  case ARM::VLD1d8:
  case ARM::VLD1q8:
  case ARM::VLD2b8:
  case ARM::VLD2d8:
  case ARM::VLD2q8:
  case ARM::VLD3d32:
  case ARM::VLD3q32:
  case ARM::VLD3d16:
  case ARM::VLD3q16:
  case ARM::VLD3d8:
  case ARM::VLD3q8:
  case ARM::VLD4d32:
  case ARM::VLD4q32:
  case ARM::VLD4d16:
  case ARM::VLD4q16:
  case ARM::VLD4d8:
  case ARM::VLD4q8:
  // VLD1 (single element to one lane)
  case ARM::VLD1LNd32:
  case ARM::VLD1LNd32_UPD:
  case ARM::VLD1LNd8:
  case ARM::VLD1LNd8_UPD:
  case ARM::VLD1LNd16:
  case ARM::VLD1LNd16_UPD:
  // VLD1 (single element to all lanes)
  case ARM::VLD1DUPd32:
  case ARM::VLD1DUPd32wb_fixed:
  case ARM::VLD1DUPd32wb_register:
  case ARM::VLD1DUPd16:
  case ARM::VLD1DUPd16wb_fixed:
  case ARM::VLD1DUPd16wb_register:
  case ARM::VLD1DUPd8:
  case ARM::VLD1DUPd8wb_fixed:
  case ARM::VLD1DUPd8wb_register:
  case ARM::VLD1DUPq32:
  case ARM::VLD1DUPq32wb_fixed:
  case ARM::VLD1DUPq32wb_register:
  case ARM::VLD1DUPq16:
  case ARM::VLD1DUPq16wb_fixed:
  case ARM::VLD1DUPq16wb_register:
  case ARM::VLD1DUPq8:
  case ARM::VLD1DUPq8wb_fixed:
  case ARM::VLD1DUPq8wb_register:
  // VMOV
  case ARM::VSETLNi32:
  case ARM::VSETLNi16:
  case ARM::VSETLNi8:
    return CondCodeIsAL(MI);
  };

  return false;
}

bool ARMFixCortexA57AES1742098::runOnMachineFunction(MachineFunction &F) {
  LLVM_DEBUG(dbgs() << "***** ARMFixCortexA57AES1742098 *****\n");
  auto &STI = F.getSubtarget<ARMSubtarget>();

  // Fix not requested or AES instructions not present: skip pass.
  if (!STI.hasAES() || !STI.fixCortexA57AES1742098())
    return false;

  const ARMBaseRegisterInfo *TRI = STI.getRegisterInfo();
  const ARMBaseInstrInfo *TII = STI.getInstrInfo();

  auto &RDA = getAnalysis<ReachingDefAnalysis>();

  // Analyze whole function to find instructions which need fixing up...
  SmallVector<AESFixupLocation> FixupLocsForFn{};
  analyzeMF(F, RDA, TRI, FixupLocsForFn);

  // ... and fix the instructions up all at the same time.
  bool Changed = false;
  LLVM_DEBUG(dbgs() << "Inserting " << FixupLocsForFn.size() << " fixup(s)\n");
  for (AESFixupLocation &FixupLoc : FixupLocsForFn) {
    insertAESFixup(FixupLoc, TII, TRI);
    Changed |= true;
  }

  return Changed;
}
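
// Scan MF for AESE/AESD instructions and record in FixupLocsForFn every place
// where a fixup must be inserted: next to the single unsafe defining
// instruction where there is exactly one, at the start of the function for a
// live-in register whose other definitions are all safe, or immediately
// before the AES instruction itself otherwise.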
void ARMFixCortexA57AES1742098::analyzeMF(
    MachineFunction &MF, ReachingDefAnalysis &RDA,
    const ARMBaseRegisterInfo *TRI,
    SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const {
  unsigned MaxAllowedFixups = 0;

  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (!isFirstAESPairInstr(MI))
        continue;

      // Found an instruction to check the operands of.
      LLVM_DEBUG(dbgs() << "Found AES Pair starting: " << MI);
      assert(MI.getNumExplicitOperands() == 3 && MI.getNumExplicitDefs() == 1 &&
             "Unknown AES Instruction Format. Expected 1 def, 2 uses.");

      // A maximum of two fixups should be inserted for each AES pair (one per
      // register use).
      MaxAllowedFixups += 2;

      // Inspect all operands, choosing whether to insert a fixup.
      for (MachineOperand &MOp : MI.uses()) {
        SmallPtrSet<MachineInstr *, 1> AllDefs{};
        RDA.getGlobalReachingDefs(&MI, MOp.getReg(), AllDefs);

        // Planned Fixup: This should be added to FixupLocsForFn at most once.
        AESFixupLocation NewLoc{&MBB, &MI, &MOp};

        // In small functions with loops, this operand may be both a live-in and
        // have definitions within the function itself. These will need a fixup.
        bool IsLiveIn = MF.front().isLiveIn(MOp.getReg());

        // If the register doesn't have defining instructions, and is not a
        // live-in, then something is wrong and the fixup must always be
        // inserted to be safe.
        if (!IsLiveIn && AllDefs.size() == 0) {
          LLVM_DEBUG(dbgs()
                     << "Fixup Planned: No Defining Instrs found, not live-in: "
                     << printReg(MOp.getReg(), TRI) << "\n");
          FixupLocsForFn.emplace_back(NewLoc);
          continue;
        }

        auto IsUnsafe = [](MachineInstr *MI) -> bool {
          return !isSafeAESInput(*MI);
        };
        size_t UnsafeCount = llvm::count_if(AllDefs, IsUnsafe);

        // If there are no unsafe definitions...
        if (UnsafeCount == 0) {
          // ... and the register is not live-in ...
          if (!IsLiveIn) {
            // ... then skip the fixup.
            LLVM_DEBUG(dbgs() << "No Fixup: Defining instrs are all safe: "
                              << printReg(MOp.getReg(), TRI) << "\n");
            continue;
          }

          // Otherwise, the only unsafe "definition" is a live-in, so insert the
          // fixup at the start of the function.
          LLVM_DEBUG(dbgs()
                     << "Fixup Planned: Live-In (with safe defining instrs): "
                     << printReg(MOp.getReg(), TRI) << "\n");
          NewLoc.Block = &MF.front();
          NewLoc.InsertionPt = &*NewLoc.Block->begin();
          LLVM_DEBUG(dbgs() << "Moving Fixup for Live-In to immediately before "
                            << *NewLoc.InsertionPt);
          FixupLocsForFn.emplace_back(NewLoc);
          continue;
        }

        // If a fixup is needed in more than one place, then the best place to
        // insert it is adjacent to the use rather than introducing a fixup
        // adjacent to each def.
        //
        // FIXME: It might be better to hoist this to the start of the BB, if
        // possible.
        if (IsLiveIn || UnsafeCount > 1) {
          LLVM_DEBUG(dbgs() << "Fixup Planned: Multiple unsafe defining instrs "
                               "(including live-ins): "
                            << printReg(MOp.getReg(), TRI) << "\n");
          FixupLocsForFn.emplace_back(NewLoc);
          continue;
        }

        assert(UnsafeCount == 1 && !IsLiveIn &&
               "At this point, there should be one unsafe defining instruction "
               "and the defined register should not be a live-in.");
        SmallPtrSetIterator<MachineInstr *> It =
            llvm::find_if(AllDefs, IsUnsafe);
        assert(It != AllDefs.end() &&
               "UnsafeCount == 1 but No Unsafe MachineInstr found.");
        MachineInstr *DefMI = *It;

        LLVM_DEBUG(
            dbgs() << "Fixup Planned: Found single unsafe defining instr for "
                   << printReg(MOp.getReg(), TRI) << ": " << *DefMI);

        // There is one unsafe defining instruction, which needs a fixup. It is
        // generally good to hoist the fixup to be adjacent to the defining
        // instruction rather than the using instruction, as the using
        // instruction may be inside a loop when the defining instruction is
        // not.
        MachineBasicBlock::iterator DefIt = DefMI;
        ++DefIt;
        if (DefIt != DefMI->getParent()->end()) {
          LLVM_DEBUG(dbgs() << "Moving Fixup to immediately after " << *DefMI
                            << "And immediately before " << *DefIt);
          NewLoc.Block = DefIt->getParent();
          NewLoc.InsertionPt = &*DefIt;
        }

        FixupLocsForFn.emplace_back(NewLoc);
      }
    }
  }

  assert(FixupLocsForFn.size() <= MaxAllowedFixups &&
         "Inserted too many fixups for this function.");
  (void)MaxAllowedFixups;
}
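
// Materialize one planned fixup: build a `VORRq qN, qN, qN` immediately
// before FixupLoc.InsertionPt so that all 128 bits of the operand register
// are rewritten by a safe instruction.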
void ARMFixCortexA57AES1742098::insertAESFixup(
    AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
    const ARMBaseRegisterInfo *TRI) const {
  MachineOperand *OperandToFixup = FixupLoc.MOp;

  assert(OperandToFixup->isReg() && "OperandToFixup must be a register");
  Register RegToFixup = OperandToFixup->getReg();

  LLVM_DEBUG(dbgs() << "Inserting VORRq of " << printReg(RegToFixup, TRI)
                    << " before: " << *FixupLoc.InsertionPt);

  // Insert the new `VORRq qN, qN, qN`. There are a few details here:
  //
  // The uses are marked as killed, even if the original use of OperandToFixup
  // is not killed, as the new instruction is clobbering the register. This is
  // safe even if there are other uses of `qN`, as the VORRq is value-wise a
  // no-op (it is inserted for microarchitectural reasons).
  //
  // The def and the uses are still marked as Renamable if the original register
  // was, to avoid having to rummage through all the other uses and defs and
  // unset their renamable bits.
  unsigned Renamable = OperandToFixup->isRenamable() ? RegState::Renamable : 0;
  BuildMI(*FixupLoc.Block, FixupLoc.InsertionPt, DebugLoc(),
          TII->get(ARM::VORRq))
      .addReg(RegToFixup, RegState::Define | Renamable)
      .addReg(RegToFixup, RegState::Kill | Renamable)
      .addReg(RegToFixup, RegState::Kill | Renamable)
      .addImm((uint64_t)ARMCC::AL)
      .addReg(ARM::NoRegister);
}

// Factory function used by ARMTargetMachine to add the pass to the pass
// manager.
FunctionPass *llvm::createARMFixCortexA57AES1742098Pass() {
  return new ARMFixCortexA57AES1742098();
}