[clang][NFC] simplify the unset check in `ParseLabeledStatement` (#117430)
[llvm-project.git] / llvm / lib / Target / AMDGPU / AMDGPURegBankCombiner.cpp
blob1e31fa3218d9cbea0196841231b7c980218ad8fb
1 //=== lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass does combining of machine instructions at the generic MI level,
10 // after register banks are known.
12 //===----------------------------------------------------------------------===//
14 #include "AMDGPU.h"
15 #include "AMDGPULegalizerInfo.h"
16 #include "AMDGPURegisterBankInfo.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "llvm/CodeGen/GlobalISel/Combiner.h"
21 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
22 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
23 #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
24 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
25 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
26 #include "llvm/CodeGen/MachineDominators.h"
27 #include "llvm/CodeGen/TargetPassConfig.h"
28 #include "llvm/Target/TargetMachine.h"
30 #define GET_GICOMBINER_DEPS
31 #include "AMDGPUGenPreLegalizeGICombiner.inc"
32 #undef GET_GICOMBINER_DEPS
34 #define DEBUG_TYPE "amdgpu-regbank-combiner"
36 using namespace llvm;
37 using namespace MIPatternMatch;
39 namespace {
40 #define GET_GICOMBINER_TYPES
41 #include "AMDGPUGenRegBankGICombiner.inc"
42 #undef GET_GICOMBINER_TYPES
44 class AMDGPURegBankCombinerImpl : public Combiner {
45 protected:
46 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig;
47 const GCNSubtarget &STI;
48 const RegisterBankInfo &RBI;
49 const TargetRegisterInfo &TRI;
50 const SIInstrInfo &TII;
51 // TODO: Make CombinerHelper methods const.
52 mutable CombinerHelper Helper;
54 public:
55 AMDGPURegBankCombinerImpl(
56 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
57 GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
58 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
59 const GCNSubtarget &STI, MachineDominatorTree *MDT,
60 const LegalizerInfo *LI);
62 static const char *getName() { return "AMDGPURegBankCombinerImpl"; }
64 bool tryCombineAll(MachineInstr &I) const override;
66 bool isVgprRegBank(Register Reg) const;
67 Register getAsVgpr(Register Reg) const;
69 struct MinMaxMedOpc {
70 unsigned Min, Max, Med;
73 struct Med3MatchInfo {
74 unsigned Opc;
75 Register Val0, Val1, Val2;
78 MinMaxMedOpc getMinMaxPair(unsigned Opc) const;
80 template <class m_Cst, typename CstTy>
81 bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
82 Register &Val, CstTy &K0, CstTy &K1) const;
84 bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
85 bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
86 bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg) const;
87 bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg) const;
88 void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
89 void applyClamp(MachineInstr &MI, Register &Reg) const;
91 private:
92 SIModeRegisterDefaults getMode() const;
93 bool getIEEE() const;
94 bool getDX10Clamp() const;
95 bool isFminnumIeee(const MachineInstr &MI) const;
96 bool isFCst(MachineInstr *MI) const;
97 bool isClampZeroToOne(MachineInstr *K0, MachineInstr *K1) const;
99 #define GET_GICOMBINER_CLASS_MEMBERS
100 #define AMDGPUSubtarget GCNSubtarget
101 #include "AMDGPUGenRegBankGICombiner.inc"
102 #undef GET_GICOMBINER_CLASS_MEMBERS
103 #undef AMDGPUSubtarget
106 #define GET_GICOMBINER_IMPL
107 #define AMDGPUSubtarget GCNSubtarget
108 #include "AMDGPUGenRegBankGICombiner.inc"
109 #undef AMDGPUSubtarget
110 #undef GET_GICOMBINER_IMPL
112 AMDGPURegBankCombinerImpl::AMDGPURegBankCombinerImpl(
113 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
114 GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
115 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
116 const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
117 : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
118 RBI(*STI.getRegBankInfo()), TRI(*STI.getRegisterInfo()),
119 TII(*STI.getInstrInfo()),
120 Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
121 #define GET_GICOMBINER_CONSTRUCTOR_INITS
122 #include "AMDGPUGenRegBankGICombiner.inc"
123 #undef GET_GICOMBINER_CONSTRUCTOR_INITS
127 bool AMDGPURegBankCombinerImpl::isVgprRegBank(Register Reg) const {
128 return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
131 Register AMDGPURegBankCombinerImpl::getAsVgpr(Register Reg) const {
132 if (isVgprRegBank(Reg))
133 return Reg;
135 // Search for existing copy of Reg to vgpr.
136 for (MachineInstr &Use : MRI.use_instructions(Reg)) {
137 Register Def = Use.getOperand(0).getReg();
138 if (Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def))
139 return Def;
142 // Copy Reg to vgpr.
143 Register VgprReg = B.buildCopy(MRI.getType(Reg), Reg).getReg(0);
144 MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
145 return VgprReg;
148 AMDGPURegBankCombinerImpl::MinMaxMedOpc
149 AMDGPURegBankCombinerImpl::getMinMaxPair(unsigned Opc) const {
150 switch (Opc) {
151 default:
152 llvm_unreachable("Unsupported opcode");
153 case AMDGPU::G_SMAX:
154 case AMDGPU::G_SMIN:
155 return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
156 case AMDGPU::G_UMAX:
157 case AMDGPU::G_UMIN:
158 return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
159 case AMDGPU::G_FMAXNUM:
160 case AMDGPU::G_FMINNUM:
161 return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};
162 case AMDGPU::G_FMAXNUM_IEEE:
163 case AMDGPU::G_FMINNUM_IEEE:
164 return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,
165 AMDGPU::G_AMDGPU_FMED3};
169 template <class m_Cst, typename CstTy>
170 bool AMDGPURegBankCombinerImpl::matchMed(MachineInstr &MI,
171 MachineRegisterInfo &MRI,
172 MinMaxMedOpc MMMOpc, Register &Val,
173 CstTy &K0, CstTy &K1) const {
174 // 4 operand commutes of: min(max(Val, K0), K1).
175 // Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)).
176 // Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0).
177 // 4 operand commutes of: max(min(Val, K1), K0).
178 // Find K0 from outer instr: max(min(...), K0) or max(K0, min(...)).
179 // Find K1 and Val from inner instr: min(K1, Val) or min(Val, K1).
180 return mi_match(
181 MI, MRI,
182 m_any_of(
183 m_CommutativeBinOp(
184 MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)),
185 m_Cst(K1)),
186 m_CommutativeBinOp(
187 MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)),
188 m_Cst(K0))));
191 bool AMDGPURegBankCombinerImpl::matchIntMinMaxToMed3(
192 MachineInstr &MI, Med3MatchInfo &MatchInfo) const {
193 Register Dst = MI.getOperand(0).getReg();
194 if (!isVgprRegBank(Dst))
195 return false;
197 // med3 for i16 is only available on gfx9+, and not available for v2i16.
198 LLT Ty = MRI.getType(Dst);
199 if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32))
200 return false;
202 MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
203 Register Val;
204 std::optional<ValueAndVReg> K0, K1;
205 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
206 if (!matchMed<GCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
207 return false;
209 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))
210 return false;
211 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))
212 return false;
214 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
215 return true;
218 // fmed3(NaN, K0, K1) = min(min(NaN, K0), K1)
219 // ieee = true : min/max(SNaN, K) = QNaN, min/max(QNaN, K) = K
220 // ieee = false : min/max(NaN, K) = K
221 // clamp(NaN) = dx10_clamp ? 0.0 : NaN
222 // Consider values of min(max(Val, K0), K1) and max(min(Val, K1), K0) as input.
223 // Other operand commutes (see matchMed) give same result since min and max are
224 // commutative.
226 // Try to replace fp min(max(Val, K0), K1) or max(min(Val, K1), K0), K0<=K1
227 // with fmed3(Val, K0, K1) or clamp(Val). Clamp requires K0 = 0.0 and K1 = 1.0.
228 // Val = SNaN only for ieee = true
229 // fmed3(SNaN, K0, K1) = min(min(SNaN, K0), K1) = min(QNaN, K1) = K1
230 // min(max(SNaN, K0), K1) = min(QNaN, K1) = K1
231 // max(min(SNaN, K1), K0) = max(K1, K0) = K1
232 // Val = NaN,ieee = false or Val = QNaN,ieee = true
233 // fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) = min(K0, K1) = K0
234 // min(max(NaN, K0), K1) = min(K0, K1) = K0 (can clamp when dx10_clamp = true)
235 // max(min(NaN, K1), K0) = max(K1, K0) = K1 != K0
236 bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3(
237 MachineInstr &MI, Med3MatchInfo &MatchInfo) const {
238 Register Dst = MI.getOperand(0).getReg();
239 LLT Ty = MRI.getType(Dst);
241 // med3 for f16 is only available on gfx9+, and not available for v2f16.
242 if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32))
243 return false;
245 auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
247 Register Val;
248 std::optional<FPValueAndVReg> K0, K1;
249 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
250 if (!matchMed<GFCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
251 return false;
253 if (K0->Value > K1->Value)
254 return false;
256 // For IEEE=false perform combine only when it's safe to assume that there are
257 // no NaN inputs. Most often MI is marked with nnan fast math flag.
258 // For IEEE=true consider NaN inputs. fmed3(NaN, K0, K1) is equivalent to
259 // min(min(NaN, K0), K1). Safe to fold for min(max(Val, K0), K1) since inner
260 // nodes(max/min) have same behavior when one input is NaN and other isn't.
261 // Don't consider max(min(SNaN, K1), K0) since there is no isKnownNeverQNaN,
262 // also post-legalizer inputs to min/max are fcanonicalized (never SNaN).
263 if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI)) {
264 // Don't fold single use constant that can't be inlined.
265 if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) &&
266 (!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) {
267 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
268 return true;
272 return false;
275 bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(MachineInstr &MI,
276 Register &Reg) const {
277 // Clamp is available on all types after regbankselect (f16, f32, f64, v2f16).
278 auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
279 Register Val;
280 std::optional<FPValueAndVReg> K0, K1;
281 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0).
282 if (!matchMed<GFCstOrSplatGFCstMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
283 return false;
285 if (!K0->Value.isExactlyValue(0.0) || !K1->Value.isExactlyValue(1.0))
286 return false;
288 // For IEEE=false perform combine only when it's safe to assume that there are
289 // no NaN inputs. Most often MI is marked with nnan fast math flag.
290 // For IEEE=true consider NaN inputs. Only min(max(QNaN, 0.0), 1.0) evaluates
291 // to 0.0 requires dx10_clamp = true.
292 if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&
293 isKnownNeverSNaN(Val, MRI)) ||
294 isKnownNeverNaN(MI.getOperand(0).getReg(), MRI)) {
295 Reg = Val;
296 return true;
299 return false;
302 // Replacing fmed3(NaN, 0.0, 1.0) with clamp. Requires dx10_clamp = true.
303 // Val = SNaN only for ieee = true. It is important which operand is NaN.
304 // min(min(SNaN, 0.0), 1.0) = min(QNaN, 1.0) = 1.0
305 // min(min(SNaN, 1.0), 0.0) = min(QNaN, 0.0) = 0.0
306 // min(min(0.0, 1.0), SNaN) = min(0.0, SNaN) = QNaN
307 // Val = NaN,ieee = false or Val = QNaN,ieee = true
308 // min(min(NaN, 0.0), 1.0) = min(0.0, 1.0) = 0.0
309 // min(min(NaN, 1.0), 0.0) = min(1.0, 0.0) = 0.0
310 // min(min(0.0, 1.0), NaN) = min(0.0, NaN) = 0.0
311 bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI,
312 Register &Reg) const {
313 // In llvm-ir, clamp is often represented as an intrinsic call to
314 // @llvm.amdgcn.fmed3.f32(%Val, 0.0, 1.0). Check for other operand orders.
315 MachineInstr *Src0 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
316 MachineInstr *Src1 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
317 MachineInstr *Src2 = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI);
319 if (isFCst(Src0) && !isFCst(Src1))
320 std::swap(Src0, Src1);
321 if (isFCst(Src1) && !isFCst(Src2))
322 std::swap(Src1, Src2);
323 if (isFCst(Src0) && !isFCst(Src1))
324 std::swap(Src0, Src1);
325 if (!isClampZeroToOne(Src1, Src2))
326 return false;
328 Register Val = Src0->getOperand(0).getReg();
330 auto isOp3Zero = [&]() {
331 MachineInstr *Op3 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);
332 if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT)
333 return Op3->getOperand(1).getFPImm()->isExactlyValue(0.0);
334 return false;
336 // For IEEE=false perform combine only when it's safe to assume that there are
337 // no NaN inputs. Most often MI is marked with nnan fast math flag.
338 // For IEEE=true consider NaN inputs. Requires dx10_clamp = true. Safe to fold
339 // when Val could be QNaN. If Val can also be SNaN third input should be 0.0.
340 if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI) ||
341 (getIEEE() && getDX10Clamp() &&
342 (isKnownNeverSNaN(Val, MRI) || isOp3Zero()))) {
343 Reg = Val;
344 return true;
347 return false;
350 void AMDGPURegBankCombinerImpl::applyClamp(MachineInstr &MI,
351 Register &Reg) const {
352 B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {MI.getOperand(0)}, {Reg},
353 MI.getFlags());
354 MI.eraseFromParent();
357 void AMDGPURegBankCombinerImpl::applyMed3(MachineInstr &MI,
358 Med3MatchInfo &MatchInfo) const {
359 B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
360 {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
361 getAsVgpr(MatchInfo.Val2)},
362 MI.getFlags());
363 MI.eraseFromParent();
366 SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode() const {
367 return MF.getInfo<SIMachineFunctionInfo>()->getMode();
370 bool AMDGPURegBankCombinerImpl::getIEEE() const { return getMode().IEEE; }
372 bool AMDGPURegBankCombinerImpl::getDX10Clamp() const {
373 return getMode().DX10Clamp;
376 bool AMDGPURegBankCombinerImpl::isFminnumIeee(const MachineInstr &MI) const {
377 return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
380 bool AMDGPURegBankCombinerImpl::isFCst(MachineInstr *MI) const {
381 return MI->getOpcode() == AMDGPU::G_FCONSTANT;
384 bool AMDGPURegBankCombinerImpl::isClampZeroToOne(MachineInstr *K0,
385 MachineInstr *K1) const {
386 if (isFCst(K0) && isFCst(K1)) {
387 const ConstantFP *KO_FPImm = K0->getOperand(1).getFPImm();
388 const ConstantFP *K1_FPImm = K1->getOperand(1).getFPImm();
389 return (KO_FPImm->isExactlyValue(0.0) && K1_FPImm->isExactlyValue(1.0)) ||
390 (KO_FPImm->isExactlyValue(1.0) && K1_FPImm->isExactlyValue(0.0));
392 return false;
395 // Pass boilerplate
396 // ================
398 class AMDGPURegBankCombiner : public MachineFunctionPass {
399 public:
400 static char ID;
402 AMDGPURegBankCombiner(bool IsOptNone = false);
404 StringRef getPassName() const override { return "AMDGPURegBankCombiner"; }
406 bool runOnMachineFunction(MachineFunction &MF) override;
408 void getAnalysisUsage(AnalysisUsage &AU) const override;
410 private:
411 bool IsOptNone;
412 AMDGPURegBankCombinerImplRuleConfig RuleConfig;
414 } // end anonymous namespace
416 void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
417 AU.addRequired<TargetPassConfig>();
418 AU.setPreservesCFG();
419 getSelectionDAGFallbackAnalysisUsage(AU);
420 AU.addRequired<GISelKnownBitsAnalysis>();
421 AU.addPreserved<GISelKnownBitsAnalysis>();
422 if (!IsOptNone) {
423 AU.addRequired<MachineDominatorTreeWrapperPass>();
424 AU.addPreserved<MachineDominatorTreeWrapperPass>();
426 MachineFunctionPass::getAnalysisUsage(AU);
429 AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
430 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
431 initializeAMDGPURegBankCombinerPass(*PassRegistry::getPassRegistry());
433 if (!RuleConfig.parseCommandLineOption())
434 report_fatal_error("Invalid rule identifier");
437 bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
438 if (MF.getProperties().hasProperty(
439 MachineFunctionProperties::Property::FailedISel))
440 return false;
441 auto *TPC = &getAnalysis<TargetPassConfig>();
442 const Function &F = MF.getFunction();
443 bool EnableOpt =
444 MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
446 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
447 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
449 const auto *LI = ST.getLegalizerInfo();
450 MachineDominatorTree *MDT =
451 IsOptNone ? nullptr
452 : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
454 CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
455 LI, EnableOpt, F.hasOptSize(), F.hasMinSize());
456 // Disable fixed-point iteration to reduce compile-time
457 CInfo.MaxIterations = 1;
458 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
459 // RegBankSelect seems not to leave dead instructions, so a full DCE pass is
460 // unnecessary.
461 CInfo.EnableFullDCE = false;
462 AMDGPURegBankCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
463 RuleConfig, ST, MDT, LI);
464 return Impl.combineMachineInstrs();
467 char AMDGPURegBankCombiner::ID = 0;
468 INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE,
469 "Combine AMDGPU machine instrs after regbankselect",
470 false, false)
471 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
472 INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
473 INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE,
474 "Combine AMDGPU machine instrs after regbankselect", false,
475 false)
477 namespace llvm {
478 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone) {
479 return new AMDGPURegBankCombiner(IsOptNone);
481 } // end namespace llvm