[x86] fix assert with horizontal math + broadcast of vector (PR43402)
[llvm-core.git] / lib / Target / AMDGPU / AMDGPUMachineFunction.h
blob52987e2fa4111c30934e4748f84449b248cea664
1 //===-- AMDGPUMachineFunction.h ---------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
10 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
12 #include "llvm/ADT/DenseMap.h"
13 #include "llvm/CodeGen/MachineFunction.h"
15 namespace llvm {
17 class GCNSubtarget;
19 class AMDGPUMachineFunction : public MachineFunctionInfo {
20 /// A map to keep track of local memory objects and their offsets within the
21 /// local memory space.
22 SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects;
24 protected:
25 uint64_t ExplicitKernArgSize; // Cache for this.
26 unsigned MaxKernArgAlign; // Cache for this.
28 /// Number of bytes in the LDS that are being used.
29 unsigned LDSSize;
31 // Kernels + shaders. i.e. functions called by the driver and not called
32 // by other functions.
33 bool IsEntryFunction;
35 bool NoSignedZerosFPMath;
37 // Function may be memory bound.
38 bool MemoryBound;
40 // Kernel may need limited waves per EU for better performance.
41 bool WaveLimiter;
43 public:
44 AMDGPUMachineFunction(const MachineFunction &MF);
46 uint64_t getExplicitKernArgSize() const {
47 return ExplicitKernArgSize;
50 unsigned getMaxKernArgAlign() const {
51 return MaxKernArgAlign;
54 unsigned getLDSSize() const {
55 return LDSSize;
58 bool isEntryFunction() const {
59 return IsEntryFunction;
62 bool hasNoSignedZerosFPMath() const {
63 return NoSignedZerosFPMath;
66 bool isMemoryBound() const {
67 return MemoryBound;
70 bool needsWaveLimiter() const {
71 return WaveLimiter;
74 unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalValue &GV);
78 #endif