1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //===----------------------------------------------------------------------===//
10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
13 #include "llvm/Target/TargetMachine.h"
// Forward declarations of the two target machine classes. They are only
// declared here, not defined.
17 class AMDGPUTargetMachine
;
19 class GCNTargetMachine
;
// Factory function declarations for the R600-prefixed passes, plus the AMDGPU
// CFG structurizer. Every function in this group returns a FunctionPass
// pointer; all but the last take no arguments.
29 FunctionPass
*createR600VectorRegMerger();
30 FunctionPass
*createR600ExpandSpecialInstrsPass();
31 FunctionPass
*createR600EmitClauseMarkers();
32 FunctionPass
*createR600ClauseMergePass();
33 FunctionPass
*createR600Packetizer();
34 FunctionPass
*createR600ControlFlowFinalizer();
35 FunctionPass
*createAMDGPUCFGStructurizerPass();
// Takes the target machine and the codegen optimization level. Unlike
// createAMDGPUISelDag, neither parameter has a default here.
36 FunctionPass
*createR600ISelDag(TargetMachine
*TM
, CodeGenOpt::Level OptLevel
);
// Factory function declarations for the GCN/SI-prefixed passes and a number of
// AMDGPU-prefixed passes. All of them return a FunctionPass pointer except
// createAMDGPUPropagateAttributesLatePass, which returns a ModulePass pointer.
39 FunctionPass
*createGCNDPPCombinePass();
40 FunctionPass
*createSIAnnotateControlFlowPass();
41 FunctionPass
*createSIFoldOperandsPass();
42 FunctionPass
*createSIPeepholeSDWAPass();
43 FunctionPass
*createSILowerI1CopiesPass();
44 FunctionPass
*createSIFixupVectorISelPass();
45 FunctionPass
*createSIAddIMGInitPass();
46 FunctionPass
*createSIShrinkInstructionsPass();
47 FunctionPass
*createSILoadStoreOptimizerPass();
48 FunctionPass
*createSIWholeQuadModePass();
49 FunctionPass
*createSIFixControlFlowLiveIntervalsPass();
50 FunctionPass
*createSIOptimizeExecMaskingPreRAPass();
51 FunctionPass
*createSIFixSGPRCopiesPass();
52 FunctionPass
*createSIMemoryLegalizerPass();
53 FunctionPass
*createSIInsertWaitcntsPass();
54 FunctionPass
*createSIPreAllocateWWMRegsPass();
55 FunctionPass
*createSIFormMemoryClausesPass();
// Takes the target options by const reference and the target machine by
// pointer-to-const; both parameters are unnamed in this declaration.
56 FunctionPass
*createAMDGPUSimplifyLibCallsPass(const TargetOptions
&,
57 const TargetMachine
*);
58 FunctionPass
*createAMDGPUUseNativeCallsPass();
59 FunctionPass
*createAMDGPUCodeGenPreparePass();
60 FunctionPass
*createAMDGPUMachineCFGStructurizerPass();
// The Early and Late attribute-propagation factories both take a
// pointer-to-const TargetMachine; Early yields a FunctionPass, Late a
// ModulePass.
61 FunctionPass
*createAMDGPUPropagateAttributesEarlyPass(const TargetMachine
*);
62 ModulePass
*createAMDGPUPropagateAttributesLatePass(const TargetMachine
*);
63 FunctionPass
*createAMDGPURewriteOutArgumentsPass();
64 FunctionPass
*createSIModeRegisterPass();
// Per-pass declarations for AMDGPU-prefixed passes. Where all three pieces are
// present the pattern is: a create*() factory, an initialize*Pass function
// taking a PassRegistry reference, and an extern reference to the pass's ID
// char (declared here, defined elsewhere). Some passes in this group declare
// only a subset of the three.
// NOTE(review): initialize*Pass presumably registers the pass with the given
// registry -- confirm against the pass implementations.
66 void initializeAMDGPUDAGToDAGISelPass(PassRegistry
&);
68 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry
&);
69 extern char &AMDGPUMachineCFGStructurizerID
;
71 void initializeAMDGPUAlwaysInlinePass(PassRegistry
&);
// This factory returns the generic Pass type rather than FunctionPass or
// ModulePass.
73 Pass
*createAMDGPUAnnotateKernelFeaturesPass();
74 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry
&);
75 extern char &AMDGPUAnnotateKernelFeaturesID
;
77 FunctionPass
*createAMDGPUAtomicOptimizerPass();
78 void initializeAMDGPUAtomicOptimizerPass(PassRegistry
&);
79 extern char &AMDGPUAtomicOptimizerID
;
81 ModulePass
*createAMDGPULowerIntrinsicsPass();
82 void initializeAMDGPULowerIntrinsicsPass(PassRegistry
&);
83 extern char &AMDGPULowerIntrinsicsID
;
85 ModulePass
*createAMDGPUFixFunctionBitcastsPass();
86 void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry
&);
87 extern char &AMDGPUFixFunctionBitcastsID
;
89 FunctionPass
*createAMDGPULowerKernelArgumentsPass();
90 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry
&);
91 extern char &AMDGPULowerKernelArgumentsID
;
93 ModulePass
*createAMDGPULowerKernelAttributesPass();
94 void initializeAMDGPULowerKernelAttributesPass(PassRegistry
&);
95 extern char &AMDGPULowerKernelAttributesID
;
// The remaining entries in this group declare only the initialize function
// and the ID reference.
97 void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry
&);
98 extern char &AMDGPUPropagateAttributesEarlyID
;
100 void initializeAMDGPUPropagateAttributesLatePass(PassRegistry
&);
101 extern char &AMDGPUPropagateAttributesLateID
;
103 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry
&);
104 extern char &AMDGPURewriteOutArgumentsID
;
// Initialize-function and ID-reference declarations for the GCN DPP combine
// pass and the R600-prefixed passes. Each pair is an initialize*Pass function
// taking a PassRegistry reference and an extern reference to a char ID that is
// declared here and defined elsewhere.
106 void initializeGCNDPPCombinePass(PassRegistry
&);
107 extern char &GCNDPPCombineID
;
// Note the doubled "PassPass" suffix: the pass name itself ends in "Pass".
109 void initializeR600ClauseMergePassPass(PassRegistry
&);
110 extern char &R600ClauseMergePassID
;
112 void initializeR600ControlFlowFinalizerPass(PassRegistry
&);
113 extern char &R600ControlFlowFinalizerID
;
115 void initializeR600ExpandSpecialInstrsPassPass(PassRegistry
&);
116 extern char &R600ExpandSpecialInstrsPassID
;
118 void initializeR600VectorRegMergerPass(PassRegistry
&);
119 extern char &R600VectorRegMergerID
;
121 void initializeR600PacketizerPass(PassRegistry
&);
122 extern char &R600PacketizerID
;
// Initialize-function and ID-reference declarations for the SI-prefixed passes
// and a few AMDGPU-prefixed ones. Each pair is an initialize*Pass function
// taking a PassRegistry reference and an extern reference to a char ID that is
// declared here and defined elsewhere.
124 void initializeSIFoldOperandsPass(PassRegistry
&);
125 extern char &SIFoldOperandsID
;
127 void initializeSIPeepholeSDWAPass(PassRegistry
&);
128 extern char &SIPeepholeSDWAID
;
130 void initializeSIShrinkInstructionsPass(PassRegistry
&);
131 extern char &SIShrinkInstructionsID
;
133 void initializeSIFixSGPRCopiesPass(PassRegistry
&);
134 extern char &SIFixSGPRCopiesID
;
136 void initializeSIFixVGPRCopiesPass(PassRegistry
&);
137 extern char &SIFixVGPRCopiesID
;
139 void initializeSIFixupVectorISelPass(PassRegistry
&);
140 extern char &SIFixupVectorISelID
;
142 void initializeSILowerI1CopiesPass(PassRegistry
&);
143 extern char &SILowerI1CopiesID
;
145 void initializeSILowerSGPRSpillsPass(PassRegistry
&);
146 extern char &SILowerSGPRSpillsID
;
148 void initializeSILoadStoreOptimizerPass(PassRegistry
&);
149 extern char &SILoadStoreOptimizerID
;
151 void initializeSIWholeQuadModePass(PassRegistry
&);
152 extern char &SIWholeQuadModeID
;
154 void initializeSILowerControlFlowPass(PassRegistry
&);
155 extern char &SILowerControlFlowID
;
// Note the ID below is spelled SIInsertSkipsPassID (with a "Pass" infix),
// unlike the neighbouring <Name>ID spellings.
157 void initializeSIInsertSkipsPass(PassRegistry
&);
158 extern char &SIInsertSkipsPassID
;
160 void initializeSIOptimizeExecMaskingPass(PassRegistry
&);
161 extern char &SIOptimizeExecMaskingID
;
163 void initializeSIPreAllocateWWMRegsPass(PassRegistry
&);
164 extern char &SIPreAllocateWWMRegsID
;
166 void initializeAMDGPUSimplifyLibCallsPass(PassRegistry
&);
167 extern char &AMDGPUSimplifyLibCallsID
;
169 void initializeAMDGPUUseNativeCallsPass(PassRegistry
&);
170 extern char &AMDGPUUseNativeCallsID
;
172 void initializeSIAddIMGInitPass(PassRegistry
&);
173 extern char &SIAddIMGInitID
;
175 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry
&);
176 extern char &AMDGPUPerfHintAnalysisID
;
178 // Passes common to R600 and SI
// The declarations below mix factories returning FunctionPass, ModulePass and
// the generic Pass type; only the promote-alloca pass also declares its
// initialize function and ID reference in this group.
179 FunctionPass
*createAMDGPUPromoteAlloca();
180 void initializeAMDGPUPromoteAllocaPass(PassRegistry
&);
181 extern char &AMDGPUPromoteAllocaID
;
183 Pass
*createAMDGPUStructurizeCFGPass();
// Both parameters are defaulted: TM to nullptr and OptLevel to
// CodeGenOpt::Default, so the factory can be called with no arguments.
184 FunctionPass
*createAMDGPUISelDag(
185 TargetMachine
*TM
= nullptr,
186 CodeGenOpt::Level OptLevel
= CodeGenOpt::Default
);
// GlobalOpt defaults to true when the argument is omitted.
187 ModulePass
*createAMDGPUAlwaysInlinePass(bool GlobalOpt
= true);
188 ModulePass
*createR600OpenCLImageTypeLoweringPass();
189 FunctionPass
*createAMDGPUAnnotateUniformValues();
// Factory, initialize-function and ID-reference declarations for the printf
// runtime binding and unify-metadata module passes, followed by
// initialize/ID pairs for further passes. The ID references are declared
// extern here and defined elsewhere.
191 ModulePass
*createAMDGPUPrintfRuntimeBinding();
192 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry
&);
193 extern char &AMDGPUPrintfRuntimeBindingID
;
195 ModulePass
* createAMDGPUUnifyMetadataPass();
196 void initializeAMDGPUUnifyMetadataPass(PassRegistry
&);
197 extern char &AMDGPUUnifyMetadataID
;
199 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry
&);
200 extern char &SIOptimizeExecMaskingPreRAID
;
// Note: the two IDs spelled with a "PassID" suffix in this group are
// AMDGPUAnnotateUniformValuesPassID and SIAnnotateControlFlowPassID; the
// others use a plain "ID" suffix.
202 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry
&);
203 extern char &AMDGPUAnnotateUniformValuesPassID
;
205 void initializeAMDGPUCodeGenPreparePass(PassRegistry
&);
206 extern char &AMDGPUCodeGenPrepareID
;
208 void initializeSIAnnotateControlFlowPass(PassRegistry
&);
209 extern char &SIAnnotateControlFlowPassID
;
211 void initializeSIMemoryLegalizerPass(PassRegistry
&);
212 extern char &SIMemoryLegalizerID
;
214 void initializeSIModeRegisterPass(PassRegistry
&);
215 extern char &SIModeRegisterID
;
217 void initializeSIInsertWaitcntsPass(PassRegistry
&);
218 extern char &SIInsertWaitcntsID
;
220 void initializeSIFormMemoryClausesPass(PassRegistry
&);
221 extern char &SIFormMemoryClausesID
;
223 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry
&);
224 extern char &AMDGPUUnifyDivergentExitNodesID
;
// Declarations for the two AA wrapper passes (both factories return an
// ImmutablePass pointer), followed by the argument-usage-info initializer, the
// function inliner, the OpenCL enqueued-block lowering pass and two GCN
// reassign passes. No ID reference is declared for the AA wrappers, the
// argument usage info or the inliner in this group.
226 ImmutablePass
*createAMDGPUAAWrapperPass();
227 void initializeAMDGPUAAWrapperPassPass(PassRegistry
&);
228 ImmutablePass
*createAMDGPUExternalAAWrapperPass();
229 void initializeAMDGPUExternalAAWrapperPass(PassRegistry
&);
231 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry
&);
// The factory and the initializer use different name stems here
// (FunctionInlining vs. Inliner).
233 Pass
*createAMDGPUFunctionInliningPass();
234 void initializeAMDGPUInlinerPass(PassRegistry
&);
236 ModulePass
*createAMDGPUOpenCLEnqueuedBlockLoweringPass();
237 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry
&);
238 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID
;
240 void initializeGCNRegBankReassignPass(PassRegistry
&);
241 extern char &GCNRegBankReassignID
;
243 void initializeGCNNSAReassignPass(PassRegistry
&);
244 extern char &GCNNSAReassignID
;
249 TI_SCRATCH_RSRC_DWORD0
,
250 TI_SCRATCH_RSRC_DWORD1
,
251 TI_SCRATCH_RSRC_DWORD2
,
252 TI_SCRATCH_RSRC_DWORD3
256 } // End namespace llvm
258 /// OpenCL uses address spaces to differentiate between
259 /// various memory regions on the hardware. On the CPU
260 /// all of the address spaces point to the same memory;
261 /// however, on the GPU each address space points to
262 /// a separate piece of memory that is unique from other
263 /// memory locations.
266 // The maximum value for flat, generic, local, private, constant and region.
267 MAX_AMDGPU_ADDRESS
= 7,
269 FLAT_ADDRESS
= 0, ///< Address space for flat memory.
270 GLOBAL_ADDRESS
= 1, ///< Address space for global memory (RAT0, VTX0).
271 REGION_ADDRESS
= 2, ///< Address space for region memory. (GDS)
273 CONSTANT_ADDRESS
= 4, ///< Address space for constant memory (VTX2).
274 LOCAL_ADDRESS
= 3, ///< Address space for local memory.
275 PRIVATE_ADDRESS
= 5, ///< Address space for private memory.
277 CONSTANT_ADDRESS_32BIT
= 6, ///< Address space for 32-bit constant memory.
279 BUFFER_FAT_POINTER
= 7, ///< Address space for 160-bit buffer fat pointers.
281 /// Address space for direct addressable parameter memory (CONST0).
283 /// Address space for indirect addressable parameter memory (VTX1).
286 // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on
287 // this order to be able to dynamically index a constant buffer, for example:
290 // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx
292 CONSTANT_BUFFER_0
= 8,
293 CONSTANT_BUFFER_1
= 9,
294 CONSTANT_BUFFER_2
= 10,
295 CONSTANT_BUFFER_3
= 11,
296 CONSTANT_BUFFER_4
= 12,
297 CONSTANT_BUFFER_5
= 13,
298 CONSTANT_BUFFER_6
= 14,
299 CONSTANT_BUFFER_7
= 15,
300 CONSTANT_BUFFER_8
= 16,
301 CONSTANT_BUFFER_9
= 17,
302 CONSTANT_BUFFER_10
= 18,
303 CONSTANT_BUFFER_11
= 19,
304 CONSTANT_BUFFER_12
= 20,
305 CONSTANT_BUFFER_13
= 21,
306 CONSTANT_BUFFER_14
= 22,
307 CONSTANT_BUFFER_15
= 23,
309 // Some places use this if the address space can't be determined.
310 UNKNOWN_ADDRESS_SPACE
= ~0u,