1 //===---- SMInstructions.td - Scalar Memory Instruction Definitions -------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
// Immediate offset operand for the SMRD encodings, parsed through the
// "SMRDOffset8" asm match class. It is the $offset operand type of the
// _IMM_si real loads defined in SM_Real_Loads_si.
9 def smrd_offset_8 : NamedOperandU32<"SMRDOffset8",
10 NamedMatchClass<"SMRDOffset8">> {
11 let OperandType = "OPERAND_IMMEDIATE";
// Immediate offset operand for the SMEM encodings, parsed through the
// "SMEMOffset" asm match class. It names custom MC hooks for encoding and
// decoding the value (getSMEMOffsetEncoding / decodeSMEMOffset).
14 def smem_offset : NamedOperandU32<"SMEMOffset",
15 NamedMatchClass<"SMEMOffset">> {
16 let OperandType = "OPERAND_IMMEDIATE";
17 let EncoderMethod = "getSMEMOffsetEncoding";
18 let DecoderMethod = "decodeSMEMOffset";
21 //===----------------------------------------------------------------------===//
22 // Scalar Memory classes
23 //===----------------------------------------------------------------------===//
// Base class of the scalar-memory pseudo instructions. The pseudo itself is
// given an empty asm string; the mnemonic and operand string are kept in
// Mnemonic and AsmOperands so that SM_Real can concatenate them.
25 class SM_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]> :
26 InstSI <outs, ins, "", pattern>,
27 SIMCInstr<opName, SIEncodingFamily.NONE> {
29 let isCodeGenOnly = 1;
35 let hasSideEffects = 0;
36 let UseNamedOperandTable = 1;
37 let SchedRW = [WriteSMEM];
39 string Mnemonic = opName;
40 string AsmOperands = asmOps;
// Default operand-presence flags. The real encoding classes test has_sbase and
// has_offset to decide which fields to encode; offset_is_imm is overridden by
// the immediate-offset variants and feeds SM_Real's `imm` bit.
42 bits<1> has_sbase = 1;
46 bits<1> has_offset = 1;
47 bits<1> offset_is_imm = 0;
// Common base of the real (encodable) scalar-memory instructions. The operand
// lists come from the pseudo `ps`, and the asm string is the pseudo's mnemonic
// followed by its operand string.
51 class SM_Real <SM_Pseudo ps>
52 : InstSI<ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> {
55 let isCodeGenOnly = 0;
57 Instruction Opcode = !cast<Instruction>(NAME);
59 // copy relevant pseudo op flags
60 let LGKM_CNT = ps.LGKM_CNT;
62 let mayStore = ps.mayStore;
63 let mayLoad = ps.mayLoad;
64 let hasSideEffects = ps.hasSideEffects;
65 let UseNamedOperandTable = ps.UseNamedOperandTable;
66 let SchedRW = ps.SchedRW;
67 let SubtargetPredicate = ps.SubtargetPredicate;
68 let AsmMatchConverter = ps.AsmMatchConverter;
69 let IsAtomicRet = ps.IsAtomicRet;
70 let IsAtomicNoRet = ps.IsAtomicNoRet;
72 let TSFlags = ps.TSFlags;
74 bit is_buffer = ps.is_buffer;
// 1 only when the pseudo has an offset operand and that offset is an immediate.
80 bits<1> imm = !if(ps.has_offset, ps.offset_is_imm, 0);
// Pseudo for the probe instructions: no outputs, asm operands
// "$sdata, $sbase, $offset". isImm selects the immediate-offset form and the
// _IMM / _SGPR suffix appended to PseudoInstr.
84 class SM_Probe_Pseudo <string opName, dag ins, bit isImm>
85 : SM_Pseudo<opName, (outs), ins, " $sdata, $sbase, $offset"> {
91 let hasSideEffects = 1;
92 let offset_is_imm = isImm;
93 let PseudoInstr = opName # !if(isImm, "_IMM", "_SGPR");
// Pseudo for scalar loads. BaseClass records the base-address register class
// so the real-load multiclasses can rebuild InOperandList from it.
96 class SM_Load_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]>
97 : SM_Pseudo<opName, outs, ins, asmOps, pattern> {
98 RegisterClass BaseClass;
// Pseudo for scalar stores (no outputs). BaseClass and SrcClass record the
// base-address and source-data register classes so the real-store multiclass
// can rebuild InOperandList from them.
105 class SM_Store_Pseudo <string opName, dag ins, string asmOps, list<dag> pattern = []>
106 : SM_Pseudo<opName, (outs), ins, asmOps, pattern> {
107 RegisterClass BaseClass;
108 RegisterClass SrcClass;
// Pseudo for the discard instructions: no outputs, asm operands
// "$sbase, $offset". isImm selects the immediate-offset form and the
// _IMM / _SGPR suffix appended to PseudoInstr.
116 class SM_Discard_Pseudo <string opName, dag ins, bit isImm>
117 : SM_Pseudo<opName, (outs), ins, " $sbase, $offset"> {
123 let hasSideEffects = 1;
124 let offset_is_imm = isImm;
125 let PseudoInstr = opName # !if(isImm, "_IMM", "_SGPR");
// Defines the _IMM (i32imm offset) and _SGPR (SReg_32 offset) pseudo variants
// of one scalar load. Both record baseClass in BaseClass for later use by the
// real-instruction multiclasses.
// NOTE: the _SGPR variant names its offset operand $soff, while its asm operand
// string refers to $offset; the real definitions replace InOperandList with one
// that uses $offset.
128 multiclass SM_Pseudo_Loads<string opName,
129 RegisterClass baseClass,
130 RegisterClass dstClass> {
131 def _IMM : SM_Load_Pseudo <opName,
132 (outs dstClass:$sdst),
133 (ins baseClass:$sbase, i32imm:$offset, CPol:$cpol),
134 " $sdst, $sbase, $offset$cpol", []> {
135 let offset_is_imm = 1;
136 let BaseClass = baseClass;
137 let PseudoInstr = opName # "_IMM";
142 def _SGPR : SM_Load_Pseudo <opName,
143 (outs dstClass:$sdst),
144 (ins baseClass:$sbase, SReg_32:$soff, CPol:$cpol),
145 " $sdst, $sbase, $offset$cpol", []> {
146 let BaseClass = baseClass;
147 let PseudoInstr = opName # "_SGPR";
// Defines the _IMM (i32imm offset) and _SGPR (SReg_32 offset) pseudo variants
// of one scalar store. Both record baseClass / srcClass in BaseClass / SrcClass
// for later use by the real-instruction multiclasses.
// NOTE: the _SGPR variant names its offset operand $soff, while its asm operand
// string refers to $offset; the real definitions replace InOperandList with one
// that uses $offset.
153 multiclass SM_Pseudo_Stores<string opName,
154 RegisterClass baseClass,
155 RegisterClass srcClass> {
156 def _IMM : SM_Store_Pseudo <opName,
157 (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, CPol:$cpol),
158 " $sdata, $sbase, $offset$cpol", []> {
159 let offset_is_imm = 1;
160 let BaseClass = baseClass;
161 let SrcClass = srcClass;
162 let PseudoInstr = opName # "_IMM";
165 def _SGPR : SM_Store_Pseudo <opName,
166 (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, CPol:$cpol),
167 " $sdata, $sbase, $offset$cpol", []> {
168 let BaseClass = baseClass;
169 let SrcClass = srcClass;
170 let PseudoInstr = opName # "_SGPR";
// Defines the _IMM (smem_offset) and _SGPR (SReg_32) variants of a discard
// pseudo; both take an SReg_64 base.
174 multiclass SM_Pseudo_Discards<string opName> {
175 def _IMM : SM_Discard_Pseudo <opName, (ins SReg_64:$sbase, smem_offset:$offset), 1>;
176 def _SGPR : SM_Discard_Pseudo <opName, (ins SReg_64:$sbase, SReg_32:$offset), 0>;
// Pseudo with no inputs that writes an i64 result to $sdst; the selection
// pattern sets $sdst from `node` (null_frag by default). Marked as having side
// effects.
179 class SM_Time_Pseudo<string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
180 opName, (outs SReg_64_XEXEC:$sdst), (ins),
181 " $sdst", [(set i64:$sdst, (node))]> {
182 let hasSideEffects = 1;
184 // FIXME: This should be definitively mayStore = 0. TableGen
185 // brokenly tries to infer these based on the intrinsic properties
186 // corresponding to the IR attributes. The target intrinsics are
187 // considered as writing to memory for IR dependency purposes, but
188 // those can be modeled with hasSideEffects here. These also end up
189 // inferring differently for llvm.readcyclecounter and the amdgcn
// Pseudo with no operands and an empty asm operand string; its selection
// pattern is just `node` (null_frag by default). Marked as having side effects.
197 class SM_Inval_Pseudo <string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
198 opName, (outs), (ins), "", [(node)]> {
199 let hasSideEffects = 1;
// Defines the _IMM (smem_offset) and _SGPR (SReg_32) variants of a probe
// pseudo. $sdata is an i8imm; the base register class is a parameter.
206 multiclass SM_Pseudo_Probe<string opName, RegisterClass baseClass> {
207 def _IMM : SM_Probe_Pseudo <opName, (ins i8imm:$sdata, baseClass:$sbase, smem_offset:$offset), 1>;
208 def _SGPR : SM_Probe_Pseudo <opName, (ins i8imm:$sdata, baseClass:$sbase, SReg_32:$offset), 0>;
// Pseudo with no inputs that writes an i32 result to $sdst; the selection
// pattern sets $sdst from `node`. Marked as having side effects.
211 class SM_WaveId_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
212 opName, (outs SReg_32_XM0_XEXEC:$sdst), (ins),
213 " $sdst", [(set i32:$sdst, (node))]> {
214 let hasSideEffects = 1;
221 //===----------------------------------------------------------------------===//
222 // Scalar Atomic Memory Classes
223 //===----------------------------------------------------------------------===//
// Base class of the scalar atomic pseudos (no selection pattern). isRet sets
// IsAtomicRet and, inverted, IsAtomicNoRet; all variants use the
// "cvtSMEMAtomic" asm match converter.
225 class SM_Atomic_Pseudo <string opName,
226 dag outs, dag ins, string asmOps, bit isRet>
227 : SM_Pseudo<opName, outs, ins, asmOps, []> {
236 // Should these be set?
238 let hasSideEffects = 1;
241 let IsAtomicNoRet = !not(isRet);
242 let IsAtomicRet = isRet;
244 let AsmMatchConverter = "cvtSMEMAtomic";
// One scalar atomic pseudo. isImm and isRet together pick the
// _IMM / _SGPR / _IMM_RTN / _SGPR_RTN suffix stored in opNameWithSuffix (and
// used as PseudoInstr). The two candidate input lists differ only in the offset
// operand (smem_offset vs SReg_32). Returning forms have a $sdst output and use
// CPol_GLC1 instead of CPol for the cache-policy operand.
247 class SM_Pseudo_Atomic<string opName,
248 RegisterClass baseClass,
249 RegisterClass dataClass,
252 string opNameWithSuffix = opName # !if(isImm,
253 !if(isRet, "_IMM_RTN", "_IMM"),
254 !if(isRet, "_SGPR_RTN", "_SGPR")),
255 Operand CPolTy = !if(isRet, CPol_GLC1, CPol)> :
256 SM_Atomic_Pseudo<opName,
257 !if(isRet, (outs dataClass:$sdst), (outs)),
259 (ins dataClass:$sdata, baseClass:$sbase, smem_offset:$offset, CPolTy:$cpol),
260 (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset, CPolTy:$cpol)),
261 !if(isRet, " $sdst", " $sdata") # ", $sbase, $offset$cpol",
263 AtomicNoRet <opNameWithSuffix, isRet> {
264 let offset_is_imm = isImm;
265 let PseudoInstr = opNameWithSuffix;
// Returning form only: $sdst is tied to $sdata, and $sdata is excluded from
// the encoding.
267 let Constraints = !if(isRet, "$sdst = $sdata", "");
268 let DisableEncoding = !if(isRet, "$sdata", "");
// Defines the four variants of one scalar atomic: immediate or SGPR offset,
// each with and without a returned value. The last two arguments of
// SM_Pseudo_Atomic are (isImm, isRet).
271 multiclass SM_Pseudo_Atomics<string opName,
272 RegisterClass baseClass,
273 RegisterClass dataClass> {
274 def _IMM : SM_Pseudo_Atomic <opName, baseClass, dataClass, 1, 0>;
275 def _SGPR : SM_Pseudo_Atomic <opName, baseClass, dataClass, 0, 0>;
276 def _IMM_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, 1, 1>;
277 def _SGPR_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, 0, 1>;
280 //===----------------------------------------------------------------------===//
281 // Scalar Memory Instructions
282 //===----------------------------------------------------------------------===//
284 // We are using the SReg_32_XM0 and not the SReg_32 register class for 32-bit
285 // SMRD instructions, because the SReg_32_XM0 register class does not include M0
286 // and writing to M0 from an SMRD instruction will hang the GPU.
288 // XXX - SMEM instructions do not allow exec for the data operand, but
289 // does sdst allow it for SMRD on SI/CI?
// Scalar load pseudos for 1, 2, 4, 8 and 16 dwords; all take an SReg_64 base.
290 defm S_LOAD_DWORD : SM_Pseudo_Loads <"s_load_dword", SReg_64, SReg_32_XM0_XEXEC>;
291 defm S_LOAD_DWORDX2 : SM_Pseudo_Loads <"s_load_dwordx2", SReg_64, SReg_64_XEXEC>;
292 defm S_LOAD_DWORDX4 : SM_Pseudo_Loads <"s_load_dwordx4", SReg_64, SReg_128>;
293 defm S_LOAD_DWORDX8 : SM_Pseudo_Loads <"s_load_dwordx8", SReg_64, SReg_256>;
294 defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <"s_load_dwordx16", SReg_64, SReg_512>;
// Buffer variants of the scalar load pseudos: defined with is_buffer = 1 and
// an SReg_128 base.
296 let is_buffer = 1 in {
297 defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <
298 "s_buffer_load_dword", SReg_128, SReg_32_XM0_XEXEC
301 // FIXME: exec_lo/exec_hi appear to be allowed for SMRD loads on
302 // SI/CI, but disallowed for SMEM on VI.
303 defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads <
304 "s_buffer_load_dwordx2", SReg_128, SReg_64_XEXEC
307 defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <
308 "s_buffer_load_dwordx4", SReg_128, SReg_128
311 defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <
312 "s_buffer_load_dwordx8", SReg_128, SReg_256
315 defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <
316 "s_buffer_load_dwordx16", SReg_128, SReg_512
// Scalar store pseudos, all guarded by HasScalarStores. The plain stores take
// an SReg_64 base; the buffer stores are defined with is_buffer = 1 and an
// SReg_128 base.
320 let SubtargetPredicate = HasScalarStores in {
321 defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
322 defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64_XEXEC>;
323 defm S_STORE_DWORDX4 : SM_Pseudo_Stores <"s_store_dwordx4", SReg_64, SReg_128>;
325 let is_buffer = 1 in {
326 defm S_BUFFER_STORE_DWORD : SM_Pseudo_Stores <
327 "s_buffer_store_dword", SReg_128, SReg_32_XM0_XEXEC
330 defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores <
331 "s_buffer_store_dwordx2", SReg_128, SReg_64_XEXEC
334 defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <
335 "s_buffer_store_dwordx4", SReg_128, SReg_128
338 } // End SubtargetPredicate = HasScalarStores
// The brace-less `let ... in` applies only to the definition that immediately
// follows it: HasSMemTimeInst guards S_MEMTIME but not S_DCACHE_INV.
340 let SubtargetPredicate = HasSMemTimeInst in
341 def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>;
342 def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>;
// s_dcache_inv_vol pseudo, guarded by the isGFX7GFX8GFX9 predicate.
344 let SubtargetPredicate = isGFX7GFX8GFX9 in {
345 def S_DCACHE_INV_VOL : SM_Inval_Pseudo <"s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;
346 } // let SubtargetPredicate = isGFX7GFX8GFX9
// Pseudos guarded by isGFX8Plus. The two write-back instructions additionally
// require HasScalarStores via OtherPredicates; the probe instructions come in
// a plain (SReg_64 base) and a buffer (SReg_128 base, is_buffer = 1) flavour.
348 let SubtargetPredicate = isGFX8Plus in {
349 let OtherPredicates = [HasScalarStores] in {
350 def S_DCACHE_WB : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>;
351 def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
352 } // End OtherPredicates = [HasScalarStores]
354 defm S_ATC_PROBE : SM_Pseudo_Probe <"s_atc_probe", SReg_64>;
355 let is_buffer = 1 in {
356 defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <"s_atc_probe_buffer", SReg_128>;
358 } // SubtargetPredicate = isGFX8Plus
// Three single-definition `let ... in` statements: each predicate applies only
// to the one definition that follows it. S_GL1_INV is defined without a
// selection node, so SM_Inval_Pseudo's null_frag default is used.
360 let SubtargetPredicate = HasSMemRealTime in
361 def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>;
363 let SubtargetPredicate = isGFX10Plus in
364 def S_GL1_INV : SM_Inval_Pseudo<"s_gl1_inv">;
365 let SubtargetPredicate = HasGetWaveIdInst in
366 def S_GET_WAVEID_IN_WORKGROUP : SM_WaveId_Pseudo <"s_get_waveid_in_workgroup", int_amdgcn_s_get_waveid_in_workgroup>;
// Scalar scratch load/store pseudos, guarded by HasScalarFlatScratchInsts.
// Every definition in this group is marked as using FLAT_SCR.
369 let SubtargetPredicate = HasScalarFlatScratchInsts, Uses = [FLAT_SCR] in {
370 defm S_SCRATCH_LOAD_DWORD : SM_Pseudo_Loads <"s_scratch_load_dword", SReg_64, SReg_32_XM0_XEXEC>;
371 defm S_SCRATCH_LOAD_DWORDX2 : SM_Pseudo_Loads <"s_scratch_load_dwordx2", SReg_64, SReg_64_XEXEC>;
372 defm S_SCRATCH_LOAD_DWORDX4 : SM_Pseudo_Loads <"s_scratch_load_dwordx4", SReg_64, SReg_128>;
374 defm S_SCRATCH_STORE_DWORD : SM_Pseudo_Stores <"s_scratch_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
375 defm S_SCRATCH_STORE_DWORDX2 : SM_Pseudo_Stores <"s_scratch_store_dwordx2", SReg_64, SReg_64_XEXEC>;
376 defm S_SCRATCH_STORE_DWORDX4 : SM_Pseudo_Stores <"s_scratch_store_dwordx4", SReg_64, SReg_128>;
377 } // SubtargetPredicate = HasScalarFlatScratchInsts
// Scalar atomic pseudos, guarded by HasScalarAtomics. Each defm expands to the
// four SM_Pseudo_Atomics variants. The buffer atomics (is_buffer = 1) take an
// SReg_128 base and the plain atomics an SReg_64 base. The cmpswap entries use
// a data register class one step wider than the other operations in their
// group (SReg_64_XEXEC among the 32-bit ops, SReg_128 among the _X2 ops).
379 let SubtargetPredicate = HasScalarAtomics in {
381 let is_buffer = 1 in {
382 defm S_BUFFER_ATOMIC_SWAP : SM_Pseudo_Atomics <"s_buffer_atomic_swap", SReg_128, SReg_32_XM0_XEXEC>;
383 defm S_BUFFER_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <"s_buffer_atomic_cmpswap", SReg_128, SReg_64_XEXEC>;
384 defm S_BUFFER_ATOMIC_ADD : SM_Pseudo_Atomics <"s_buffer_atomic_add", SReg_128, SReg_32_XM0_XEXEC>;
385 defm S_BUFFER_ATOMIC_SUB : SM_Pseudo_Atomics <"s_buffer_atomic_sub", SReg_128, SReg_32_XM0_XEXEC>;
386 defm S_BUFFER_ATOMIC_SMIN : SM_Pseudo_Atomics <"s_buffer_atomic_smin", SReg_128, SReg_32_XM0_XEXEC>;
387 defm S_BUFFER_ATOMIC_UMIN : SM_Pseudo_Atomics <"s_buffer_atomic_umin", SReg_128, SReg_32_XM0_XEXEC>;
388 defm S_BUFFER_ATOMIC_SMAX : SM_Pseudo_Atomics <"s_buffer_atomic_smax", SReg_128, SReg_32_XM0_XEXEC>;
389 defm S_BUFFER_ATOMIC_UMAX : SM_Pseudo_Atomics <"s_buffer_atomic_umax", SReg_128, SReg_32_XM0_XEXEC>;
390 defm S_BUFFER_ATOMIC_AND : SM_Pseudo_Atomics <"s_buffer_atomic_and", SReg_128, SReg_32_XM0_XEXEC>;
391 defm S_BUFFER_ATOMIC_OR : SM_Pseudo_Atomics <"s_buffer_atomic_or", SReg_128, SReg_32_XM0_XEXEC>;
392 defm S_BUFFER_ATOMIC_XOR : SM_Pseudo_Atomics <"s_buffer_atomic_xor", SReg_128, SReg_32_XM0_XEXEC>;
393 defm S_BUFFER_ATOMIC_INC : SM_Pseudo_Atomics <"s_buffer_atomic_inc", SReg_128, SReg_32_XM0_XEXEC>;
394 defm S_BUFFER_ATOMIC_DEC : SM_Pseudo_Atomics <"s_buffer_atomic_dec", SReg_128, SReg_32_XM0_XEXEC>;
396 defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_swap_x2", SReg_128, SReg_64_XEXEC>;
397 defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_cmpswap_x2", SReg_128, SReg_128>;
398 defm S_BUFFER_ATOMIC_ADD_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_add_x2", SReg_128, SReg_64_XEXEC>;
399 defm S_BUFFER_ATOMIC_SUB_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_sub_x2", SReg_128, SReg_64_XEXEC>;
400 defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_smin_x2", SReg_128, SReg_64_XEXEC>;
401 defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_umin_x2", SReg_128, SReg_64_XEXEC>;
402 defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_smax_x2", SReg_128, SReg_64_XEXEC>;
403 defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_umax_x2", SReg_128, SReg_64_XEXEC>;
404 defm S_BUFFER_ATOMIC_AND_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_and_x2", SReg_128, SReg_64_XEXEC>;
405 defm S_BUFFER_ATOMIC_OR_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_or_x2", SReg_128, SReg_64_XEXEC>;
406 defm S_BUFFER_ATOMIC_XOR_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_xor_x2", SReg_128, SReg_64_XEXEC>;
407 defm S_BUFFER_ATOMIC_INC_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_inc_x2", SReg_128, SReg_64_XEXEC>;
408 defm S_BUFFER_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_dec_x2", SReg_128, SReg_64_XEXEC>;
411 defm S_ATOMIC_SWAP : SM_Pseudo_Atomics <"s_atomic_swap", SReg_64, SReg_32_XM0_XEXEC>;
412 defm S_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <"s_atomic_cmpswap", SReg_64, SReg_64_XEXEC>;
413 defm S_ATOMIC_ADD : SM_Pseudo_Atomics <"s_atomic_add", SReg_64, SReg_32_XM0_XEXEC>;
414 defm S_ATOMIC_SUB : SM_Pseudo_Atomics <"s_atomic_sub", SReg_64, SReg_32_XM0_XEXEC>;
415 defm S_ATOMIC_SMIN : SM_Pseudo_Atomics <"s_atomic_smin", SReg_64, SReg_32_XM0_XEXEC>;
416 defm S_ATOMIC_UMIN : SM_Pseudo_Atomics <"s_atomic_umin", SReg_64, SReg_32_XM0_XEXEC>;
417 defm S_ATOMIC_SMAX : SM_Pseudo_Atomics <"s_atomic_smax", SReg_64, SReg_32_XM0_XEXEC>;
418 defm S_ATOMIC_UMAX : SM_Pseudo_Atomics <"s_atomic_umax", SReg_64, SReg_32_XM0_XEXEC>;
419 defm S_ATOMIC_AND : SM_Pseudo_Atomics <"s_atomic_and", SReg_64, SReg_32_XM0_XEXEC>;
420 defm S_ATOMIC_OR : SM_Pseudo_Atomics <"s_atomic_or", SReg_64, SReg_32_XM0_XEXEC>;
421 defm S_ATOMIC_XOR : SM_Pseudo_Atomics <"s_atomic_xor", SReg_64, SReg_32_XM0_XEXEC>;
422 defm S_ATOMIC_INC : SM_Pseudo_Atomics <"s_atomic_inc", SReg_64, SReg_32_XM0_XEXEC>;
423 defm S_ATOMIC_DEC : SM_Pseudo_Atomics <"s_atomic_dec", SReg_64, SReg_32_XM0_XEXEC>;
425 defm S_ATOMIC_SWAP_X2 : SM_Pseudo_Atomics <"s_atomic_swap_x2", SReg_64, SReg_64_XEXEC>;
426 defm S_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <"s_atomic_cmpswap_x2", SReg_64, SReg_128>;
427 defm S_ATOMIC_ADD_X2 : SM_Pseudo_Atomics <"s_atomic_add_x2", SReg_64, SReg_64_XEXEC>;
428 defm S_ATOMIC_SUB_X2 : SM_Pseudo_Atomics <"s_atomic_sub_x2", SReg_64, SReg_64_XEXEC>;
429 defm S_ATOMIC_SMIN_X2 : SM_Pseudo_Atomics <"s_atomic_smin_x2", SReg_64, SReg_64_XEXEC>;
430 defm S_ATOMIC_UMIN_X2 : SM_Pseudo_Atomics <"s_atomic_umin_x2", SReg_64, SReg_64_XEXEC>;
431 defm S_ATOMIC_SMAX_X2 : SM_Pseudo_Atomics <"s_atomic_smax_x2", SReg_64, SReg_64_XEXEC>;
432 defm S_ATOMIC_UMAX_X2 : SM_Pseudo_Atomics <"s_atomic_umax_x2", SReg_64, SReg_64_XEXEC>;
433 defm S_ATOMIC_AND_X2 : SM_Pseudo_Atomics <"s_atomic_and_x2", SReg_64, SReg_64_XEXEC>;
434 defm S_ATOMIC_OR_X2 : SM_Pseudo_Atomics <"s_atomic_or_x2", SReg_64, SReg_64_XEXEC>;
435 defm S_ATOMIC_XOR_X2 : SM_Pseudo_Atomics <"s_atomic_xor_x2", SReg_64, SReg_64_XEXEC>;
436 defm S_ATOMIC_INC_X2 : SM_Pseudo_Atomics <"s_atomic_inc_x2", SReg_64, SReg_64_XEXEC>;
437 defm S_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <"s_atomic_dec_x2", SReg_64, SReg_64_XEXEC>;
439 } // let SubtargetPredicate = HasScalarAtomics
// Discard pseudos. They are guarded by the same HasScalarAtomics predicate as
// the scalar atomics.
441 let SubtargetPredicate = HasScalarAtomics in {
442 defm S_DCACHE_DISCARD : SM_Pseudo_Discards <"s_dcache_discard">;
443 defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards <"s_dcache_discard_x2">;
446 //===----------------------------------------------------------------------===//
448 //===----------------------------------------------------------------------===//
450 //===----------------------------------------------------------------------===//
452 //===----------------------------------------------------------------------===//
// Real SMRD encoding used for GFX6/GFX7 (AssemblerPredicate isGFX6GFX7,
// decoder namespace "GFX6GFX7"). Visible field layout:
//   Inst{7-0}   offset{7-0}  (only if the pseudo has an offset)
//   Inst{14-9}  sbase{6-1}   (only if the pseudo has an sbase)
//   Inst{21-15} sdst{6-0}    (only if the pseudo has an sdst)
//   Inst{26-22} op
//   Inst{31-27} 0x18
// Fields whose operand is absent are left unset (?).
454 class SMRD_Real_si <bits<5> op, SM_Pseudo ps>
456 , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
459 let AssemblerPredicate = isGFX6GFX7;
460 let DecoderNamespace = "GFX6GFX7";
462 let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?);
464 let Inst{14-9} = !if(ps.has_sbase, sbase{6-1}, ?);
465 let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
466 let Inst{26-22} = op;
467 let Inst{31-27} = 0x18; //encoding
// Defines the _IMM_si and _SGPR_si real loads for the pseudo family named by
// `ps`. The default template arguments look up the <ps>_IMM and <ps>_SGPR
// pseudos; each real definition replaces InOperandList, using smrd_offset_8
// for the immediate form and SReg_32 for the SGPR form.
470 // FIXME: Assembler should reject trying to use glc on SMRD
471 // instructions on SI.
472 multiclass SM_Real_Loads_si<bits<5> op, string ps,
473 SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
474 SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
476 def _IMM_si : SMRD_Real_si <op, immPs> {
477 let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, CPol:$cpol);
480 // FIXME: The operand name $offset is inconsistent with $soff used
482 def _SGPR_si : SMRD_Real_si <op, sgprPs> {
483 let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
// SI real encodings: 5-bit opcodes for the scalar and buffer loads, plus
// S_MEMTIME and S_DCACHE_INV, which use SMRD_Real_si directly.
488 defm S_LOAD_DWORD : SM_Real_Loads_si <0x00, "S_LOAD_DWORD">;
489 defm S_LOAD_DWORDX2 : SM_Real_Loads_si <0x01, "S_LOAD_DWORDX2">;
490 defm S_LOAD_DWORDX4 : SM_Real_Loads_si <0x02, "S_LOAD_DWORDX4">;
491 defm S_LOAD_DWORDX8 : SM_Real_Loads_si <0x03, "S_LOAD_DWORDX8">;
492 defm S_LOAD_DWORDX16 : SM_Real_Loads_si <0x04, "S_LOAD_DWORDX16">;
493 defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_si <0x08, "S_BUFFER_LOAD_DWORD">;
494 defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_si <0x09, "S_BUFFER_LOAD_DWORDX2">;
495 defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_si <0x0a, "S_BUFFER_LOAD_DWORDX4">;
496 defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_si <0x0b, "S_BUFFER_LOAD_DWORDX8">;
497 defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_si <0x0c, "S_BUFFER_LOAD_DWORDX16">;
499 def S_MEMTIME_si : SMRD_Real_si <0x1e, S_MEMTIME>;
500 def S_DCACHE_INV_si : SMRD_Real_si <0x1f, S_DCACHE_INV>;
503 //===----------------------------------------------------------------------===//
505 //===----------------------------------------------------------------------===//
// Real SMEM encoding used for GFX8/GFX9 (AssemblerPredicate isGFX8GFX9,
// decoder namespace "GFX8"). Visible field layout:
//   Inst{5-0}   sbase{6-1}        (only if the pseudo has an sbase)
//   Inst{12-6}  sdst{6-0}         (only if the pseudo has an sdst)
//   Inst{16}    GLC bit of cpol   (only if the pseudo has glc)
//   Inst{25-18} op
//   Inst{31-26} 0x30
//   Inst{38-32} offset{6-0}       (only if the pseudo has an offset)
//   Inst{52-39} offset{20-7}      (only for an immediate offset)
// Fields whose operand is absent are left unset (?).
507 class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
509 , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI>
511 let AssemblerPredicate = isGFX8GFX9;
512 let DecoderNamespace = "GFX8";
514 let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
515 let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
517 let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);
519 let Inst{25-18} = op;
520 let Inst{31-26} = 0x30; //encoding
522 // VI supports 20-bit unsigned offsets while GFX9+ supports 21-bit signed.
523 // Offset value is corrected accordingly when offset is encoded/decoded.
524 let Inst{38-32} = !if(ps.has_offset, offset{6-0}, ?);
525 let Inst{52-39} = !if(ps.has_offset, !if(imm, offset{20-7}, ?), ?);
// Defines the _IMM_vi and _SGPR_vi real loads for the pseudo family named by
// `ps`. The default template arguments look up the <ps>_IMM and <ps>_SGPR
// pseudos; each real definition replaces InOperandList, using smem_offset for
// the immediate form and SReg_32 for the SGPR form.
528 multiclass SM_Real_Loads_vi<bits<8> op, string ps,
529 SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
530 SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
531 def _IMM_vi : SMEM_Real_vi <op, immPs> {
532 let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
534 def _SGPR_vi : SMEM_Real_vi <op, sgprPs> {
535 let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
// Store/probe flavour of SMEM_Real_vi: Inst{12-6} carries $sdata instead of
// $sdst. Note that the field is still gated on ps.has_sdst.
539 class SMEM_Real_Store_vi <bits<8> op, SM_Pseudo ps> : SMEM_Real_vi <op, ps> {
544 let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
// Defines the _IMM_vi and _SGPR_vi real stores for the pseudo family named by
// `ps`. Each real definition replaces InOperandList, rebuilding it from the
// pseudo's SrcClass and BaseClass with an offset operand named $offset.
547 multiclass SM_Real_Stores_vi<bits<8> op, string ps,
548 SM_Store_Pseudo immPs = !cast<SM_Store_Pseudo>(ps#_IMM),
549 SM_Store_Pseudo sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR)> {
550 // FIXME: The operand name $offset is inconsistent with $soff used
552 def _IMM_vi : SMEM_Real_Store_vi <op, immPs> {
553 let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
556 def _SGPR_vi : SMEM_Real_Store_vi <op, sgprPs> {
557 let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
// Defines the _IMM_vi and _SGPR_vi real probes. They reuse the store encoding
// class, so $sdata is placed in Inst{12-6}.
561 multiclass SM_Real_Probe_vi<bits<8> op, string ps> {
562 def _IMM_vi : SMEM_Real_Store_vi <op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
563 def _SGPR_vi : SMEM_Real_Store_vi <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>;
// VI real encodings (8-bit opcodes) for the loads, stores, cache-control,
// time, scratch and probe instructions.
566 defm S_LOAD_DWORD : SM_Real_Loads_vi <0x00, "S_LOAD_DWORD">;
567 defm S_LOAD_DWORDX2 : SM_Real_Loads_vi <0x01, "S_LOAD_DWORDX2">;
568 defm S_LOAD_DWORDX4 : SM_Real_Loads_vi <0x02, "S_LOAD_DWORDX4">;
569 defm S_LOAD_DWORDX8 : SM_Real_Loads_vi <0x03, "S_LOAD_DWORDX8">;
570 defm S_LOAD_DWORDX16 : SM_Real_Loads_vi <0x04, "S_LOAD_DWORDX16">;
571 defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_vi <0x08, "S_BUFFER_LOAD_DWORD">;
572 defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_vi <0x09, "S_BUFFER_LOAD_DWORDX2">;
573 defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_vi <0x0a, "S_BUFFER_LOAD_DWORDX4">;
574 defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_vi <0x0b, "S_BUFFER_LOAD_DWORDX8">;
575 defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_vi <0x0c, "S_BUFFER_LOAD_DWORDX16">;
577 defm S_STORE_DWORD : SM_Real_Stores_vi <0x10, "S_STORE_DWORD">;
578 defm S_STORE_DWORDX2 : SM_Real_Stores_vi <0x11, "S_STORE_DWORDX2">;
579 defm S_STORE_DWORDX4 : SM_Real_Stores_vi <0x12, "S_STORE_DWORDX4">;
581 defm S_BUFFER_STORE_DWORD : SM_Real_Stores_vi <0x18, "S_BUFFER_STORE_DWORD">;
582 defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_vi <0x19, "S_BUFFER_STORE_DWORDX2">;
583 defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_vi <0x1a, "S_BUFFER_STORE_DWORDX4">;
585 // These instructions use the same encoding
586 def S_DCACHE_INV_vi : SMEM_Real_vi <0x20, S_DCACHE_INV>;
587 def S_DCACHE_WB_vi : SMEM_Real_vi <0x21, S_DCACHE_WB>;
588 def S_DCACHE_INV_VOL_vi : SMEM_Real_vi <0x22, S_DCACHE_INV_VOL>;
589 def S_DCACHE_WB_VOL_vi : SMEM_Real_vi <0x23, S_DCACHE_WB_VOL>;
590 def S_MEMTIME_vi : SMEM_Real_vi <0x24, S_MEMTIME>;
591 def S_MEMREALTIME_vi : SMEM_Real_vi <0x25, S_MEMREALTIME>;
593 defm S_SCRATCH_LOAD_DWORD : SM_Real_Loads_vi <0x05, "S_SCRATCH_LOAD_DWORD">;
594 defm S_SCRATCH_LOAD_DWORDX2 : SM_Real_Loads_vi <0x06, "S_SCRATCH_LOAD_DWORDX2">;
595 defm S_SCRATCH_LOAD_DWORDX4 : SM_Real_Loads_vi <0x07, "S_SCRATCH_LOAD_DWORDX4">;
597 defm S_SCRATCH_STORE_DWORD : SM_Real_Stores_vi <0x15, "S_SCRATCH_STORE_DWORD">;
598 defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_vi <0x16, "S_SCRATCH_STORE_DWORDX2">;
599 defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_vi <0x17, "S_SCRATCH_STORE_DWORDX4">;
601 defm S_ATC_PROBE : SM_Real_Probe_vi <0x26, "S_ATC_PROBE">;
602 defm S_ATC_PROBE_BUFFER : SM_Real_Probe_vi <0x27, "S_ATC_PROBE_BUFFER">;
604 //===----------------------------------------------------------------------===//
606 //===----------------------------------------------------------------------===//
// Real VI encoding for scalar atomics. Constraints and DisableEncoding are
// copied from the pseudo, the GLC bit of cpol is fixed to ps.glc, and
// Inst{12-6} holds $sdst when ps.glc is set and $sdata otherwise. The
// AtomicNoRet name is this record's NAME with "_RTN" removed.
608 class SMEM_Atomic_Real_vi <bits<8> op, SM_Atomic_Pseudo ps>
609 : SMEM_Real_vi <op, ps>,
610 AtomicNoRet <!subst("_RTN","",NAME), ps.glc> {
614 let Constraints = ps.Constraints;
615 let DisableEncoding = ps.DisableEncoding;
617 let cpol{CPolBit.GLC} = ps.glc;
618 let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0});
// Defines the four VI real encodings of one scalar atomic, one for each of the
// _IMM, _SGPR, _IMM_RTN and _SGPR_RTN pseudos; all four share the opcode `op`.
621 multiclass SM_Real_Atomics_vi<bits<8> op, string ps> {
622 def _IMM_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
623 def _SGPR_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
624 def _IMM_RTN_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
625 def _SGPR_RTN_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
// VI real encodings of the scalar atomics. Opcode ranges as assigned below:
// 0x40-0x4c buffer atomics, 0x60-0x6c buffer _X2 atomics, 0x80-0x8c plain
// atomics, 0xa0-0xac plain _X2 atomics.
628 defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_vi <0x40, "S_BUFFER_ATOMIC_SWAP">;
629 defm S_BUFFER_ATOMIC_CMPSWAP : SM_Real_Atomics_vi <0x41, "S_BUFFER_ATOMIC_CMPSWAP">;
630 defm S_BUFFER_ATOMIC_ADD : SM_Real_Atomics_vi <0x42, "S_BUFFER_ATOMIC_ADD">;
631 defm S_BUFFER_ATOMIC_SUB : SM_Real_Atomics_vi <0x43, "S_BUFFER_ATOMIC_SUB">;
632 defm S_BUFFER_ATOMIC_SMIN : SM_Real_Atomics_vi <0x44, "S_BUFFER_ATOMIC_SMIN">;
633 defm S_BUFFER_ATOMIC_UMIN : SM_Real_Atomics_vi <0x45, "S_BUFFER_ATOMIC_UMIN">;
634 defm S_BUFFER_ATOMIC_SMAX : SM_Real_Atomics_vi <0x46, "S_BUFFER_ATOMIC_SMAX">;
635 defm S_BUFFER_ATOMIC_UMAX : SM_Real_Atomics_vi <0x47, "S_BUFFER_ATOMIC_UMAX">;
636 defm S_BUFFER_ATOMIC_AND : SM_Real_Atomics_vi <0x48, "S_BUFFER_ATOMIC_AND">;
637 defm S_BUFFER_ATOMIC_OR : SM_Real_Atomics_vi <0x49, "S_BUFFER_ATOMIC_OR">;
638 defm S_BUFFER_ATOMIC_XOR : SM_Real_Atomics_vi <0x4a, "S_BUFFER_ATOMIC_XOR">;
639 defm S_BUFFER_ATOMIC_INC : SM_Real_Atomics_vi <0x4b, "S_BUFFER_ATOMIC_INC">;
640 defm S_BUFFER_ATOMIC_DEC : SM_Real_Atomics_vi <0x4c, "S_BUFFER_ATOMIC_DEC">;
642 defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Real_Atomics_vi <0x60, "S_BUFFER_ATOMIC_SWAP_X2">;
643 defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_vi <0x61, "S_BUFFER_ATOMIC_CMPSWAP_X2">;
644 defm S_BUFFER_ATOMIC_ADD_X2 : SM_Real_Atomics_vi <0x62, "S_BUFFER_ATOMIC_ADD_X2">;
645 defm S_BUFFER_ATOMIC_SUB_X2 : SM_Real_Atomics_vi <0x63, "S_BUFFER_ATOMIC_SUB_X2">;
646 defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Real_Atomics_vi <0x64, "S_BUFFER_ATOMIC_SMIN_X2">;
647 defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Real_Atomics_vi <0x65, "S_BUFFER_ATOMIC_UMIN_X2">;
648 defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Real_Atomics_vi <0x66, "S_BUFFER_ATOMIC_SMAX_X2">;
649 defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Real_Atomics_vi <0x67, "S_BUFFER_ATOMIC_UMAX_X2">;
650 defm S_BUFFER_ATOMIC_AND_X2 : SM_Real_Atomics_vi <0x68, "S_BUFFER_ATOMIC_AND_X2">;
651 defm S_BUFFER_ATOMIC_OR_X2 : SM_Real_Atomics_vi <0x69, "S_BUFFER_ATOMIC_OR_X2">;
652 defm S_BUFFER_ATOMIC_XOR_X2 : SM_Real_Atomics_vi <0x6a, "S_BUFFER_ATOMIC_XOR_X2">;
653 defm S_BUFFER_ATOMIC_INC_X2 : SM_Real_Atomics_vi <0x6b, "S_BUFFER_ATOMIC_INC_X2">;
654 defm S_BUFFER_ATOMIC_DEC_X2 : SM_Real_Atomics_vi <0x6c, "S_BUFFER_ATOMIC_DEC_X2">;
656 defm S_ATOMIC_SWAP : SM_Real_Atomics_vi <0x80, "S_ATOMIC_SWAP">;
657 defm S_ATOMIC_CMPSWAP : SM_Real_Atomics_vi <0x81, "S_ATOMIC_CMPSWAP">;
658 defm S_ATOMIC_ADD : SM_Real_Atomics_vi <0x82, "S_ATOMIC_ADD">;
659 defm S_ATOMIC_SUB : SM_Real_Atomics_vi <0x83, "S_ATOMIC_SUB">;
660 defm S_ATOMIC_SMIN : SM_Real_Atomics_vi <0x84, "S_ATOMIC_SMIN">;
661 defm S_ATOMIC_UMIN : SM_Real_Atomics_vi <0x85, "S_ATOMIC_UMIN">;
662 defm S_ATOMIC_SMAX : SM_Real_Atomics_vi <0x86, "S_ATOMIC_SMAX">;
663 defm S_ATOMIC_UMAX : SM_Real_Atomics_vi <0x87, "S_ATOMIC_UMAX">;
664 defm S_ATOMIC_AND : SM_Real_Atomics_vi <0x88, "S_ATOMIC_AND">;
665 defm S_ATOMIC_OR : SM_Real_Atomics_vi <0x89, "S_ATOMIC_OR">;
666 defm S_ATOMIC_XOR : SM_Real_Atomics_vi <0x8a, "S_ATOMIC_XOR">;
667 defm S_ATOMIC_INC : SM_Real_Atomics_vi <0x8b, "S_ATOMIC_INC">;
668 defm S_ATOMIC_DEC : SM_Real_Atomics_vi <0x8c, "S_ATOMIC_DEC">;
670 defm S_ATOMIC_SWAP_X2 : SM_Real_Atomics_vi <0xa0, "S_ATOMIC_SWAP_X2">;
671 defm S_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_vi <0xa1, "S_ATOMIC_CMPSWAP_X2">;
672 defm S_ATOMIC_ADD_X2 : SM_Real_Atomics_vi <0xa2, "S_ATOMIC_ADD_X2">;
673 defm S_ATOMIC_SUB_X2 : SM_Real_Atomics_vi <0xa3, "S_ATOMIC_SUB_X2">;
674 defm S_ATOMIC_SMIN_X2 : SM_Real_Atomics_vi <0xa4, "S_ATOMIC_SMIN_X2">;
675 defm S_ATOMIC_UMIN_X2 : SM_Real_Atomics_vi <0xa5, "S_ATOMIC_UMIN_X2">;
676 defm S_ATOMIC_SMAX_X2 : SM_Real_Atomics_vi <0xa6, "S_ATOMIC_SMAX_X2">;
677 defm S_ATOMIC_UMAX_X2 : SM_Real_Atomics_vi <0xa7, "S_ATOMIC_UMAX_X2">;
678 defm S_ATOMIC_AND_X2 : SM_Real_Atomics_vi <0xa8, "S_ATOMIC_AND_X2">;
679 defm S_ATOMIC_OR_X2 : SM_Real_Atomics_vi <0xa9, "S_ATOMIC_OR_X2">;
680 defm S_ATOMIC_XOR_X2 : SM_Real_Atomics_vi <0xaa, "S_ATOMIC_XOR_X2">;
681 defm S_ATOMIC_INC_X2 : SM_Real_Atomics_vi <0xab, "S_ATOMIC_INC_X2">;
682 defm S_ATOMIC_DEC_X2 : SM_Real_Atomics_vi <0xac, "S_ATOMIC_DEC_X2">;
// Defines the _IMM_vi and _SGPR_vi real encodings of a discard pseudo; both
// use the plain SMEM_Real_vi encoding class.
684 multiclass SM_Real_Discard_vi<bits<8> op, string ps> {
685 def _IMM_vi : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_IMM)>;
686 def _SGPR_vi : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_SGPR)>;
// VI real encodings of the two discard instructions (opcodes 0x28 and 0x29).
689 defm S_DCACHE_DISCARD : SM_Real_Discard_vi <0x28, "S_DCACHE_DISCARD">;
690 defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_vi <0x29, "S_DCACHE_DISCARD_X2">;
692 //===----------------------------------------------------------------------===//
694 //===----------------------------------------------------------------------===//
// Immediate offset operand parsed through the "SMRDLiteralOffset" asm match
// class. It is the $offset operand type of SMRD_Real_Load_IMM_ci, which
// encodes it as a 32-bit value in Inst{63-32}.
696 def smrd_literal_offset : NamedOperandU32<"SMRDLiteralOffset",
697 NamedMatchClass<"SMRDLiteralOffset">> {
698 let OperandType = "OPERAND_IMMEDIATE";
// GFX7-only real load with a 32-bit literal offset. Inst{7-0} is fixed to
// 0xff and the full offset is placed in the second dword, Inst{63-32}; the
// sbase, sdst and op fields sit at the same bit positions as in SMRD_Real_si.
701 class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> :
705 let AssemblerPredicate = isGFX7Only;
706 let DecoderNamespace = "GFX7";
707 let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, CPol:$cpol);
709 let Inst{7-0} = 0xff;
711 let Inst{14-9} = sbase{6-1};
712 let Inst{21-15} = sdst{6-0};
713 let Inst{26-22} = op;
714 let Inst{31-27} = 0x18; //encoding
715 let Inst{63-32} = offset{31-0};
// GFX7 literal-offset real encodings of the scalar and buffer loads. Each one
// wraps the corresponding _IMM pseudo.
718 def S_LOAD_DWORD_IMM_ci : SMRD_Real_Load_IMM_ci <0x00, S_LOAD_DWORD_IMM>;
719 def S_LOAD_DWORDX2_IMM_ci : SMRD_Real_Load_IMM_ci <0x01, S_LOAD_DWORDX2_IMM>;
720 def S_LOAD_DWORDX4_IMM_ci : SMRD_Real_Load_IMM_ci <0x02, S_LOAD_DWORDX4_IMM>;
721 def S_LOAD_DWORDX8_IMM_ci : SMRD_Real_Load_IMM_ci <0x03, S_LOAD_DWORDX8_IMM>;
722 def S_LOAD_DWORDX16_IMM_ci : SMRD_Real_Load_IMM_ci <0x04, S_LOAD_DWORDX16_IMM>;
723 def S_BUFFER_LOAD_DWORD_IMM_ci : SMRD_Real_Load_IMM_ci <0x08, S_BUFFER_LOAD_DWORD_IMM>;
724 def S_BUFFER_LOAD_DWORDX2_IMM_ci : SMRD_Real_Load_IMM_ci <0x09, S_BUFFER_LOAD_DWORDX2_IMM>;
725 def S_BUFFER_LOAD_DWORDX4_IMM_ci : SMRD_Real_Load_IMM_ci <0x0a, S_BUFFER_LOAD_DWORDX4_IMM>;
726 def S_BUFFER_LOAD_DWORDX8_IMM_ci : SMRD_Real_Load_IMM_ci <0x0b, S_BUFFER_LOAD_DWORDX8_IMM>;
727 def S_BUFFER_LOAD_DWORDX16_IMM_ci : SMRD_Real_Load_IMM_ci <0x0c, S_BUFFER_LOAD_DWORDX16_IMM>;
// GFX7-only real SMRD encoding (AssemblerPredicate isGFX7Only, decoder
// namespace "GFX7"). The visible bit layout matches SMRD_Real_si, and it is
// registered under SIEncodingFamily.SI.
729 class SMRD_Real_ci <bits<5> op, SM_Pseudo ps>
731 , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
734 let AssemblerPredicate = isGFX7Only;
735 let DecoderNamespace = "GFX7";
737 let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?);
739 let Inst{14-9} = !if(ps.has_sbase, sbase{6-1}, ?);
740 let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
741 let Inst{26-22} = op;
742 let Inst{31-27} = 0x18; //encoding
// GFX7 real encoding of s_dcache_inv_vol (opcode 0x1d).
745 def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>;
747 //===----------------------------------------------------------------------===//
748 // Scalar Memory Patterns
749 //===----------------------------------------------------------------------===//
// PatFrag matching a `load` node for which isUniformLoad(N) holds. The
// GlobalISel predicate checks hasOneMemOperand and isInstrUniform on the
// instruction, then gathers address-mode info and tests it with hasVgprParts.
751 def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformLoad(N);}]> {
752 let GISelPredicateCode = [{
753 if (!MI.hasOneMemOperand())
755 if (!isInstrUniform(MI))
758 // FIXME: We should probably be caching this.
759 SmallVector<GEPInfo, 4> AddrInfo;
760 getAddrModeInfo(MI, MRI, AddrInfo);
762 if (hasVgprParts(AddrInfo))
// Address-selection complex patterns. The SMRD* forms match an i64 value and
// produce two operands; the SMRDBuffer* forms match an i32 value and produce
// one operand. Each names the C++ selector function that implements it.
768 def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
769 def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
770 def SMRDSgpr : ComplexPattern<i64, 2, "SelectSMRDSgpr">;
771 def SMRDBufferImm : ComplexPattern<i32, 1, "SelectSMRDBufferImm">;
772 def SMRDBufferImm32 : ComplexPattern<i32, 1, "SelectSMRDBufferImm32">;
// Selection patterns mapping an smrd_load of value type `vt` onto the scalar
// load family named by `Instr` (the suffix is appended to pick the variant).
// NOTE(review): the trailing `0` operand in each result is presumably the
// cache-policy immediate; confirm against the pseudo's operand list.
774 multiclass SMRD_Pattern <string Instr, ValueType vt> {
// Address matched by SMRDImm -> <Instr>_IMM pseudo.
778 (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
779 (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
// Address matched by SMRDImm32 -> <Instr>_IMM_ci, cast as InstSI rather than
// SM_Pseudo, and restricted to isGFX7Only.
782 // 2. 32-bit IMM offset on CI
784 (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
785 (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
786 let OtherPredicates = [isGFX7Only];
// Address matched by SMRDSgpr -> <Instr>_SGPR pseudo.
791 (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
792 (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0))
// Bare SReg_64 base with no offset -> <Instr>_IMM with an offset of 0.
797 (vt (smrd_load (i64 SReg_64:$sbase))),
798 (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))
// Selection patterns mapping SIsbuffer_load of value type `vt` onto the scalar
// buffer-load family named by `Instr`. The cache policy operand is passed
// through extract_cpol in every variant.
// NOTE(review): the immediate forms carry AddedComplexity 2 and 1 while the
// SGPR form sets none here, presumably so immediates are tried first; confirm.
802 multiclass SMLoad_Pattern <string Instr, ValueType vt> {
803 // 1. Offset as an immediate
805 (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
806 (vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_cpol $cachepolicy)))> {
807 let AddedComplexity = 2;
810 // 2. 32-bit IMM offset on CI
812 (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)),
813 (!cast<InstSI>(Instr#"_IMM_ci") SReg_128:$sbase, smrd_literal_offset:$offset,
814 (extract_cpol $cachepolicy))> {
815 let OtherPredicates = [isGFX7Only];
816 let AddedComplexity = 1;
819 // 3. Offset loaded in a 32-bit SGPR
821 (SIsbuffer_load v4i32:$sbase, i32:$offset, timm:$cachepolicy),
822 (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$offset, (extract_cpol $cachepolicy)))
826 // Global and constant loads can be selected to either MUBUF or SMRD
827 // instructions, but SMRD instructions are faster so we want the instruction
828 // selector to prefer those.
// Instantiate the load patterns with AddedComplexity = 100.
// SMRD_Pattern is expanded once per value type listed for each register-class
// width; SMLoad_Pattern is expanded for the integer and then the float vector
// type of each buffer-load width.
829 let AddedComplexity = 100 in {
831 foreach vt = Reg32Types.types in {
832 defm : SMRD_Pattern <"S_LOAD_DWORD", vt>;
835 foreach vt = SReg_64.RegTypes in {
836 defm : SMRD_Pattern <"S_LOAD_DWORDX2", vt>;
839 foreach vt = SReg_128.RegTypes in {
840 defm : SMRD_Pattern <"S_LOAD_DWORDX4", vt>;
843 foreach vt = SReg_256.RegTypes in {
844 defm : SMRD_Pattern <"S_LOAD_DWORDX8", vt>;
847 foreach vt = SReg_512.RegTypes in {
848 defm : SMRD_Pattern <"S_LOAD_DWORDX16", vt>;
// Buffer loads, integer result types.
851 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", i32>;
852 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2i32>;
853 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4i32>;
854 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8i32>;
855 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16i32>;
// Buffer loads, floating-point result types.
857 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", f32>;
858 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2f32>;
859 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4f32>;
860 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8f32>;
861 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16f32>;
862 } // End let AddedComplexity = 100
// Selection of the i64 `readcyclecounter` node on subtargets satisfying
// HasSMemTimeInst.
// NOTE(review): the pattern header and result instruction are not visible in
// this listing; confirm which instruction is emitted.
864 let OtherPredicates = [HasSMemTimeInst] in {
866 (i64 (readcyclecounter)),
869 } // let OtherPredicates = [HasSMemTimeInst]
// Selection of the i64 `readcyclecounter` node on subtargets satisfying
// HasShaderCyclesRegister. The 64-bit result is assembled with REG_SEQUENCE:
// sub0 comes from S_GETREG_B32 on HWREG.SHADER_CYCLES and sub1 is a constant 0
// from S_MOV_B32.
// NOTE(review): the meaning of the `0, -12` arguments to getHwRegImm is not
// shown here; confirm against its definition.
871 let OtherPredicates = [HasShaderCyclesRegister] in {
873 (i64 (readcyclecounter)),
874 (REG_SEQUENCE SReg_64,
875 (S_GETREG_B32 getHwRegImm<HWREG.SHADER_CYCLES, 0, -12>.ret), sub0,
876 (S_MOV_B32 (i32 0)), sub1)> {
877 // Prefer this to s_memtime because it has lower and more predictable latency.
878 let AddedComplexity = 1;
880 } // let OtherPredicates = [HasShaderCyclesRegister]
882 //===----------------------------------------------------------------------===//
884 //===----------------------------------------------------------------------===//
// Real (encoded) form of a scalar-memory pseudo for the GFX10 encoding family,
// parameterized by an 8-bit opcode `op` and the pseudo `ps`. Derives from
// SM_Real, SIMCInstr and Enc64; assembly is gated on isGFX10Plus and decoding
// uses the "GFX10" namespace.
886 class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps> :
887 SM_Real<ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10>, Enc64 {
888 let AssemblerPredicate = isGFX10Plus;
889 let DecoderNamespace = "GFX10";
// Operand fields are encoded only when the pseudo declares them; otherwise
// the bits are left unset (`?`). Only sbase{6-1} is encoded.
891 let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
892 let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
// DLC and GLC are single bits pulled out of the cpol operand.
893 let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ?);
894 let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);
// Opcode in bits 25-18, fixed 0x3d pattern in bits 31-26.
895 let Inst{25-18} = op;
896 let Inst{31-26} = 0x3d;
// Immediate-offset form: 21 offset bits go in Inst{52-32} and Inst{63-57}
// holds SGPR_NULL's hardware encoding. Otherwise Inst{52-32} is unset and
// Inst{63-57} carries offset{6-0}.
897 let Inst{52-32} = !if(ps.offset_is_imm, !if(ps.has_offset, offset{20-0}, ?), ?);
898 let Inst{63-57} = !if(ps.offset_is_imm, !cast<int>(SGPR_NULL.HWEncoding),
899 !if(ps.has_offset, offset{6-0}, ?));
// Emits the _IMM_gfx10 and _SGPR_gfx10 real load instructions for pseudo family
// `ps` with opcode `op`. The two defaulted parameters resolve the family's _IMM
// and _SGPR pseudos as SM_Load_Pseudo records so their BaseClass can be used.
902 multiclass SM_Real_Loads_gfx10<bits<8> op, string ps,
903 SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
904 SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
// Immediate-offset variant: offset operand is smem_offset.
905 def _IMM_gfx10 : SMEM_Real_gfx10<op, immPs> {
906 let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
// SGPR-offset variant: offset operand is an SReg_32 register.
908 def _SGPR_gfx10 : SMEM_Real_gfx10<op, sgprPs> {
909 let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
// Store flavour of SMEM_Real_gfx10: Inst{12-6} is overridden to carry
// sdata{6-0} instead of sdst, still conditional on ps.has_sdst.
// NOTE(review): the declaration of `sdata` is not visible in this listing.
913 class SMEM_Real_Store_gfx10<bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx10<op, ps> {
917 let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
// Emits the _IMM_gfx10 and _SGPR_gfx10 real store instructions for pseudo
// family `ps` with opcode `op`. The two defaulted parameters resolve the
// family's _IMM and _SGPR pseudos as SM_Store_Pseudo records so their SrcClass
// and BaseClass can be used in the operand lists.
920 multiclass SM_Real_Stores_gfx10<bits<8> op, string ps,
921 SM_Store_Pseudo immPs = !cast<SM_Store_Pseudo>(ps#_IMM),
922 SM_Store_Pseudo sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR)> {
923 // FIXME: The operand name $offset is inconsistent with $soff used
// Immediate-offset variant: data, base, smem_offset, cache policy.
925 def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, immPs> {
926 let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
// SGPR-offset variant: data, base, SReg_32 offset, cache policy.
929 def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> {
930 let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
// GFX10 real encodings for S_LOAD_*: opcodes 0x000-0x004 in order of
// increasing load width. Each defm yields an _IMM_gfx10 and an _SGPR_gfx10 def.
934 defm S_LOAD_DWORD : SM_Real_Loads_gfx10<0x000, "S_LOAD_DWORD">;
935 defm S_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x001, "S_LOAD_DWORDX2">;
936 defm S_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x002, "S_LOAD_DWORDX4">;
937 defm S_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x003, "S_LOAD_DWORDX8">;
938 defm S_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x004, "S_LOAD_DWORDX16">;
// GFX10 real encodings for S_SCRATCH_LOAD_*: opcodes 0x005-0x007, available
// only under the HasScalarFlatScratchInsts subtarget predicate.
940 let SubtargetPredicate = HasScalarFlatScratchInsts in {
941 defm S_SCRATCH_LOAD_DWORD : SM_Real_Loads_gfx10<0x005, "S_SCRATCH_LOAD_DWORD">;
942 defm S_SCRATCH_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x006, "S_SCRATCH_LOAD_DWORDX2">;
943 defm S_SCRATCH_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x007, "S_SCRATCH_LOAD_DWORDX4">;
944 } // End SubtargetPredicate = HasScalarFlatScratchInsts
// GFX10 real encodings for S_BUFFER_LOAD_*: opcodes 0x008-0x00c in order of
// increasing load width.
946 defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_gfx10<0x008, "S_BUFFER_LOAD_DWORD">;
947 defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x009, "S_BUFFER_LOAD_DWORDX2">;
948 defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x00a, "S_BUFFER_LOAD_DWORDX4">;
949 defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x00b, "S_BUFFER_LOAD_DWORDX8">;
950 defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x00c, "S_BUFFER_LOAD_DWORDX16">;
// GFX10 real encodings for scalar stores, all under HasScalarStores:
// S_STORE_* at 0x010-0x012, S_SCRATCH_STORE_* at 0x015-0x017 and
// S_BUFFER_STORE_* at 0x018-0x01a.
952 let SubtargetPredicate = HasScalarStores in {
953 defm S_STORE_DWORD : SM_Real_Stores_gfx10<0x010, "S_STORE_DWORD">;
954 defm S_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x011, "S_STORE_DWORDX2">;
955 defm S_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x012, "S_STORE_DWORDX4">;
// Scratch stores additionally require HasScalarFlatScratchInsts, expressed
// through OtherPredicates because SubtargetPredicate is already in use.
956 let OtherPredicates = [HasScalarFlatScratchInsts] in {
957 defm S_SCRATCH_STORE_DWORD : SM_Real_Stores_gfx10<0x015, "S_SCRATCH_STORE_DWORD">;
958 defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x016, "S_SCRATCH_STORE_DWORDX2">;
959 defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x017, "S_SCRATCH_STORE_DWORDX4">;
960 } // End OtherPredicates = [HasScalarFlatScratchInsts]
961 defm S_BUFFER_STORE_DWORD : SM_Real_Stores_gfx10<0x018, "S_BUFFER_STORE_DWORD">;
962 defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x019, "S_BUFFER_STORE_DWORDX2">;
963 defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x01a, "S_BUFFER_STORE_DWORDX4">;
964 } // End SubtargetPredicate = HasScalarStores
// GFX10 real encodings of standalone scalar-memory pseudos, each bound
// directly to SMEM_Real_gfx10 with its opcode. The defs are not listed in
// opcode order.
966 def S_MEMREALTIME_gfx10 : SMEM_Real_gfx10<0x025, S_MEMREALTIME>;
967 def S_MEMTIME_gfx10 : SMEM_Real_gfx10<0x024, S_MEMTIME>;
968 def S_GL1_INV_gfx10 : SMEM_Real_gfx10<0x01f, S_GL1_INV>;
969 def S_GET_WAVEID_IN_WORKGROUP_gfx10 : SMEM_Real_gfx10<0x02a, S_GET_WAVEID_IN_WORKGROUP>;
970 def S_DCACHE_INV_gfx10 : SMEM_Real_gfx10<0x020, S_DCACHE_INV>;
// GFX10 real encoding of S_DCACHE_WB (opcode 0x021), available only under the
// HasScalarStores subtarget predicate.
972 let SubtargetPredicate = HasScalarStores in {
973 def S_DCACHE_WB_gfx10 : SMEM_Real_gfx10<0x021, S_DCACHE_WB>;
974 } // End SubtargetPredicate = HasScalarStores
// Emits the _IMM_gfx10 and _SGPR_gfx10 real probe instructions for pseudo
// family `ps` with opcode `op`. Both use SMEM_Real_Store_gfx10, so Inst{12-6}
// is taken from sdata rather than sdst.
976 multiclass SM_Real_Probe_gfx10<bits<8> op, string ps> {
977 def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
978 def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
// GFX10 real encodings for the ATC probe instructions: opcodes 0x26 and 0x27.
981 defm S_ATC_PROBE : SM_Real_Probe_gfx10 <0x26, "S_ATC_PROBE">;
982 defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx10 <0x27, "S_ATC_PROBE_BUFFER">;
// GFX10 real encoding for scalar atomic pseudos. Extends SMEM_Real_gfx10 and
// registers the record with AtomicNoRet under its NAME with any "_RTN"
// substring removed, passing ps.glc as the second argument.
// NOTE(review): presumably ps.glc distinguishes the returning (_RTN) form from
// the non-returning one; confirm against SM_Atomic_Pseudo.
984 class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps>
985 : SMEM_Real_gfx10 <op, ps>,
986 AtomicNoRet <!subst("_RTN","",NAME), ps.glc> {
// Operand constraints and encoding exclusions are copied from the pseudo.
990 let Constraints = ps.Constraints;
991 let DisableEncoding = ps.DisableEncoding;
// The GLC bit of cpol is fixed to the pseudo's glc value.
993 let cpol{CPolBit.GLC} = ps.glc;
// Unlike the base class, a missing DLC encodes as 0 rather than unset.
995 let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
// Inst{12-6} holds sdst when glc is set and sdata otherwise.
996 let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0});
// Emits four GFX10 real atomic instructions for pseudo family `ps`, all sharing
// opcode `op`: _IMM, _SGPR, _IMM_RTN and _SGPR_RTN, each bound to the pseudo of
// the same suffix.
999 multiclass SM_Real_Atomics_gfx10<bits<8> op, string ps> {
1000 def _IMM_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
1001 def _SGPR_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
1002 def _IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
1003 def _SGPR_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
// GFX10 real encodings gated on the HasScalarAtomics subtarget predicate.
// Opcode layout: S_BUFFER_ATOMIC_* 0x40-0x4c, S_BUFFER_ATOMIC_*_X2 0x60-0x6c,
// S_ATOMIC_* 0x80-0x8c, S_ATOMIC_*_X2 0xa0-0xac. Within each group the
// operations appear in the same order, so the low nibble identifies the
// operation (SWAP=0 ... DEC=c).
// NOTE(review): per the closing comment, the S_DCACHE_DISCARD definitions at
// the end also sit inside this HasScalarAtomics scope; confirm that is
// intended.
1006 let SubtargetPredicate = HasScalarAtomics in {
// Buffer atomics.
1008 defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x40, "S_BUFFER_ATOMIC_SWAP">;
1009 defm S_BUFFER_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x41, "S_BUFFER_ATOMIC_CMPSWAP">;
1010 defm S_BUFFER_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x42, "S_BUFFER_ATOMIC_ADD">;
1011 defm S_BUFFER_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x43, "S_BUFFER_ATOMIC_SUB">;
1012 defm S_BUFFER_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x44, "S_BUFFER_ATOMIC_SMIN">;
1013 defm S_BUFFER_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x45, "S_BUFFER_ATOMIC_UMIN">;
1014 defm S_BUFFER_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x46, "S_BUFFER_ATOMIC_SMAX">;
1015 defm S_BUFFER_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x47, "S_BUFFER_ATOMIC_UMAX">;
1016 defm S_BUFFER_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x48, "S_BUFFER_ATOMIC_AND">;
1017 defm S_BUFFER_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x49, "S_BUFFER_ATOMIC_OR">;
1018 defm S_BUFFER_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x4a, "S_BUFFER_ATOMIC_XOR">;
1019 defm S_BUFFER_ATOMIC_INC : SM_Real_Atomics_gfx10 <0x4b, "S_BUFFER_ATOMIC_INC">;
1020 defm S_BUFFER_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x4c, "S_BUFFER_ATOMIC_DEC">;
// Buffer atomics, _X2 variants.
1022 defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0x60, "S_BUFFER_ATOMIC_SWAP_X2">;
1023 defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0x61, "S_BUFFER_ATOMIC_CMPSWAP_X2">;
1024 defm S_BUFFER_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0x62, "S_BUFFER_ATOMIC_ADD_X2">;
1025 defm S_BUFFER_ATOMIC_SUB_X2 : SM_Real_Atomics_gfx10 <0x63, "S_BUFFER_ATOMIC_SUB_X2">;
1026 defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0x64, "S_BUFFER_ATOMIC_SMIN_X2">;
1027 defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0x65, "S_BUFFER_ATOMIC_UMIN_X2">;
1028 defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0x66, "S_BUFFER_ATOMIC_SMAX_X2">;
1029 defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0x67, "S_BUFFER_ATOMIC_UMAX_X2">;
1030 defm S_BUFFER_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0x68, "S_BUFFER_ATOMIC_AND_X2">;
1031 defm S_BUFFER_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0x69, "S_BUFFER_ATOMIC_OR_X2">;
1032 defm S_BUFFER_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0x6a, "S_BUFFER_ATOMIC_XOR_X2">;
1033 defm S_BUFFER_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0x6b, "S_BUFFER_ATOMIC_INC_X2">;
1034 defm S_BUFFER_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0x6c, "S_BUFFER_ATOMIC_DEC_X2">;
// Non-buffer atomics.
1036 defm S_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x80, "S_ATOMIC_SWAP">;
1037 defm S_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x81, "S_ATOMIC_CMPSWAP">;
1038 defm S_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x82, "S_ATOMIC_ADD">;
1039 defm S_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x83, "S_ATOMIC_SUB">;
1040 defm S_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x84, "S_ATOMIC_SMIN">;
1041 defm S_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x85, "S_ATOMIC_UMIN">;
1042 defm S_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x86, "S_ATOMIC_SMAX">;
1043 defm S_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x87, "S_ATOMIC_UMAX">;
1044 defm S_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x88, "S_ATOMIC_AND">;
1045 defm S_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x89, "S_ATOMIC_OR">;
1046 defm S_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x8a, "S_ATOMIC_XOR">;
1047 defm S_ATOMIC_INC : SM_Real_Atomics_gfx10 <0x8b, "S_ATOMIC_INC">;
1048 defm S_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x8c, "S_ATOMIC_DEC">;
// Non-buffer atomics, _X2 variants.
1050 defm S_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0xa0, "S_ATOMIC_SWAP_X2">;
1051 defm S_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0xa1, "S_ATOMIC_CMPSWAP_X2">;
1052 defm S_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0xa2, "S_ATOMIC_ADD_X2">;
1053 defm S_ATOMIC_SUB_X2 : SM_Real_Atomics_gfx10 <0xa3, "S_ATOMIC_SUB_X2">;
1054 defm S_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0xa4, "S_ATOMIC_SMIN_X2">;
1055 defm S_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0xa5, "S_ATOMIC_UMIN_X2">;
1056 defm S_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0xa6, "S_ATOMIC_SMAX_X2">;
1057 defm S_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0xa7, "S_ATOMIC_UMAX_X2">;
1058 defm S_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0xa8, "S_ATOMIC_AND_X2">;
1059 defm S_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0xa9, "S_ATOMIC_OR_X2">;
1060 defm S_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0xaa, "S_ATOMIC_XOR_X2">;
1061 defm S_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0xab, "S_ATOMIC_INC_X2">;
1062 defm S_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0xac, "S_ATOMIC_DEC_X2">;
// Emits the _IMM_gfx10 and _SGPR_gfx10 real discard instructions for pseudo
// family `ps`, using the plain SMEM_Real_gfx10 encoding.
1064 multiclass SM_Real_Discard_gfx10<bits<8> op, string ps> {
1065 def _IMM_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
1066 def _SGPR_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
// Data-cache discard instructions: opcodes 0x28 and 0x29.
1069 defm S_DCACHE_DISCARD : SM_Real_Discard_gfx10 <0x28, "S_DCACHE_DISCARD">;
1070 defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_gfx10 <0x29, "S_DCACHE_DISCARD_X2">;
1072 } // End SubtargetPredicate = HasScalarAtomics
1074 def SMInfoTable : GenericTable {
1075 let FilterClass = "SM_Real";
1076 let CppTypeName = "SMInfo";
1077 let Fields = ["Opcode", "is_buffer"];
1079 let PrimaryKey = ["Opcode"];
1080 let PrimaryKeyName = "getSMEMOpcodeHelper";