1 //===---- SMInstructions.td - Scalar Memory Instruction Definitions -------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
// Immediate-offset operand used by the SI/CI `_IMM_si` real loads in this
// file. Declared as a NamedOperandU32 whose asm match class is "SMRDOffset8".
// NOTE(review): the "8" presumably refers to the 8-bit Inst{7-0} offset field
// of SMRD_Real_si -- confirm against the asm parser's range check.
9 def smrd_offset_8 : NamedOperandU32<"SMRDOffset8",
10 NamedMatchClass<"SMRDOffset8">> {
// Tag the operand with the OPERAND_IMMEDIATE operand type.
11 let OperandType = "OPERAND_IMMEDIATE";
// Immediate-offset operand used by the probe, discard and atomic pseudos and
// by the VI `_IMM_vi` real loads/stores in this file. Declared as a
// NamedOperandU32 whose asm match class is "SMRDOffset20".
// NOTE(review): the "20" presumably refers to the offset{19-0} field that
// SMEM_Real_vi places in Inst{51-32} -- confirm against the asm parser.
14 def smrd_offset_20 : NamedOperandU32<"SMRDOffset20",
15 NamedMatchClass<"SMRDOffset20">> {
// Tag the operand with the OPERAND_IMMEDIATE operand type.
16 let OperandType = "OPERAND_IMMEDIATE";
19 //===----------------------------------------------------------------------===//
20 // Scalar Memory classes
21 //===----------------------------------------------------------------------===//
// Common base class for the scalar-memory pseudo instructions in this file.
//   opName  - assembly mnemonic (also the key passed to SIMCInstr)
//   outs    - output operand dag
//   ins     - input operand dag
//   asmOps  - operand portion of the asm string
//   pattern - optional selection patterns (defaults to none)
// The pseudo itself is created with an empty asm string; SM_Real rebuilds the
// full string as Mnemonic # AsmOperands.
23 class SM_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]> :
24 InstSI <outs, ins, "", pattern>,
25 SIMCInstr<opName, SIEncodingFamily.NONE> {
27 let isCodeGenOnly = 1;
// Default; subclasses such as the probe/discard/time/inval pseudos set this
// back to 1.
33 let hasSideEffects = 0;
34 let UseNamedOperandTable = 1;
35 let SchedRW = [WriteSMEM];
// Saved so that real instructions derived from this pseudo can read them back.
37 string Mnemonic = opName;
38 string AsmOperands = asmOps;
// Flags read by the real-instruction encodings: has_sbase / has_offset gate
// whether the corresponding encoding field is filled in, and offset_is_imm
// feeds SM_Real's `imm` bit.
40 bits<1> has_sbase = 1;
44 bits<1> has_offset = 1;
45 bits<1> offset_is_imm = 0;
// Base class for the encodable ("real") scalar-memory instructions, each built
// from a pseudo `ps`. Operand lists are taken from the pseudo, the asm string
// is the pseudo's Mnemonic concatenated with its AsmOperands, and no selection
// patterns are attached.
48 class SM_Real <SM_Pseudo ps>
49 : InstSI<ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> {
// Clears the isCodeGenOnly flag that SM_Pseudo sets to 1.
52 let isCodeGenOnly = 0;
54 // copy relevant pseudo op flags
55 let SubtargetPredicate = ps.SubtargetPredicate;
56 let AsmMatchConverter = ps.AsmMatchConverter;
// `imm` mirrors the pseudo's offset_is_imm flag, and is forced to 0 when the
// pseudo has no offset operand at all.
62 bits<1> imm = !if(ps.has_offset, ps.offset_is_imm, 0);
// Pseudo for the probe instructions (instantiated via SM_Pseudo_Probe).
//   opName - assembly mnemonic
//   ins    - input operand dag supplied by the multiclass
//   isImm  - 1 when $offset is an immediate, 0 when it is a register
// There are no outputs; the asm operands are "$sdata, $sbase, $offset".
65 class SM_Probe_Pseudo <string opName, dag ins, bit isImm>
66 : SM_Pseudo<opName, (outs), ins, " $sdata, $sbase, $offset"> {
// Overrides the SM_Pseudo default of 0.
72 let hasSideEffects = 1;
73 let offset_is_imm = isImm;
// PseudoInstr name is the mnemonic plus "_IMM" or "_SGPR" depending on isImm.
74 let PseudoInstr = opName # !if(isImm, "_IMM", "_SGPR");
// Pseudo for scalar loads. All parameters are forwarded unchanged to
// SM_Pseudo.
77 class SM_Load_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]>
78 : SM_Pseudo<opName, outs, ins, asmOps, pattern> {
// Register class of $sbase. Filled in by SM_Pseudo_Loads and read back by the
// real-instruction multiclasses when they rebuild InOperandList.
79 RegisterClass BaseClass;
// Pseudo for scalar stores. Has no outputs; the remaining parameters are
// forwarded unchanged to SM_Pseudo.
86 class SM_Store_Pseudo <string opName, dag ins, string asmOps, list<dag> pattern = []>
87 : SM_Pseudo<opName, (outs), ins, asmOps, pattern> {
// Register classes of $sbase and $sdata. Filled in by SM_Pseudo_Stores and
// read back by SM_Real_Stores_vi when it rebuilds InOperandList.
88 RegisterClass BaseClass;
89 RegisterClass SrcClass;
// Pseudo for the dcache discard instructions (instantiated via
// SM_Pseudo_Discards).
//   opName - assembly mnemonic
//   ins    - input operand dag supplied by the multiclass
//   isImm  - 1 when $offset is an immediate, 0 when it is a register
// There are no outputs; the asm operands are "$sbase, $offset".
97 class SM_Discard_Pseudo <string opName, dag ins, bit isImm>
98 : SM_Pseudo<opName, (outs), ins, " $sbase, $offset"> {
// Overrides the SM_Pseudo default of 0.
104 let hasSideEffects = 1;
105 let offset_is_imm = isImm;
// PseudoInstr name is the mnemonic plus "_IMM" or "_SGPR" depending on isImm.
106 let PseudoInstr = opName # !if(isImm, "_IMM", "_SGPR");
// Defines the two load pseudos for one mnemonic:
//   <name>_IMM  - offset is an i32 immediate ($offset)
//   <name>_SGPR - offset comes from an SReg_32 register ($soff)
// baseClass is the register class of $sbase and dstClass that of $sdst. Both
// variants also take i1 $glc and $dlc immediates and carry no patterns.
109 multiclass SM_Pseudo_Loads<string opName,
110 RegisterClass baseClass,
111 RegisterClass dstClass> {
112 def _IMM : SM_Load_Pseudo <opName,
113 (outs dstClass:$sdst),
114 (ins baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc),
115 " $sdst, $sbase, $offset$glc$dlc", []> {
116 let offset_is_imm = 1;
117 let BaseClass = baseClass;
118 let PseudoInstr = opName # "_IMM";
// Note: the register operand below is named $soff while the asm string still
// refers to $offset. The real-instruction multiclasses in this file replace
// InOperandList with one that names the operand $offset.
123 def _SGPR : SM_Load_Pseudo <opName,
124 (outs dstClass:$sdst),
125 (ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc),
126 " $sdst, $sbase, $offset$glc$dlc", []> {
// offset_is_imm keeps its SM_Pseudo default of 0 for this variant.
127 let BaseClass = baseClass;
128 let PseudoInstr = opName # "_SGPR";
// Defines the two store pseudos for one mnemonic:
//   <name>_IMM  - offset is an i32 immediate ($offset)
//   <name>_SGPR - offset comes from an SReg_32 register ($soff)
// baseClass is the register class of $sbase and srcClass that of $sdata. Both
// variants also take i1 $glc and $dlc immediates and carry no patterns.
134 multiclass SM_Pseudo_Stores<string opName,
135 RegisterClass baseClass,
136 RegisterClass srcClass> {
137 def _IMM : SM_Store_Pseudo <opName,
138 (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc),
139 " $sdata, $sbase, $offset$glc$dlc", []> {
140 let offset_is_imm = 1;
141 let BaseClass = baseClass;
142 let SrcClass = srcClass;
143 let PseudoInstr = opName # "_IMM";
// Note: the register operand below is named $soff while the asm string still
// refers to $offset. SM_Real_Stores_vi replaces InOperandList with one that
// names the operand $offset.
146 def _SGPR : SM_Store_Pseudo <opName,
147 (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc),
148 " $sdata, $sbase, $offset$glc$dlc", []> {
// offset_is_imm keeps its SM_Pseudo default of 0 for this variant.
149 let BaseClass = baseClass;
150 let SrcClass = srcClass;
151 let PseudoInstr = opName # "_SGPR";
// Defines the _IMM and _SGPR discard pseudos for one mnemonic. Both use an
// SReg_64 $sbase; the offset is either a smrd_offset_20 immediate (isImm = 1)
// or an SReg_32 register (isImm = 0).
155 multiclass SM_Pseudo_Discards<string opName> {
156 def _IMM : SM_Discard_Pseudo <opName, (ins SReg_64:$sbase, smrd_offset_20:$offset), 1>;
157 def _SGPR : SM_Discard_Pseudo <opName, (ins SReg_64:$sbase, SReg_32:$offset), 0>;
// Pseudo for the time-reading instructions (s_memtime / s_memrealtime in this
// file). Takes no inputs and writes a 64-bit result to $sdst in
// SReg_64_XEXEC. `node` is the pattern operator whose result is selected to
// this instruction; it defaults to null_frag.
160 class SM_Time_Pseudo<string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
161 opName, (outs SReg_64_XEXEC:$sdst), (ins),
162 " $sdst", [(set i64:$sdst, (node))]> {
// Overrides the SM_Pseudo default of 0.
163 let hasSideEffects = 1;
165 // FIXME: This should be definitively mayStore = 0. TableGen
166 // brokenly tries to infer these based on the intrinsic properties
167 // corresponding to the IR attributes. The target intrinsics are
168 // considered as writing to memory for IR dependency purposes, but
169 // those can be modeled with hasSideEffects here. These also end up
170 // inferring differently for llvm.readcyclecounter and the amdgcn
// Pseudo for the operand-less cache maintenance instructions in this file
// (s_dcache_inv, s_dcache_wb, s_gl1_inv, ...). No inputs, no outputs and an
// empty operand string; the only pattern is the bare `node`, which defaults to
// null_frag.
178 class SM_Inval_Pseudo <string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
179 opName, (outs), (ins), "", [(node)]> {
// Overrides the SM_Pseudo default of 0.
180 let hasSideEffects = 1;
// Defines the _IMM and _SGPR probe pseudos for one mnemonic. Both take an i8
// immediate $sdata and a $sbase in baseClass; the offset is either a
// smrd_offset_20 immediate (isImm = 1) or an SReg_32 register (isImm = 0).
187 multiclass SM_Pseudo_Probe<string opName, RegisterClass baseClass> {
188 def _IMM : SM_Probe_Pseudo <opName, (ins i8imm:$sdata, baseClass:$sbase, smrd_offset_20:$offset), 1>;
189 def _SGPR : SM_Probe_Pseudo <opName, (ins i8imm:$sdata, baseClass:$sbase, SReg_32:$offset), 0>;
// Pseudo used for s_get_waveid_in_workgroup. Takes no inputs and writes a
// 32-bit result to $sdst in SReg_32_XM0_XEXEC. `node` is the pattern operator
// selected to this instruction; unlike SM_Time_Pseudo it has no default.
192 class SM_WaveId_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
193 opName, (outs SReg_32_XM0_XEXEC:$sdst), (ins),
194 " $sdst", [(set i32:$sdst, (node))]> {
// Overrides the SM_Pseudo default of 0.
195 let hasSideEffects = 1;
202 //===----------------------------------------------------------------------===//
203 // Scalar Atomic Memory Classes
204 //===----------------------------------------------------------------------===//
// Base pseudo for scalar atomics. outs/ins/asmOps are forwarded to SM_Pseudo
// with an empty pattern list.
// NOTE(review): how `isRet` is consumed is not visible in this excerpt --
// confirm against the full class body.
206 class SM_Atomic_Pseudo <string opName,
207 dag outs, dag ins, string asmOps, bit isRet>
208 : SM_Pseudo<opName, outs, ins, asmOps, []> {
217 // Should these be set?
// Overrides the SM_Pseudo default of 0.
219 let hasSideEffects = 1;
// One concrete atomic pseudo, parameterised on the offset kind (isImm) and on
// whether the pre-op value is returned (isRet):
//   - outs is $sdst in dataClass when isRet is set, otherwise empty;
//   - ins is ($sdata, $sbase, $offset, $dlc), where $offset is a
//     smrd_offset_20 immediate or an SReg_32 register;
//   - the asm string prints $sdst (returning) or $sdata (non-returning) first,
//     and appends the literal " glc" only for the returning form.
223 class SM_Pseudo_Atomic<string opName,
224 RegisterClass baseClass,
225 RegisterClass dataClass,
228 SM_Atomic_Pseudo<opName,
229 !if(isRet, (outs dataClass:$sdst), (outs)),
231 (ins dataClass:$sdata, baseClass:$sbase, smrd_offset_20:$offset, DLC:$dlc),
232 (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset, DLC:$dlc)),
233 !if(isRet, " $sdst", " $sdata") # ", $sbase, $offset" # !if(isRet, " glc", "") # "$dlc",
235 let offset_is_imm = isImm;
// PseudoInstr suffix is one of _IMM, _IMM_RTN, _SGPR, _SGPR_RTN.
236 let PseudoInstr = opName # !if(isImm,
237 !if(isRet, "_IMM_RTN", "_IMM"),
238 !if(isRet, "_SGPR_RTN", "_SGPR"));
// For the returning form, $sdst is tied to $sdata and $sdata is excluded from
// encoding; the non-returning form has neither constraint.
240 let Constraints = !if(isRet, "$sdst = $sdata", "");
241 let DisableEncoding = !if(isRet, "$sdata", "");
// Defines all four atomic pseudos for one mnemonic. The last two arguments of
// SM_Pseudo_Atomic are (isImm, isRet):
//   _IMM       (1, 0)   _SGPR      (0, 0)
//   _IMM_RTN   (1, 1)   _SGPR_RTN  (0, 1)
244 multiclass SM_Pseudo_Atomics<string opName,
245 RegisterClass baseClass,
246 RegisterClass dataClass> {
247 def _IMM : SM_Pseudo_Atomic <opName, baseClass, dataClass, 1, 0>;
248 def _SGPR : SM_Pseudo_Atomic <opName, baseClass, dataClass, 0, 0>;
249 def _IMM_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, 1, 1>;
250 def _SGPR_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, 0, 1>;
253 //===----------------------------------------------------------------------===//
254 // Scalar Memory Instructions
255 //===----------------------------------------------------------------------===//
257 // We are using the SReg_32_XM0 and not the SReg_32 register class for 32-bit
258 // SMRD instructions, because the SReg_32_XM0 register class does not include M0
259 // and writing to M0 from an SMRD instruction will hang the GPU.
261 // XXX - SMEM instructions do not allow exec for the data operand, but is
262 // exec allowed as sdst for SMRD on SI/CI?
// Scalar load pseudos with an SReg_64 base. Each defm expands (through
// SM_Pseudo_Loads) to an _IMM and an _SGPR pseudo; the destination register
// class grows with the dword count in the mnemonic.
263 defm S_LOAD_DWORD : SM_Pseudo_Loads <"s_load_dword", SReg_64, SReg_32_XM0_XEXEC>;
264 defm S_LOAD_DWORDX2 : SM_Pseudo_Loads <"s_load_dwordx2", SReg_64, SReg_64_XEXEC>;
265 defm S_LOAD_DWORDX4 : SM_Pseudo_Loads <"s_load_dwordx4", SReg_64, SReg_128>;
266 defm S_LOAD_DWORDX8 : SM_Pseudo_Loads <"s_load_dwordx8", SReg_64, SReg_256>;
267 defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <"s_load_dwordx16", SReg_64, SReg_512>;
// Scalar buffer-load pseudos. Same shape as the S_LOAD_* pseudos, but the base
// operand is in SReg_128 instead of SReg_64. Each defm expands to an _IMM and
// an _SGPR pseudo.
269 defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <
270 "s_buffer_load_dword", SReg_128, SReg_32_XM0_XEXEC
273 // FIXME: exec_lo/exec_hi appear to be allowed for SMRD loads on
274 // SI/CI, but disallowed for SMEM on VI.
275 defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads <
276 "s_buffer_load_dwordx2", SReg_128, SReg_64_XEXEC
279 defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <
280 "s_buffer_load_dwordx4", SReg_128, SReg_128
283 defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <
284 "s_buffer_load_dwordx8", SReg_128, SReg_256
287 defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <
288 "s_buffer_load_dwordx16", SReg_128, SReg_512
// Scalar store pseudos, all gated on the HasScalarStores subtarget predicate.
// S_STORE_* use an SReg_64 base and S_BUFFER_STORE_* an SReg_128 base; each
// defm expands (through SM_Pseudo_Stores) to an _IMM and an _SGPR pseudo.
291 let SubtargetPredicate = HasScalarStores in {
292 defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
293 defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64_XEXEC>;
294 defm S_STORE_DWORDX4 : SM_Pseudo_Stores <"s_store_dwordx4", SReg_64, SReg_128>;
296 defm S_BUFFER_STORE_DWORD : SM_Pseudo_Stores <
297 "s_buffer_store_dword", SReg_128, SReg_32_XM0_XEXEC
300 defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores <
301 "s_buffer_store_dwordx2", SReg_128, SReg_64_XEXEC
304 defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <
305 "s_buffer_store_dwordx4", SReg_128, SReg_128
307 } // End SubtargetPredicate = HasScalarStores
// Pseudos defined outside any `let SubtargetPredicate` block in this file.
// Each is selected from the amdgcn intrinsic passed as its second argument.
309 def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>;
310 def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>;
// s_dcache_inv_vol is gated on the isGFX7GFX8GFX9 subtarget predicate and is
// selected from the int_amdgcn_s_dcache_inv_vol intrinsic.
312 let SubtargetPredicate = isGFX7GFX8GFX9 in {
313 def S_DCACHE_INV_VOL : SM_Inval_Pseudo <"s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;
314 } // let SubtargetPredicate = isGFX7GFX8GFX9
// Pseudos gated on the isGFX8Plus subtarget predicate. The two write-back
// instructions additionally require HasScalarStores via OtherPredicates;
// s_memrealtime and the ATC probes only need isGFX8Plus.
316 let SubtargetPredicate = isGFX8Plus in {
317 let OtherPredicates = [HasScalarStores] in {
318 def S_DCACHE_WB : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>;
319 def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
320 } // End OtherPredicates = [HasScalarStores]
321 def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>;
// Probe pseudos: SReg_64 base for the plain form, SReg_128 for the buffer form.
323 defm S_ATC_PROBE : SM_Pseudo_Probe <"s_atc_probe", SReg_64>;
324 defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <"s_atc_probe_buffer", SReg_128>;
325 } // SubtargetPredicate = isGFX8Plus
// Pseudos gated on the isGFX10Plus subtarget predicate. S_GL1_INV passes no
// pattern operator, so SM_Inval_Pseudo's default (null_frag) applies.
327 let SubtargetPredicate = isGFX10Plus in {
328 def S_GL1_INV : SM_Inval_Pseudo<"s_gl1_inv">;
329 def S_GET_WAVEID_IN_WORKGROUP : SM_WaveId_Pseudo <"s_get_waveid_in_workgroup", int_amdgcn_s_get_waveid_in_workgroup>;
330 } // End SubtargetPredicate = isGFX10Plus
// Scalar scratch load/store pseudos. Gated on HasScalarFlatScratchInsts, and
// every instruction in the block is marked as using the FLAT_SCR register.
// All use an SReg_64 base; each defm expands to an _IMM and an _SGPR pseudo.
332 let SubtargetPredicate = HasScalarFlatScratchInsts, Uses = [FLAT_SCR] in {
333 defm S_SCRATCH_LOAD_DWORD : SM_Pseudo_Loads <"s_scratch_load_dword", SReg_64, SReg_32_XM0_XEXEC>;
334 defm S_SCRATCH_LOAD_DWORDX2 : SM_Pseudo_Loads <"s_scratch_load_dwordx2", SReg_64, SReg_64_XEXEC>;
335 defm S_SCRATCH_LOAD_DWORDX4 : SM_Pseudo_Loads <"s_scratch_load_dwordx4", SReg_64, SReg_128>;
337 defm S_SCRATCH_STORE_DWORD : SM_Pseudo_Stores <"s_scratch_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
338 defm S_SCRATCH_STORE_DWORDX2 : SM_Pseudo_Stores <"s_scratch_store_dwordx2", SReg_64, SReg_64_XEXEC>;
339 defm S_SCRATCH_STORE_DWORDX4 : SM_Pseudo_Stores <"s_scratch_store_dwordx4", SReg_64, SReg_128>;
340 } // SubtargetPredicate = HasScalarFlatScratchInsts
// Scalar atomic pseudos, all gated on the HasScalarAtomics subtarget
// predicate. Each defm expands (through SM_Pseudo_Atomics) to four pseudos:
// _IMM, _SGPR, _IMM_RTN and _SGPR_RTN. Arguments are (mnemonic, base register
// class, data register class).
342 let SubtargetPredicate = HasScalarAtomics in {
// Buffer atomics (SReg_128 base). Data is SReg_32_XM0_XEXEC, except cmpswap
// which uses SReg_64_XEXEC.
344 defm S_BUFFER_ATOMIC_SWAP : SM_Pseudo_Atomics <"s_buffer_atomic_swap", SReg_128, SReg_32_XM0_XEXEC>;
345 defm S_BUFFER_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <"s_buffer_atomic_cmpswap", SReg_128, SReg_64_XEXEC>;
346 defm S_BUFFER_ATOMIC_ADD : SM_Pseudo_Atomics <"s_buffer_atomic_add", SReg_128, SReg_32_XM0_XEXEC>;
347 defm S_BUFFER_ATOMIC_SUB : SM_Pseudo_Atomics <"s_buffer_atomic_sub", SReg_128, SReg_32_XM0_XEXEC>;
348 defm S_BUFFER_ATOMIC_SMIN : SM_Pseudo_Atomics <"s_buffer_atomic_smin", SReg_128, SReg_32_XM0_XEXEC>;
349 defm S_BUFFER_ATOMIC_UMIN : SM_Pseudo_Atomics <"s_buffer_atomic_umin", SReg_128, SReg_32_XM0_XEXEC>;
350 defm S_BUFFER_ATOMIC_SMAX : SM_Pseudo_Atomics <"s_buffer_atomic_smax", SReg_128, SReg_32_XM0_XEXEC>;
351 defm S_BUFFER_ATOMIC_UMAX : SM_Pseudo_Atomics <"s_buffer_atomic_umax", SReg_128, SReg_32_XM0_XEXEC>;
352 defm S_BUFFER_ATOMIC_AND : SM_Pseudo_Atomics <"s_buffer_atomic_and", SReg_128, SReg_32_XM0_XEXEC>;
353 defm S_BUFFER_ATOMIC_OR : SM_Pseudo_Atomics <"s_buffer_atomic_or", SReg_128, SReg_32_XM0_XEXEC>;
354 defm S_BUFFER_ATOMIC_XOR : SM_Pseudo_Atomics <"s_buffer_atomic_xor", SReg_128, SReg_32_XM0_XEXEC>;
355 defm S_BUFFER_ATOMIC_INC : SM_Pseudo_Atomics <"s_buffer_atomic_inc", SReg_128, SReg_32_XM0_XEXEC>;
356 defm S_BUFFER_ATOMIC_DEC : SM_Pseudo_Atomics <"s_buffer_atomic_dec", SReg_128, SReg_32_XM0_XEXEC>;
// Buffer atomics, _X2 forms (SReg_128 base). Data is SReg_64_XEXEC, except
// cmpswap_x2 which uses SReg_128.
358 defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_swap_x2", SReg_128, SReg_64_XEXEC>;
359 defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_cmpswap_x2", SReg_128, SReg_128>;
360 defm S_BUFFER_ATOMIC_ADD_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_add_x2", SReg_128, SReg_64_XEXEC>;
361 defm S_BUFFER_ATOMIC_SUB_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_sub_x2", SReg_128, SReg_64_XEXEC>;
362 defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_smin_x2", SReg_128, SReg_64_XEXEC>;
363 defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_umin_x2", SReg_128, SReg_64_XEXEC>;
364 defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_smax_x2", SReg_128, SReg_64_XEXEC>;
365 defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_umax_x2", SReg_128, SReg_64_XEXEC>;
366 defm S_BUFFER_ATOMIC_AND_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_and_x2", SReg_128, SReg_64_XEXEC>;
367 defm S_BUFFER_ATOMIC_OR_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_or_x2", SReg_128, SReg_64_XEXEC>;
368 defm S_BUFFER_ATOMIC_XOR_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_xor_x2", SReg_128, SReg_64_XEXEC>;
369 defm S_BUFFER_ATOMIC_INC_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_inc_x2", SReg_128, SReg_64_XEXEC>;
370 defm S_BUFFER_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_dec_x2", SReg_128, SReg_64_XEXEC>;
// Non-buffer atomics (SReg_64 base). Data is SReg_32_XM0_XEXEC, except cmpswap
// which uses SReg_64_XEXEC.
372 defm S_ATOMIC_SWAP : SM_Pseudo_Atomics <"s_atomic_swap", SReg_64, SReg_32_XM0_XEXEC>;
373 defm S_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <"s_atomic_cmpswap", SReg_64, SReg_64_XEXEC>;
374 defm S_ATOMIC_ADD : SM_Pseudo_Atomics <"s_atomic_add", SReg_64, SReg_32_XM0_XEXEC>;
375 defm S_ATOMIC_SUB : SM_Pseudo_Atomics <"s_atomic_sub", SReg_64, SReg_32_XM0_XEXEC>;
376 defm S_ATOMIC_SMIN : SM_Pseudo_Atomics <"s_atomic_smin", SReg_64, SReg_32_XM0_XEXEC>;
377 defm S_ATOMIC_UMIN : SM_Pseudo_Atomics <"s_atomic_umin", SReg_64, SReg_32_XM0_XEXEC>;
378 defm S_ATOMIC_SMAX : SM_Pseudo_Atomics <"s_atomic_smax", SReg_64, SReg_32_XM0_XEXEC>;
379 defm S_ATOMIC_UMAX : SM_Pseudo_Atomics <"s_atomic_umax", SReg_64, SReg_32_XM0_XEXEC>;
380 defm S_ATOMIC_AND : SM_Pseudo_Atomics <"s_atomic_and", SReg_64, SReg_32_XM0_XEXEC>;
381 defm S_ATOMIC_OR : SM_Pseudo_Atomics <"s_atomic_or", SReg_64, SReg_32_XM0_XEXEC>;
382 defm S_ATOMIC_XOR : SM_Pseudo_Atomics <"s_atomic_xor", SReg_64, SReg_32_XM0_XEXEC>;
383 defm S_ATOMIC_INC : SM_Pseudo_Atomics <"s_atomic_inc", SReg_64, SReg_32_XM0_XEXEC>;
384 defm S_ATOMIC_DEC : SM_Pseudo_Atomics <"s_atomic_dec", SReg_64, SReg_32_XM0_XEXEC>;
// Non-buffer atomics, _X2 forms (SReg_64 base). Data is SReg_64_XEXEC, except
// cmpswap_x2 which uses SReg_128.
386 defm S_ATOMIC_SWAP_X2 : SM_Pseudo_Atomics <"s_atomic_swap_x2", SReg_64, SReg_64_XEXEC>;
387 defm S_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <"s_atomic_cmpswap_x2", SReg_64, SReg_128>;
388 defm S_ATOMIC_ADD_X2 : SM_Pseudo_Atomics <"s_atomic_add_x2", SReg_64, SReg_64_XEXEC>;
389 defm S_ATOMIC_SUB_X2 : SM_Pseudo_Atomics <"s_atomic_sub_x2", SReg_64, SReg_64_XEXEC>;
390 defm S_ATOMIC_SMIN_X2 : SM_Pseudo_Atomics <"s_atomic_smin_x2", SReg_64, SReg_64_XEXEC>;
391 defm S_ATOMIC_UMIN_X2 : SM_Pseudo_Atomics <"s_atomic_umin_x2", SReg_64, SReg_64_XEXEC>;
392 defm S_ATOMIC_SMAX_X2 : SM_Pseudo_Atomics <"s_atomic_smax_x2", SReg_64, SReg_64_XEXEC>;
393 defm S_ATOMIC_UMAX_X2 : SM_Pseudo_Atomics <"s_atomic_umax_x2", SReg_64, SReg_64_XEXEC>;
394 defm S_ATOMIC_AND_X2 : SM_Pseudo_Atomics <"s_atomic_and_x2", SReg_64, SReg_64_XEXEC>;
395 defm S_ATOMIC_OR_X2 : SM_Pseudo_Atomics <"s_atomic_or_x2", SReg_64, SReg_64_XEXEC>;
396 defm S_ATOMIC_XOR_X2 : SM_Pseudo_Atomics <"s_atomic_xor_x2", SReg_64, SReg_64_XEXEC>;
397 defm S_ATOMIC_INC_X2 : SM_Pseudo_Atomics <"s_atomic_inc_x2", SReg_64, SReg_64_XEXEC>;
398 defm S_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <"s_atomic_dec_x2", SReg_64, SReg_64_XEXEC>;
400 } // let SubtargetPredicate = HasScalarAtomics
// Dcache discard pseudos. Each defm expands (through SM_Pseudo_Discards) to an
// _IMM and an _SGPR pseudo.
// NOTE(review): these are gated on HasScalarAtomics, the same predicate as the
// atomics above -- confirm that is the intended feature bit for discards.
402 let SubtargetPredicate = HasScalarAtomics in {
403 defm S_DCACHE_DISCARD : SM_Pseudo_Discards <"s_dcache_discard">;
404 defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards <"s_dcache_discard_x2">;
407 //===----------------------------------------------------------------------===//
409 //===----------------------------------------------------------------------===//
411 //===----------------------------------------------------------------------===//
413 //===----------------------------------------------------------------------===//
// Real (encoded) SMRD instruction for the SI encoding family, built from
// pseudo `ps` with 5-bit opcode `op`. Assembled only under the isGFX6GFX7
// predicate and decoded in the "GFX6GFX7" namespace.
// Fields gated by a has_* flag are left unset (?) when the pseudo lacks the
// corresponding operand.
415 class SMRD_Real_si <bits<5> op, SM_Pseudo ps>
417 , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
420 let AssemblerPredicates = [isGFX6GFX7];
421 let DecoderNamespace = "GFX6GFX7";
// Inst{7-0}: low 8 bits of the offset.
423 let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?);
// Inst{14-9}: bits 6..1 of sbase (bit 0 is not encoded).
425 let Inst{14-9} = !if(ps.has_sbase, sbase{6-1}, ?);
// Inst{21-15}: destination register.
426 let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
// Inst{26-22}: opcode.
427 let Inst{26-22} = op;
428 let Inst{31-27} = 0x18; //encoding
431 // FIXME: Assembler should reject trying to use glc on SMRD
432 // instructions on SI.
// Defines the SI real loads <name>_IMM_si and <name>_SGPR_si for opcode `op`.
// `ps` is the pseudo's base name; the default arguments look up the matching
// _IMM and _SGPR load pseudos by name. Each def replaces InOperandList so that
// the offset operand is named $offset and the glc/dlc operands use the GLC/DLC
// operand types.
433 multiclass SM_Real_Loads_si<bits<5> op, string ps,
434 SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
435 SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
// Immediate form: offset is a smrd_offset_8 operand.
437 def _IMM_si : SMRD_Real_si <op, immPs> {
438 let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, GLC:$glc, DLC:$dlc);
441 // FIXME: The operand name $offset is inconsistent with $soff used
// Register form: offset is an SReg_32 register.
443 def _SGPR_si : SMRD_Real_si <op, sgprPs> {
444 let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
// SI real instructions. The first argument is the 5-bit opcode placed in
// Inst{26-22}; the string names the pseudo whose _IMM/_SGPR forms are encoded.
449 defm S_LOAD_DWORD : SM_Real_Loads_si <0x00, "S_LOAD_DWORD">;
450 defm S_LOAD_DWORDX2 : SM_Real_Loads_si <0x01, "S_LOAD_DWORDX2">;
451 defm S_LOAD_DWORDX4 : SM_Real_Loads_si <0x02, "S_LOAD_DWORDX4">;
452 defm S_LOAD_DWORDX8 : SM_Real_Loads_si <0x03, "S_LOAD_DWORDX8">;
453 defm S_LOAD_DWORDX16 : SM_Real_Loads_si <0x04, "S_LOAD_DWORDX16">;
454 defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_si <0x08, "S_BUFFER_LOAD_DWORD">;
455 defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_si <0x09, "S_BUFFER_LOAD_DWORDX2">;
456 defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_si <0x0a, "S_BUFFER_LOAD_DWORDX4">;
457 defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_si <0x0b, "S_BUFFER_LOAD_DWORDX8">;
458 defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_si <0x0c, "S_BUFFER_LOAD_DWORDX16">;
// Operand-less / single-result instructions are encoded directly from their
// pseudo records.
460 def S_MEMTIME_si : SMRD_Real_si <0x1e, S_MEMTIME>;
461 def S_DCACHE_INV_si : SMRD_Real_si <0x1f, S_DCACHE_INV>;
464 //===----------------------------------------------------------------------===//
466 //===----------------------------------------------------------------------===//
// Real (encoded) SMEM instruction for the VI encoding family, built from
// pseudo `ps` with 8-bit opcode `op`. Assembled only under the isGFX8GFX9
// predicate and decoded in the "GFX8" namespace.
// Fields gated by a has_* flag are left unset (?) when the pseudo lacks the
// corresponding operand.
468 class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
470 , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI>
474 let AssemblerPredicates = [isGFX8GFX9];
475 let DecoderNamespace = "GFX8";
// Inst{5-0}: bits 6..1 of sbase (bit 0 is not encoded).
477 let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
// Inst{12-6}: destination register.
478 let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
// Inst{16}: glc bit.
480 let Inst{16} = !if(ps.has_glc, glc, ?);
// Inst{25-18}: opcode.
482 let Inst{25-18} = op;
483 let Inst{31-26} = 0x30; //encoding
// Inst{51-32}: 20-bit offset.
484 let Inst{51-32} = !if(ps.has_offset, offset{19-0}, ?);
// Defines the VI real loads <name>_IMM_vi and <name>_SGPR_vi for opcode `op`.
// `ps` is the pseudo's base name; the default arguments look up the matching
// _IMM and _SGPR load pseudos by name. Each def replaces InOperandList so that
// the offset operand is named $offset: a smrd_offset_20 immediate for the
// _IMM form and an SReg_32 register for the _SGPR form.
487 multiclass SM_Real_Loads_vi<bits<8> op, string ps,
488 SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
489 SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
490 def _IMM_vi : SMEM_Real_vi <op, immPs> {
491 let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
493 def _SGPR_vi : SMEM_Real_vi <op, sgprPs> {
494 let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
// VI real instruction for stores (also reused by the probe reals). Identical
// to SMEM_Real_vi except that Inst{12-6} carries $sdata instead of $sdst.
// NOTE(review): the field is still gated on ps.has_sdst even though it encodes
// sdata -- confirm this is intentional.
498 class SMEM_Real_Store_vi <bits<8> op, SM_Pseudo ps> : SMEM_Real_vi <op, ps> {
503 let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
// Defines the VI real stores <name>_IMM_vi and <name>_SGPR_vi for opcode `op`.
// `ps` is the pseudo's base name; the default arguments look up the matching
// _IMM and _SGPR store pseudos by name. Each def replaces InOperandList, using
// the pseudo's SrcClass/BaseClass for $sdata/$sbase and naming the offset
// operand $offset.
506 multiclass SM_Real_Stores_vi<bits<8> op, string ps,
507 SM_Store_Pseudo immPs = !cast<SM_Store_Pseudo>(ps#_IMM),
508 SM_Store_Pseudo sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR)> {
509 // FIXME: The operand name $offset is inconsistent with $soff used
// Immediate form: offset is a smrd_offset_20 operand.
511 def _IMM_vi : SMEM_Real_Store_vi <op, immPs> {
512 let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
// Register form: offset is an SReg_32 register.
515 def _SGPR_vi : SMEM_Real_Store_vi <op, sgprPs> {
516 let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
// Defines the VI real probes <name>_IMM_vi and <name>_SGPR_vi for opcode `op`.
// Uses SMEM_Real_Store_vi so that $sdata is what lands in Inst{12-6}; the
// pseudo's own operand list is kept as-is.
520 multiclass SM_Real_Probe_vi<bits<8> op, string ps> {
521 def _IMM_vi : SMEM_Real_Store_vi <op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
522 def _SGPR_vi : SMEM_Real_Store_vi <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>;
// VI real loads and stores. The first argument is the 8-bit opcode placed in
// Inst{25-18}; the string names the pseudo whose _IMM/_SGPR forms are encoded.
525 defm S_LOAD_DWORD : SM_Real_Loads_vi <0x00, "S_LOAD_DWORD">;
526 defm S_LOAD_DWORDX2 : SM_Real_Loads_vi <0x01, "S_LOAD_DWORDX2">;
527 defm S_LOAD_DWORDX4 : SM_Real_Loads_vi <0x02, "S_LOAD_DWORDX4">;
528 defm S_LOAD_DWORDX8 : SM_Real_Loads_vi <0x03, "S_LOAD_DWORDX8">;
529 defm S_LOAD_DWORDX16 : SM_Real_Loads_vi <0x04, "S_LOAD_DWORDX16">;
530 defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_vi <0x08, "S_BUFFER_LOAD_DWORD">;
531 defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_vi <0x09, "S_BUFFER_LOAD_DWORDX2">;
532 defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_vi <0x0a, "S_BUFFER_LOAD_DWORDX4">;
533 defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_vi <0x0b, "S_BUFFER_LOAD_DWORDX8">;
534 defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_vi <0x0c, "S_BUFFER_LOAD_DWORDX16">;
536 defm S_STORE_DWORD : SM_Real_Stores_vi <0x10, "S_STORE_DWORD">;
537 defm S_STORE_DWORDX2 : SM_Real_Stores_vi <0x11, "S_STORE_DWORDX2">;
538 defm S_STORE_DWORDX4 : SM_Real_Stores_vi <0x12, "S_STORE_DWORDX4">;
540 defm S_BUFFER_STORE_DWORD : SM_Real_Stores_vi <0x18, "S_BUFFER_STORE_DWORD">;
541 defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_vi <0x19, "S_BUFFER_STORE_DWORDX2">;
542 defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_vi <0x1a, "S_BUFFER_STORE_DWORDX4">;
544 // These instructions use the same encoding
// Each is encoded directly from its pseudo record via SMEM_Real_vi; the first
// argument is the 8-bit opcode.
545 def S_DCACHE_INV_vi : SMEM_Real_vi <0x20, S_DCACHE_INV>;
546 def S_DCACHE_WB_vi : SMEM_Real_vi <0x21, S_DCACHE_WB>;
547 def S_DCACHE_INV_VOL_vi : SMEM_Real_vi <0x22, S_DCACHE_INV_VOL>;
548 def S_DCACHE_WB_VOL_vi : SMEM_Real_vi <0x23, S_DCACHE_WB_VOL>;
549 def S_MEMTIME_vi : SMEM_Real_vi <0x24, S_MEMTIME>;
550 def S_MEMREALTIME_vi : SMEM_Real_vi <0x25, S_MEMREALTIME>;
// VI real encodings for the scratch loads/stores and the ATC probes. The first
// argument is the 8-bit opcode; the string names the pseudo being encoded.
552 defm S_SCRATCH_LOAD_DWORD : SM_Real_Loads_vi <0x05, "S_SCRATCH_LOAD_DWORD">;
553 defm S_SCRATCH_LOAD_DWORDX2 : SM_Real_Loads_vi <0x06, "S_SCRATCH_LOAD_DWORDX2">;
554 defm S_SCRATCH_LOAD_DWORDX4 : SM_Real_Loads_vi <0x07, "S_SCRATCH_LOAD_DWORDX4">;
556 defm S_SCRATCH_STORE_DWORD : SM_Real_Stores_vi <0x15, "S_SCRATCH_STORE_DWORD">;
557 defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_vi <0x16, "S_SCRATCH_STORE_DWORDX2">;
558 defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_vi <0x17, "S_SCRATCH_STORE_DWORDX4">;
560 defm S_ATC_PROBE : SM_Real_Probe_vi <0x26, "S_ATC_PROBE">;
561 defm S_ATC_PROBE_BUFFER : SM_Real_Probe_vi <0x27, "S_ATC_PROBE_BUFFER">;
563 //===----------------------------------------------------------------------===//
565 //===----------------------------------------------------------------------===//
// VI real instruction for scalar atomics, built on SMEM_Real_vi. Copies the
// operand-tying Constraints and DisableEncoding from the atomic pseudo.
567 class SMEM_Atomic_Real_vi <bits<8> op, SM_Atomic_Pseudo ps>
568 : SMEM_Real_vi <op, ps> {
572 let Constraints = ps.Constraints;
573 let DisableEncoding = ps.DisableEncoding;
// Inst{12-6} carries $sdst when glc is set and $sdata otherwise.
// NOTE(review): where `glc` is declared/assigned is not visible in this
// excerpt -- presumably it tracks the returning (_RTN) forms; confirm.
576 let Inst{12-6} = !if(glc, sdst{6-0}, sdata{6-0});
// Defines the four VI real atomics for opcode `op`, one per pseudo variant
// (_IMM, _SGPR, _IMM_RTN, _SGPR_RTN) looked up by name from base name `ps`.
// All four share the same opcode.
579 multiclass SM_Real_Atomics_vi<bits<8> op, string ps> {
580 def _IMM_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
581 def _SGPR_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
582 def _IMM_RTN_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
583 def _SGPR_RTN_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
// VI real encodings for the scalar atomics. The first argument is the 8-bit
// opcode; the string names the pseudo whose four variants are encoded.
// Opcode ranges: buffer 0x40-0x4c, buffer _X2 0x60-0x6c, non-buffer 0x80-0x8c,
// non-buffer _X2 0xa0-0xac.
586 defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_vi <0x40, "S_BUFFER_ATOMIC_SWAP">;
587 defm S_BUFFER_ATOMIC_CMPSWAP : SM_Real_Atomics_vi <0x41, "S_BUFFER_ATOMIC_CMPSWAP">;
588 defm S_BUFFER_ATOMIC_ADD : SM_Real_Atomics_vi <0x42, "S_BUFFER_ATOMIC_ADD">;
589 defm S_BUFFER_ATOMIC_SUB : SM_Real_Atomics_vi <0x43, "S_BUFFER_ATOMIC_SUB">;
590 defm S_BUFFER_ATOMIC_SMIN : SM_Real_Atomics_vi <0x44, "S_BUFFER_ATOMIC_SMIN">;
591 defm S_BUFFER_ATOMIC_UMIN : SM_Real_Atomics_vi <0x45, "S_BUFFER_ATOMIC_UMIN">;
592 defm S_BUFFER_ATOMIC_SMAX : SM_Real_Atomics_vi <0x46, "S_BUFFER_ATOMIC_SMAX">;
593 defm S_BUFFER_ATOMIC_UMAX : SM_Real_Atomics_vi <0x47, "S_BUFFER_ATOMIC_UMAX">;
594 defm S_BUFFER_ATOMIC_AND : SM_Real_Atomics_vi <0x48, "S_BUFFER_ATOMIC_AND">;
595 defm S_BUFFER_ATOMIC_OR : SM_Real_Atomics_vi <0x49, "S_BUFFER_ATOMIC_OR">;
596 defm S_BUFFER_ATOMIC_XOR : SM_Real_Atomics_vi <0x4a, "S_BUFFER_ATOMIC_XOR">;
597 defm S_BUFFER_ATOMIC_INC : SM_Real_Atomics_vi <0x4b, "S_BUFFER_ATOMIC_INC">;
598 defm S_BUFFER_ATOMIC_DEC : SM_Real_Atomics_vi <0x4c, "S_BUFFER_ATOMIC_DEC">;
600 defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Real_Atomics_vi <0x60, "S_BUFFER_ATOMIC_SWAP_X2">;
601 defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_vi <0x61, "S_BUFFER_ATOMIC_CMPSWAP_X2">;
602 defm S_BUFFER_ATOMIC_ADD_X2 : SM_Real_Atomics_vi <0x62, "S_BUFFER_ATOMIC_ADD_X2">;
603 defm S_BUFFER_ATOMIC_SUB_X2 : SM_Real_Atomics_vi <0x63, "S_BUFFER_ATOMIC_SUB_X2">;
604 defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Real_Atomics_vi <0x64, "S_BUFFER_ATOMIC_SMIN_X2">;
605 defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Real_Atomics_vi <0x65, "S_BUFFER_ATOMIC_UMIN_X2">;
606 defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Real_Atomics_vi <0x66, "S_BUFFER_ATOMIC_SMAX_X2">;
607 defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Real_Atomics_vi <0x67, "S_BUFFER_ATOMIC_UMAX_X2">;
608 defm S_BUFFER_ATOMIC_AND_X2 : SM_Real_Atomics_vi <0x68, "S_BUFFER_ATOMIC_AND_X2">;
609 defm S_BUFFER_ATOMIC_OR_X2 : SM_Real_Atomics_vi <0x69, "S_BUFFER_ATOMIC_OR_X2">;
610 defm S_BUFFER_ATOMIC_XOR_X2 : SM_Real_Atomics_vi <0x6a, "S_BUFFER_ATOMIC_XOR_X2">;
611 defm S_BUFFER_ATOMIC_INC_X2 : SM_Real_Atomics_vi <0x6b, "S_BUFFER_ATOMIC_INC_X2">;
612 defm S_BUFFER_ATOMIC_DEC_X2 : SM_Real_Atomics_vi <0x6c, "S_BUFFER_ATOMIC_DEC_X2">;
614 defm S_ATOMIC_SWAP : SM_Real_Atomics_vi <0x80, "S_ATOMIC_SWAP">;
615 defm S_ATOMIC_CMPSWAP : SM_Real_Atomics_vi <0x81, "S_ATOMIC_CMPSWAP">;
616 defm S_ATOMIC_ADD : SM_Real_Atomics_vi <0x82, "S_ATOMIC_ADD">;
617 defm S_ATOMIC_SUB : SM_Real_Atomics_vi <0x83, "S_ATOMIC_SUB">;
618 defm S_ATOMIC_SMIN : SM_Real_Atomics_vi <0x84, "S_ATOMIC_SMIN">;
619 defm S_ATOMIC_UMIN : SM_Real_Atomics_vi <0x85, "S_ATOMIC_UMIN">;
620 defm S_ATOMIC_SMAX : SM_Real_Atomics_vi <0x86, "S_ATOMIC_SMAX">;
621 defm S_ATOMIC_UMAX : SM_Real_Atomics_vi <0x87, "S_ATOMIC_UMAX">;
622 defm S_ATOMIC_AND : SM_Real_Atomics_vi <0x88, "S_ATOMIC_AND">;
623 defm S_ATOMIC_OR : SM_Real_Atomics_vi <0x89, "S_ATOMIC_OR">;
624 defm S_ATOMIC_XOR : SM_Real_Atomics_vi <0x8a, "S_ATOMIC_XOR">;
625 defm S_ATOMIC_INC : SM_Real_Atomics_vi <0x8b, "S_ATOMIC_INC">;
626 defm S_ATOMIC_DEC : SM_Real_Atomics_vi <0x8c, "S_ATOMIC_DEC">;
628 defm S_ATOMIC_SWAP_X2 : SM_Real_Atomics_vi <0xa0, "S_ATOMIC_SWAP_X2">;
629 defm S_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_vi <0xa1, "S_ATOMIC_CMPSWAP_X2">;
630 defm S_ATOMIC_ADD_X2 : SM_Real_Atomics_vi <0xa2, "S_ATOMIC_ADD_X2">;
631 defm S_ATOMIC_SUB_X2 : SM_Real_Atomics_vi <0xa3, "S_ATOMIC_SUB_X2">;
632 defm S_ATOMIC_SMIN_X2 : SM_Real_Atomics_vi <0xa4, "S_ATOMIC_SMIN_X2">;
633 defm S_ATOMIC_UMIN_X2 : SM_Real_Atomics_vi <0xa5, "S_ATOMIC_UMIN_X2">;
634 defm S_ATOMIC_SMAX_X2 : SM_Real_Atomics_vi <0xa6, "S_ATOMIC_SMAX_X2">;
635 defm S_ATOMIC_UMAX_X2 : SM_Real_Atomics_vi <0xa7, "S_ATOMIC_UMAX_X2">;
636 defm S_ATOMIC_AND_X2 : SM_Real_Atomics_vi <0xa8, "S_ATOMIC_AND_X2">;
637 defm S_ATOMIC_OR_X2 : SM_Real_Atomics_vi <0xa9, "S_ATOMIC_OR_X2">;
638 defm S_ATOMIC_XOR_X2 : SM_Real_Atomics_vi <0xaa, "S_ATOMIC_XOR_X2">;
639 defm S_ATOMIC_INC_X2 : SM_Real_Atomics_vi <0xab, "S_ATOMIC_INC_X2">;
640 defm S_ATOMIC_DEC_X2 : SM_Real_Atomics_vi <0xac, "S_ATOMIC_DEC_X2">;
// Defines the VI real discards <name>_IMM_vi and <name>_SGPR_vi for opcode
// `op`, encoding the _IMM and _SGPR discard pseudos looked up by name from
// base name `ps`.
642 multiclass SM_Real_Discard_vi<bits<8> op, string ps> {
643 def _IMM_vi : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_IMM)>;
644 def _SGPR_vi : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_SGPR)>;
// VI real encodings for the dcache discard pseudos (opcodes 0x28 and 0x29).
647 defm S_DCACHE_DISCARD : SM_Real_Discard_vi <0x28, "S_DCACHE_DISCARD">;
648 defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_vi <0x29, "S_DCACHE_DISCARD_X2">;
650 //===----------------------------------------------------------------------===//
652 //===----------------------------------------------------------------------===//
// Offset operand used by the CI literal-offset loads (SMRD_Real_Load_IMM_ci),
// which encode all 32 bits of it in Inst{63-32}. Declared as a
// NamedOperandU32 whose asm match class is "SMRDLiteralOffset".
654 def smrd_literal_offset : NamedOperandU32<"SMRDLiteralOffset",
655 NamedMatchClass<"SMRDLiteralOffset">> {
// Tag the operand with the OPERAND_IMMEDIATE operand type.
656 let OperandType = "OPERAND_IMMEDIATE";
// CI-only real load with a 32-bit literal offset, built from load pseudo `ps`
// with 5-bit opcode `op`. Assembled only under isGFX7Only and decoded in the
// "GFX7" namespace. The instruction is 64 bits wide: the low word has the
// same layout as SMRD_Real_si, and the high word holds the literal offset.
659 class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> :
663 let AssemblerPredicates = [isGFX7Only];
664 let DecoderNamespace = "GFX7";
665 let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc, DLC:$dlc);
// Properties copied over from the pseudo.
667 let LGKM_CNT = ps.LGKM_CNT;
669 let mayLoad = ps.mayLoad;
670 let mayStore = ps.mayStore;
671 let hasSideEffects = ps.hasSideEffects;
672 let SchedRW = ps.SchedRW;
673 let UseNamedOperandTable = ps.UseNamedOperandTable;
// The 8-bit offset field is fixed at 0xff.
// NOTE(review): presumably 0xff is the marker for "a literal offset follows"
// -- confirm against the ISA documentation.
675 let Inst{7-0} = 0xff;
677 let Inst{14-9} = sbase{6-1};
678 let Inst{21-15} = sdst{6-0};
679 let Inst{26-22} = op;
680 let Inst{31-27} = 0x18; //encoding
// Second dword: the full 32-bit literal offset.
681 let Inst{63-32} = offset{31-0};
// CI literal-offset encodings of the _IMM load pseudos. The opcodes match
// those passed to SM_Real_Loads_si for the same instructions.
684 def S_LOAD_DWORD_IMM_ci : SMRD_Real_Load_IMM_ci <0x00, S_LOAD_DWORD_IMM>;
685 def S_LOAD_DWORDX2_IMM_ci : SMRD_Real_Load_IMM_ci <0x01, S_LOAD_DWORDX2_IMM>;
686 def S_LOAD_DWORDX4_IMM_ci : SMRD_Real_Load_IMM_ci <0x02, S_LOAD_DWORDX4_IMM>;
687 def S_LOAD_DWORDX8_IMM_ci : SMRD_Real_Load_IMM_ci <0x03, S_LOAD_DWORDX8_IMM>;
688 def S_LOAD_DWORDX16_IMM_ci : SMRD_Real_Load_IMM_ci <0x04, S_LOAD_DWORDX16_IMM>;
689 def S_BUFFER_LOAD_DWORD_IMM_ci : SMRD_Real_Load_IMM_ci <0x08, S_BUFFER_LOAD_DWORD_IMM>;
690 def S_BUFFER_LOAD_DWORDX2_IMM_ci : SMRD_Real_Load_IMM_ci <0x09, S_BUFFER_LOAD_DWORDX2_IMM>;
691 def S_BUFFER_LOAD_DWORDX4_IMM_ci : SMRD_Real_Load_IMM_ci <0x0a, S_BUFFER_LOAD_DWORDX4_IMM>;
692 def S_BUFFER_LOAD_DWORDX8_IMM_ci : SMRD_Real_Load_IMM_ci <0x0b, S_BUFFER_LOAD_DWORDX8_IMM>;
693 def S_BUFFER_LOAD_DWORDX16_IMM_ci : SMRD_Real_Load_IMM_ci <0x0c, S_BUFFER_LOAD_DWORDX16_IMM>;
// CI-only real SMRD instruction built from pseudo `ps` with 5-bit opcode `op`.
// The bit layout is the same as SMRD_Real_si, but it is assembled only under
// isGFX7Only and decoded in the "GFX7" namespace.
// NOTE(review): it is registered under SIEncodingFamily.SI, the same family as
// SMRD_Real_si -- confirm that sharing the family is intended.
695 class SMRD_Real_ci <bits<5> op, SM_Pseudo ps>
697 , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
700 let AssemblerPredicates = [isGFX7Only];
701 let DecoderNamespace = "GFX7";
// Inst{7-0}: low 8 bits of the offset.
703 let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?);
// Inst{14-9}: bits 6..1 of sbase (bit 0 is not encoded).
705 let Inst{14-9} = !if(ps.has_sbase, sbase{6-1}, ?);
// Inst{21-15}: destination register.
706 let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
// Inst{26-22}: opcode.
707 let Inst{26-22} = op;
708 let Inst{31-27} = 0x18; //encoding
// CI encoding of s_dcache_inv_vol (opcode 0x1d).
711 def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>;
713 //===----------------------------------------------------------------------===//
714 // Scalar Memory Patterns
715 //===----------------------------------------------------------------------===//
// Pattern fragment matching a `load` of $ptr that may be selected to a scalar
// memory load. The SelectionDAG predicate is isUniformLoad(N). The GlobalISel
// predicate inspects, in order: that MI has exactly one memory operand, that
// the instruction is uniform, and whether the address (as described by
// getAddrModeInfo) has VGPR parts.
// NOTE(review): the statements executed when each check fires are not visible
// in this excerpt -- presumably each rejects the match; confirm.
717 def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformLoad(N);}]> {
718 let GISelPredicateCode = [{
719 if (!MI.hasOneMemOperand())
721 if (!isInstrUniform(MI))
724 // FIXME: We should probably be caching this.
725 SmallVector<GEPInfo, 4> AddrInfo;
726 getAddrModeInfo(MI, MRI, AddrInfo);
728 if (hasVgprParts(AddrInfo))
// Complex patterns used for SMRD address selection. Each names the C++
// selector function that implements the match. The non-buffer forms match an
// i64 address and yield two operands (used below as $sbase and $offset); the
// buffer forms match an i32 offset and yield one operand.
734 def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
735 def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
736 def SMRDSgpr : ComplexPattern<i64, 2, "SelectSMRDSgpr">;
737 def SMRDBufferImm : ComplexPattern<i32, 1, "SelectSMRDBufferImm">;
738 def SMRDBufferImm32 : ComplexPattern<i32, 1, "SelectSMRDBufferImm32">;
// Selection patterns mapping a uniform load (smrd_load) of value type `vt` to
// the scalar load named by `Instr`. Four address forms are covered; in every
// case the trailing two 0 operands are the glc and dlc immediates.
740 multiclass SMRD_Pattern <string Instr, ValueType vt> {
// Immediate offset matched by SMRDImm -> <Instr>_IMM.
744 (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
745 (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0, 0))
748 // 2. 32-bit IMM offset on CI
750 (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
751 (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0, 0))> {
752 let OtherPredicates = [isGFX7Only];
// Register offset matched by SMRDSgpr -> <Instr>_SGPR.
757 (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
758 (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0, 0))
// Bare SReg_64 address with no offset -> <Instr>_IMM with offset 0.
763 (vt (smrd_load (i64 SReg_64:$sbase))),
764 (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0, 0))
// Selection patterns mapping an SIsbuffer_load of value type `vt` to the
// scalar buffer load named by `Instr`. The node's i1 $glc and $dlc operands
// are converted with as_i1imm and forwarded to the instruction.
768 multiclass SMLoad_Pattern <string Instr, ValueType vt> {
769 // 1. Offset as an immediate
771 (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc, i1:$dlc),
772 (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc),
776 // 2. 32-bit IMM offset on CI
778 (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc, i1:$dlc)),
779 (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc), (as_i1imm $dlc))> {
780 let OtherPredicates = [isGFX7Only];
783 // 3. Offset loaded in a 32-bit SGPR
785 (SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc, i1:$dlc),
786 (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc),
791 // Global and constant loads can be selected to either MUBUF or SMRD
792 // instructions, but SMRD instructions are faster so we want the instruction
793 // selector to prefer those.
794 let AddedComplexity = 100 in {
// S_LOAD_DWORD / X2 / X4 patterns are instantiated once per value type listed
// by Reg32Types.types, SReg_64.RegTypes and SReg_128.RegTypes respectively.
796 foreach vt = Reg32Types.types in {
797 defm : SMRD_Pattern <"S_LOAD_DWORD", vt>;
800 foreach vt = SReg_64.RegTypes in {
801 defm : SMRD_Pattern <"S_LOAD_DWORDX2", vt>;
804 foreach vt = SReg_128.RegTypes in {
805 defm : SMRD_Pattern <"S_LOAD_DWORDX4", vt>;
// The X8 and X16 loads are instantiated for the integer vector type only.
808 defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>;
809 defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>;
// Scalar buffer loads: integer result types.
811 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", i32>;
812 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2i32>;
813 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4i32>;
814 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8i32>;
815 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16i32>;
// Scalar buffer loads: floating-point result types of the same widths.
817 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", f32>;
818 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2f32>;
819 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4f32>;
820 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8f32>;
821 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16f32>;
822 } // End let AddedComplexity = 100
// Source side of a selection pattern: a 64-bit readcyclecounter node.
// NOTE(review): the instruction it selects to, and any predicate guarding the
// pattern, are on lines not shown in this excerpt - confirm in the full file.
825 (i64 (readcyclecounter)),
829 //===----------------------------------------------------------------------===//
831 //===----------------------------------------------------------------------===//
// Real (encodable) 64-bit (Enc64) SMEM instruction for GFX10 and later:
// assembler matching is gated on isGFX10Plus and the decoder tables are placed
// in the "GFX10" namespace.
//   op - 8-bit opcode placed in Inst{25-18}.
//   ps - the pseudo whose has_* / offset_is_imm flags decide which encoding
//        fields are driven by operands; fields for absent operands stay `?`.
833 class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps> :
834 SM_Real<ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10>, Enc64 {
838 let AssemblerPredicates = [isGFX10Plus];
839 let DecoderNamespace = "GFX10";
// Only bits 6-1 of sbase are encoded; bit 0 is not part of the field.
841 let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
842 let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
843 let Inst{14} = !if(ps.has_dlc, dlc, ?);
844 let Inst{16} = !if(ps.has_glc, glc, ?);
// Opcode, followed by the fixed 0x3d value in bits 31-26.
845 let Inst{25-18} = op;
846 let Inst{31-26} = 0x3d;
// The 20-bit immediate offset is encoded only for immediate-offset pseudos.
847 let Inst{51-32} = !if(ps.offset_is_imm, !if(ps.has_offset, offset{19-0}, ?), ?);
// Bits 63-57 hold SGPR_NULL's hardware encoding for immediate-offset forms;
// otherwise they hold the low 7 bits of the offset operand (when present).
848 let Inst{63-57} = !if(ps.offset_is_imm, !cast<int>(SGPR_NULL.HWEncoding),
849 !if(ps.has_offset, offset{6-0}, ?));
// Defines the GFX10 real encodings for one scalar load family.
//   op     - opcode shared by both variants.
//   ps     - base name of the pseudo; the _IMM and _SGPR pseudos are looked up
//            from it by the default values of immPs / sgprPs.
// Each variant overrides InOperandList so the operand list is
// (sbase, offset, glc, dlc) with the offset kind matching the variant.
852 multiclass SM_Real_Loads_gfx10<bits<8> op, string ps,
853 SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
854 SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
// Immediate-offset form: offset is an smrd_offset_20 operand.
855 def _IMM_gfx10 : SMEM_Real_gfx10<op, immPs> {
856 let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
// SGPR-offset form: offset is an SReg_32 register operand.
858 def _SGPR_gfx10 : SMEM_Real_gfx10<op, sgprPs> {
859 let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
// GFX10 real encoding for instructions that carry a data source operand:
// identical to SMEM_Real_gfx10 except that Inst{12-6} is driven by sdata
// rather than sdst (still conditional on ps.has_sdst).
863 class SMEM_Real_Store_gfx10<bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx10<op, ps> {
867 let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
// Defines the GFX10 real encodings for one scalar store family.
//   op     - opcode shared by both variants.
//   ps     - base name of the pseudo; the _IMM and _SGPR pseudos are looked up
//            from it by the default values of immPs / sgprPs.
// Each variant overrides InOperandList so the operand list is
// (sdata, sbase, offset, glc, dlc) with the offset kind matching the variant.
870 multiclass SM_Real_Stores_gfx10<bits<8> op, string ps,
871 SM_Store_Pseudo immPs = !cast<SM_Store_Pseudo>(ps#_IMM),
872 SM_Store_Pseudo sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR)> {
873 // FIXME: The operand name $offset is inconsistent with $soff used
// Immediate-offset form: offset is an smrd_offset_20 operand.
875 def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, immPs> {
876 let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
// SGPR-offset form: offset is an SReg_32 register operand.
879 def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> {
880 let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
// GFX10 real encodings for the scalar load families. Each defm produces an
// _IMM_gfx10 and an _SGPR_gfx10 definition.
// S_LOAD_DWORD*: opcodes 0x000-0x004.
884 defm S_LOAD_DWORD : SM_Real_Loads_gfx10<0x000, "S_LOAD_DWORD">;
885 defm S_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x001, "S_LOAD_DWORDX2">;
886 defm S_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x002, "S_LOAD_DWORDX4">;
887 defm S_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x003, "S_LOAD_DWORDX8">;
888 defm S_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x004, "S_LOAD_DWORDX16">;
// S_SCRATCH_LOAD_DWORD*: opcodes 0x005-0x007, only when the subtarget has
// scalar flat-scratch instructions.
890 let SubtargetPredicate = HasScalarFlatScratchInsts in {
891 defm S_SCRATCH_LOAD_DWORD : SM_Real_Loads_gfx10<0x005, "S_SCRATCH_LOAD_DWORD">;
892 defm S_SCRATCH_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x006, "S_SCRATCH_LOAD_DWORDX2">;
893 defm S_SCRATCH_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x007, "S_SCRATCH_LOAD_DWORDX4">;
894 } // End SubtargetPredicate = HasScalarFlatScratchInsts
// S_BUFFER_LOAD_DWORD*: opcodes 0x008-0x00c.
896 defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_gfx10<0x008, "S_BUFFER_LOAD_DWORD">;
897 defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x009, "S_BUFFER_LOAD_DWORDX2">;
898 defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x00a, "S_BUFFER_LOAD_DWORDX4">;
899 defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x00b, "S_BUFFER_LOAD_DWORDX8">;
900 defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x00c, "S_BUFFER_LOAD_DWORDX16">;
// GFX10 real encodings for the scalar store families, all gated on
// HasScalarStores. Each defm produces an _IMM_gfx10 and an _SGPR_gfx10
// definition.
902 let SubtargetPredicate = HasScalarStores in {
// S_STORE_DWORD*: opcodes 0x010-0x012.
903 defm S_STORE_DWORD : SM_Real_Stores_gfx10<0x010, "S_STORE_DWORD">;
904 defm S_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x011, "S_STORE_DWORDX2">;
905 defm S_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x012, "S_STORE_DWORDX4">;
// S_SCRATCH_STORE_DWORD*: opcodes 0x015-0x017. The flat-scratch requirement
// is added through OtherPredicates so the enclosing SubtargetPredicate
// (HasScalarStores) stays in effect.
906 let OtherPredicates = [HasScalarFlatScratchInsts] in {
907 defm S_SCRATCH_STORE_DWORD : SM_Real_Stores_gfx10<0x015, "S_SCRATCH_STORE_DWORD">;
908 defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x016, "S_SCRATCH_STORE_DWORDX2">;
909 defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x017, "S_SCRATCH_STORE_DWORDX4">;
910 } // End OtherPredicates = [HasScalarFlatScratchInsts]
// S_BUFFER_STORE_DWORD*: opcodes 0x018-0x01a.
911 defm S_BUFFER_STORE_DWORD : SM_Real_Stores_gfx10<0x018, "S_BUFFER_STORE_DWORD">;
912 defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x019, "S_BUFFER_STORE_DWORDX2">;
913 defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x01a, "S_BUFFER_STORE_DWORDX4">;
914 } // End SubtargetPredicate = HasScalarStores
// GFX10 real encodings for SMEM instructions defined directly from a single
// pseudo (no _IMM / _SGPR variants). The definitions are not listed in opcode
// order.
916 def S_MEMREALTIME_gfx10 : SMEM_Real_gfx10<0x025, S_MEMREALTIME>;
917 def S_MEMTIME_gfx10 : SMEM_Real_gfx10<0x024, S_MEMTIME>;
918 def S_GL1_INV_gfx10 : SMEM_Real_gfx10<0x01f, S_GL1_INV>;
919 def S_GET_WAVEID_IN_WORKGROUP_gfx10 : SMEM_Real_gfx10<0x02a, S_GET_WAVEID_IN_WORKGROUP>;
920 def S_DCACHE_INV_gfx10 : SMEM_Real_gfx10<0x020, S_DCACHE_INV>;
// S_DCACHE_WB is additionally gated on HasScalarStores.
922 let SubtargetPredicate = HasScalarStores in {
923 def S_DCACHE_WB_gfx10 : SMEM_Real_gfx10<0x021, S_DCACHE_WB>;
924 } // End SubtargetPredicate = HasScalarStores
// Defines the _IMM_gfx10 and _SGPR_gfx10 real encodings for a probe pseudo
// family named `ps`, sharing opcode `op`. The store encoding class is reused,
// so Inst{12-6} is taken from the sdata operand.
926 multiclass SM_Real_Probe_gfx10<bits<8> op, string ps> {
927 def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
928 def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
// GFX10 real encodings for the ATC probe instructions (opcodes 0x26, 0x27).
931 defm S_ATC_PROBE : SM_Real_Probe_gfx10 <0x26, "S_ATC_PROBE">;
932 defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx10 <0x27, "S_ATC_PROBE_BUFFER">;
// GFX10 real encoding for scalar atomic pseudos. Extends SMEM_Real_gfx10 and
// overrides two encoding fields; operand constraints and the list of operands
// excluded from encoding are copied from the pseudo.
934 class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps>
935 : SMEM_Real_gfx10 <op, ps> {
940 let Constraints = ps.Constraints;
941 let DisableEncoding = ps.DisableEncoding;
// Unlike the base class, a missing dlc operand encodes as 0 rather than `?`.
945 let Inst{14} = !if(ps.has_dlc, dlc, 0);
// Bits 12-6 come from sdst when glc is set, otherwise from sdata.
946 let Inst{12-6} = !if(glc, sdst{6-0}, sdata{6-0});
// Defines the four GFX10 real encodings of one scalar atomic family named
// `ps`: immediate- and SGPR-offset forms, each in a plain and an _RTN variant.
// All four share the same opcode `op`.
949 multiclass SM_Real_Atomics_gfx10<bits<8> op, string ps> {
950 def _IMM_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
951 def _SGPR_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
952 def _IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
953 def _SGPR_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
// GFX10 real encodings gated on HasScalarAtomics. Besides the atomic
// families, the S_DCACHE_DISCARD definitions at the end also sit inside this
// let block.
956 let SubtargetPredicate = HasScalarAtomics in {
// S_BUFFER_ATOMIC_*: opcodes 0x40-0x4c.
958 defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x40, "S_BUFFER_ATOMIC_SWAP">;
959 defm S_BUFFER_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x41, "S_BUFFER_ATOMIC_CMPSWAP">;
960 defm S_BUFFER_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x42, "S_BUFFER_ATOMIC_ADD">;
961 defm S_BUFFER_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x43, "S_BUFFER_ATOMIC_SUB">;
962 defm S_BUFFER_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x44, "S_BUFFER_ATOMIC_SMIN">;
963 defm S_BUFFER_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x45, "S_BUFFER_ATOMIC_UMIN">;
964 defm S_BUFFER_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x46, "S_BUFFER_ATOMIC_SMAX">;
965 defm S_BUFFER_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x47, "S_BUFFER_ATOMIC_UMAX">;
966 defm S_BUFFER_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x48, "S_BUFFER_ATOMIC_AND">;
967 defm S_BUFFER_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x49, "S_BUFFER_ATOMIC_OR">;
968 defm S_BUFFER_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x4a, "S_BUFFER_ATOMIC_XOR">;
969 defm S_BUFFER_ATOMIC_INC : SM_Real_Atomics_gfx10 <0x4b, "S_BUFFER_ATOMIC_INC">;
970 defm S_BUFFER_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x4c, "S_BUFFER_ATOMIC_DEC">;
// S_BUFFER_ATOMIC_*_X2: opcodes 0x60-0x6c.
972 defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0x60, "S_BUFFER_ATOMIC_SWAP_X2">;
973 defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0x61, "S_BUFFER_ATOMIC_CMPSWAP_X2">;
974 defm S_BUFFER_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0x62, "S_BUFFER_ATOMIC_ADD_X2">;
975 defm S_BUFFER_ATOMIC_SUB_X2 : SM_Real_Atomics_gfx10 <0x63, "S_BUFFER_ATOMIC_SUB_X2">;
976 defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0x64, "S_BUFFER_ATOMIC_SMIN_X2">;
977 defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0x65, "S_BUFFER_ATOMIC_UMIN_X2">;
978 defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0x66, "S_BUFFER_ATOMIC_SMAX_X2">;
979 defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0x67, "S_BUFFER_ATOMIC_UMAX_X2">;
980 defm S_BUFFER_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0x68, "S_BUFFER_ATOMIC_AND_X2">;
981 defm S_BUFFER_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0x69, "S_BUFFER_ATOMIC_OR_X2">;
982 defm S_BUFFER_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0x6a, "S_BUFFER_ATOMIC_XOR_X2">;
983 defm S_BUFFER_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0x6b, "S_BUFFER_ATOMIC_INC_X2">;
984 defm S_BUFFER_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0x6c, "S_BUFFER_ATOMIC_DEC_X2">;
// S_ATOMIC_*: opcodes 0x80-0x8c.
986 defm S_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x80, "S_ATOMIC_SWAP">;
987 defm S_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x81, "S_ATOMIC_CMPSWAP">;
988 defm S_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x82, "S_ATOMIC_ADD">;
989 defm S_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x83, "S_ATOMIC_SUB">;
990 defm S_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x84, "S_ATOMIC_SMIN">;
991 defm S_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x85, "S_ATOMIC_UMIN">;
992 defm S_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x86, "S_ATOMIC_SMAX">;
993 defm S_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x87, "S_ATOMIC_UMAX">;
994 defm S_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x88, "S_ATOMIC_AND">;
995 defm S_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x89, "S_ATOMIC_OR">;
996 defm S_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x8a, "S_ATOMIC_XOR">;
997 defm S_ATOMIC_INC : SM_Real_Atomics_gfx10 <0x8b, "S_ATOMIC_INC">;
998 defm S_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x8c, "S_ATOMIC_DEC">;
// S_ATOMIC_*_X2: opcodes 0xa0-0xac.
1000 defm S_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0xa0, "S_ATOMIC_SWAP_X2">;
1001 defm S_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0xa1, "S_ATOMIC_CMPSWAP_X2">;
1002 defm S_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0xa2, "S_ATOMIC_ADD_X2">;
1003 defm S_ATOMIC_SUB_X2 : SM_Real_Atomics_gfx10 <0xa3, "S_ATOMIC_SUB_X2">;
1004 defm S_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0xa4, "S_ATOMIC_SMIN_X2">;
1005 defm S_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0xa5, "S_ATOMIC_UMIN_X2">;
1006 defm S_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0xa6, "S_ATOMIC_SMAX_X2">;
1007 defm S_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0xa7, "S_ATOMIC_UMAX_X2">;
1008 defm S_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0xa8, "S_ATOMIC_AND_X2">;
1009 defm S_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0xa9, "S_ATOMIC_OR_X2">;
1010 defm S_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0xaa, "S_ATOMIC_XOR_X2">;
1011 defm S_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0xab, "S_ATOMIC_INC_X2">;
1012 defm S_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0xac, "S_ATOMIC_DEC_X2">;
// Defines the _IMM_gfx10 and _SGPR_gfx10 real encodings for a discard pseudo
// family named `ps`, sharing opcode `op`; uses the base SMEM_Real_gfx10
// encoding unchanged.
1014 multiclass SM_Real_Discard_gfx10<bits<8> op, string ps> {
1015 def _IMM_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
1016 def _SGPR_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
// S_DCACHE_DISCARD / _X2: opcodes 0x28, 0x29.
1019 defm S_DCACHE_DISCARD : SM_Real_Discard_gfx10 <0x28, "S_DCACHE_DISCARD">;
1020 defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_gfx10 <0x29, "S_DCACHE_DISCARD_X2">;
1022 } // End SubtargetPredicate = HasScalarAtomics