From 23499103f77180977ddeb31b680cb66159141d61 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Thu, 10 Mar 2022 14:04:16 -0800
Subject: [PATCH] [AMDGPU] Support for gfx940 flat lds opcodes

Differential Revision: https://reviews.llvm.org/D121414
---
NOTE(review): line structure below was restored from a whitespace-collapsed
copy of this patch. Some angle-bracket spans (TableGen template parameter and
argument lists, the author address) appear to have been dropped by the
extraction, and leading indentation is a best guess, so hunk line counts may
not match -- confirm against the original commit before applying.

 llvm/lib/Target/AMDGPU/FLATInstructions.td | 103 +++++++++++++++++++++
 llvm/test/MC/AMDGPU/gfx940_asm_features.s | 68 ++++++++++++++
 .../Disassembler/AMDGPU/gfx940_dasm_features.txt | 54 +++++++++++
 3 files changed, 225 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 9f086a29d16f..a2a9c3d7788b 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -243,6 +243,33 @@ multiclass FLAT_Global_Store_Pseudo {
   }
 }
 
+class FLAT_Global_Load_LDS_Pseudo : FLAT_Pseudo<
+  opName,
+  (outs ),
+  !con(
+      !if(EnableSaddr, (ins SReg_64:$saddr, VGPR_32:$vaddr), (ins VReg_64:$vaddr)),
+      (ins flat_offset:$offset, CPol_0:$cpol)),
+  " $vaddr"#!if(EnableSaddr, ", $saddr", ", off")#"$offset$cpol"> {
+  let LGKM_CNT = 1;
+  let is_flat_global = 1;
+  let has_data = 0;
+  let has_vdst = 0;
+  let mayLoad = 1;
+  let mayStore = 1;
+  let has_saddr = 1;
+  let enabled_saddr = EnableSaddr;
+  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
+  let Uses = [M0, EXEC];
+  let SchedRW = [WriteVMEM, WriteLDS];
+}
+
+multiclass FLAT_Global_Load_LDS_Pseudo {
+  def "" : FLAT_Global_Load_LDS_Pseudo,
+    GlobalSaddrTable;
+  def _SADDR : FLAT_Global_Load_LDS_Pseudo,
+    GlobalSaddrTable;
+}
+
 class FLAT_Global_Store_AddTid_Pseudo : FLAT_Pseudo<
   opName,
@@ -366,6 +393,47 @@ multiclass FLAT_Scratch_Store_Pseudo {
   }
 }
 
+class FLAT_Scratch_Load_LDS_Pseudo : FLAT_Pseudo<
+  opName,
+  (outs ),
+  !if(EnableSVE,
+    (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol),
+    !if(EnableSaddr,
+      (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol),
+      !if(EnableVaddr,
+        (ins VGPR_32:$vaddr, flat_offset:$offset, CPol:$cpol),
+        (ins flat_offset:$offset, CPol:$cpol)))),
+  " "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
+
+  let LGKM_CNT = 1;
+  let is_flat_scratch = 1;
+  let has_data = 0;
+  let has_vdst = 0;
+  let mayLoad = 1;
+  let mayStore = 1;
+  let has_saddr = 1;
+  let enabled_saddr = EnableSaddr;
+  let has_vaddr = EnableVaddr;
+  let has_sve = EnableSVE;
+  let sve = EnableVaddr;
+  let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));
+  let Uses = [M0, EXEC];
+  let SchedRW = [WriteVMEM, WriteLDS];
+}
+
+multiclass FLAT_Scratch_Load_LDS_Pseudo {
+  def "" : FLAT_Scratch_Load_LDS_Pseudo,
+    FlatScratchInst;
+  def _SADDR : FLAT_Scratch_Load_LDS_Pseudo,
+    FlatScratchInst;
+  def _SVS : FLAT_Scratch_Load_LDS_Pseudo,
+    FlatScratchInst;
+  def _ST : FLAT_Scratch_Load_LDS_Pseudo,
+    FlatScratchInst;
+}
+
 class FLAT_AtomicNoRet_Pseudo pattern = []> : FLAT_Pseudo {
@@ -772,6 +840,16 @@ defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
 let SubtargetPredicate = HasGFX10_BEncoding in
   defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub",
                               VGPR_32, i32, int_amdgcn_global_atomic_csub>;
+
+let SubtargetPredicate = isGFX940Plus in {
+
+defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ubyte">;
+defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sbyte">;
+defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ushort">;
+defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">;
+defm GLOBAL_LOAD_LDS_DWORD : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">;
+
+} // End let SubtargetPredicate = isGFX940Plus
 } // End is_flat_global = 1
 
 
@@ -803,6 +881,16 @@ defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4",
 defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
 defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;
 
+let SubtargetPredicate = isGFX940Plus in {
+
+defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ubyte">;
+defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sbyte">;
+defm SCRATCH_LOAD_LDS_USHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ushort">;
+defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sshort">;
+defm SCRATCH_LOAD_LDS_DWORD : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_dword">;
+
+} // End let SubtargetPredicate = isGFX940Plus
+
 } // End SubtargetPredicate = HasFlatScratchInsts
 
 let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
@@ -1613,6 +1701,13 @@ defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
 defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
 defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
 
+let AssemblerPredicate = isGFX940Plus in {
+defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_vi <0x026>;
+defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_vi <0x027>;
+defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_vi <0x028>;
+defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_vi <0x029>;
+defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_vi <0x02a>;
+} // End let AssemblerPredicate = isGFX940Plus
 
 defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>;
 defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Real_Atomics_vi <0x41>;
@@ -1641,6 +1736,14 @@ defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Real_Atomics_vi <0x6a>;
 defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Real_Atomics_vi <0x6b>;
 defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Real_Atomics_vi <0x6c>;
 
+let AssemblerPredicate = isGFX940Plus in {
+defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_SVE_vi <0x026>;
+defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_SVE_vi <0x027>;
+defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_AllAddr_SVE_vi <0x028>;
+defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_SVE_vi <0x029>;
+defm SCRATCH_LOAD_LDS_DWORD : FLAT_Real_AllAddr_SVE_vi <0x02a>;
+} // End let AssemblerPredicate = isGFX940Plus
+
 defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_SVE_vi <0x10>;
 defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_SVE_vi <0x11>;
 defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_SVE_vi <0x12>;
diff --git a/llvm/test/MC/AMDGPU/gfx940_asm_features.s b/llvm/test/MC/AMDGPU/gfx940_asm_features.s
index c1c7c0dc3228..56d25976bfbe 100644
--- a/llvm/test/MC/AMDGPU/gfx940_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx940_asm_features.s
@@ -130,6 +130,74 @@ ds_pk_add_rtn_bf16 v3, v2, v1
 ds_pk_add_rtn_bf16 a3, v2, a1
 
 // NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: global_load_lds_dword v[2:3], off ; encoding: [0x00,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_dword v[2:3], off
+
+// NOT-GFX940: error:
+// GFX940: global_load_lds_dword v[2:3], off sc0 nt sc1 ; encoding: [0x00,0x80,0xab,0xde,0x02,0x00,0x7f,0x00]
+global_load_lds_dword v[2:3], off sc0 nt sc1
+
+// NOT-GFX940: error:
+// GFX940: global_load_lds_dword v[2:3], off offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_dword v[2:3], off offset:4
+
+// NOT-GFX940: error:
+// GFX940: global_load_lds_dword v2, s[4:5] offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x04,0x00]
+global_load_lds_dword v2, s[4:5] offset:4
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: global_load_lds_ubyte v[2:3], off ; encoding: [0x00,0x80,0x98,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_ubyte v[2:3], off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: global_load_lds_sbyte v[2:3], off ; encoding: [0x00,0x80,0x9c,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_sbyte v[2:3], off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: global_load_lds_sshort v[2:3], off ; encoding: [0x00,0x80,0xa4,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_sshort v[2:3], off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: global_load_lds_ushort v[2:3], off ; encoding: [0x00,0x80,0xa0,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_ushort v[2:3], off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_dword v2, off ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+scratch_load_lds_dword v2, off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_dword v2, s4 ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00]
+scratch_load_lds_dword v2, s4
+
+// NOT-GFX940: error:
+// GFX940: scratch_load_lds_dword v2, s4 offset:4 ; encoding: [0x04,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00]
+scratch_load_lds_dword v2, s4 offset:4
+
+// NOT-GFX940: error:
+// GFX940: scratch_load_lds_dword off, s4 offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x04,0x00]
+scratch_load_lds_dword off, s4 offset:4
+
+// NOT-GFX940: error:
+// GFX940: scratch_load_lds_dword off, off offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00]
+scratch_load_lds_dword off, off offset:4
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_ubyte v2, off ; encoding: [0x00,0x60,0x98,0xdc,0x02,0x00,0x7f,0x00]
+scratch_load_lds_ubyte v2, off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_sbyte v2, off ; encoding: [0x00,0x60,0x9c,0xdc,0x02,0x00,0x7f,0x00]
+scratch_load_lds_sbyte v2, off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_ushort v2, off ; encoding: [0x00,0x60,0xa0,0xdc,0x02,0x00,0x7f,0x00]
+scratch_load_lds_ushort v2, off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_sshort v2, off ; encoding: [0x00,0x60,0xa4,0xdc,0x02,0x00,0x7f,0x00]
+scratch_load_lds_sshort v2, off
+
+// NOT-GFX940: error: instruction not supported on this GPU
 // GFX940: v_mov_b64_e32 v[2:3], v[4:5] ; encoding: [0x04,0x71,0x04,0x7e]
 v_mov_b64 v[2:3], v[4:5]
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
index 0922536d90af..39dcf53452a4 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
@@ -87,6 +87,60 @@
 # GFX940: ds_pk_add_rtn_bf16 a3, v2, a1 ; encoding: [0x00,0x00,0x70,0xdb,0x02,0x01,0x00,0x03]
 0x00,0x00,0x70,0xdb,0x02,0x01,0x00,0x03
 
+# GFX940: global_load_lds_dword v[2:3], off ; encoding: [0x00,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_dword v[2:3], off sc0 nt sc1 ; encoding: [0x00,0x80,0xab,0xde,0x02,0x00,0x7f,0x00]
+0x00,0x80,0xab,0xde,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_dword v[2:3], off offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+0x04,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_dword v2, s[4:5] offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x04,0x00]
+0x04,0x80,0xa8,0xdc,0x02,0x00,0x04,0x00
+
+# GFX940: global_load_lds_ubyte v[2:3], off ; encoding: [0x00,0x80,0x98,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x80,0x98,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_sbyte v[2:3], off ; encoding: [0x00,0x80,0x9c,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x80,0x9c,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_sshort v[2:3], off ; encoding: [0x00,0x80,0xa4,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x80,0xa4,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_ushort v[2:3], off ; encoding: [0x00,0x80,0xa0,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x80,0xa0,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_dword v2, off ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x60,0xa8,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_dword v2, s4 ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00]
+0x00,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00
+
+# GFX940: scratch_load_lds_dword v2, s4 offset:4 ; encoding: [0x04,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00]
+0x04,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00
+
+# GFX940: scratch_load_lds_dword off, s4 offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x04,0x00]
+0x04,0x40,0xa8,0xdc,0x00,0x00,0x04,0x00
+
+# GFX940: scratch_load_lds_dword off, off offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00]
+0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_dword off, off offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00]
+0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_ubyte v2, off ; encoding: [0x00,0x60,0x98,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x60,0x98,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_sbyte v2, off ; encoding: [0x00,0x60,0x9c,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x60,0x9c,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_ushort v2, off ; encoding: [0x00,0x60,0xa0,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x60,0xa0,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_sshort v2, off ; encoding: [0x00,0x60,0xa4,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x60,0xa4,0xdc,0x02,0x00,0x7f,0x00
+
 # GFX940: v_mov_b64_e32 v[2:3], v[4:5] ; encoding: [0x04,0x71,0x04,0x7e]
 0x04,0x71,0x04,0x7e
 
-- 
2.11.4.GIT