1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX11 %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX12 %s
5 define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
6 ; GFX11-LABEL: load_2dmsaa:
7 ; GFX11: ; %bb.0: ; %main_body
8 ; GFX11-NEXT: image_msaa_load v[0:3], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm ; encoding: [0x98,0x01,0x60,0xf0,0x00,0x00,0x00,0x00]
9 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
10 ; GFX11-NEXT: ; return to shader part epilog
12 ; GFX12-LABEL: load_2dmsaa:
13 ; GFX12: ; %bb.0: ; %main_body
14 ; GFX12-NEXT: image_msaa_load v[0:3], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm ; encoding: [0x06,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x00]
15 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
16 ; GFX12-NEXT: ; return to shader part epilog
18 %v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
22 define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
23 ; GFX11-LABEL: load_2dmsaa_both:
24 ; GFX11: ; %bb.0: ; %main_body
25 ; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v8, 0 ; encoding: [0x00,0x01,0x10,0xca,0x80,0x00,0x08,0x05]
26 ; GFX11-NEXT: v_dual_mov_b32 v7, v2 :: v_dual_mov_b32 v6, v1 ; encoding: [0x02,0x01,0x10,0xca,0x01,0x01,0x06,0x07]
27 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4) ; encoding: [0x42,0x02,0x87,0xbf]
28 ; GFX11-NEXT: v_mov_b32_e32 v9, v8 ; encoding: [0x08,0x03,0x12,0x7e]
29 ; GFX11-NEXT: v_mov_b32_e32 v10, v8 ; encoding: [0x08,0x03,0x14,0x7e]
30 ; GFX11-NEXT: v_mov_b32_e32 v11, v8 ; encoding: [0x08,0x03,0x16,0x7e]
31 ; GFX11-NEXT: v_mov_b32_e32 v12, v8 ; encoding: [0x08,0x03,0x18,0x7e]
32 ; GFX11-NEXT: v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9 ; encoding: [0x08,0x01,0x10,0xca,0x09,0x01,0x00,0x00]
33 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; encoding: [0x93,0x01,0x87,0xbf]
34 ; GFX11-NEXT: v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11 ; encoding: [0x0a,0x01,0x10,0xca,0x0b,0x01,0x02,0x02]
35 ; GFX11-NEXT: v_mov_b32_e32 v4, v12 ; encoding: [0x0c,0x03,0x08,0x7e]
36 ; GFX11-NEXT: image_msaa_load v[0:4], v[5:7], s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe ; encoding: [0x98,0x02,0x60,0xf0,0x05,0x00,0x60,0x00]
37 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
38 ; GFX11-NEXT: global_store_b32 v8, v4, s[8:9] ; encoding: [0x00,0x00,0x6a,0xdc,0x08,0x04,0x08,0x00]
39 ; GFX11-NEXT: ; return to shader part epilog
41 ; GFX12-LABEL: load_2dmsaa_both:
42 ; GFX12: ; %bb.0: ; %main_body
43 ; GFX12-NEXT: v_dual_mov_b32 v7, v0 :: v_dual_mov_b32 v8, 0 ; encoding: [0x00,0x01,0x10,0xca,0x80,0x00,0x08,0x07]
44 ; GFX12-NEXT: v_dual_mov_b32 v5, v2 :: v_dual_mov_b32 v6, v1 ; encoding: [0x02,0x01,0x10,0xca,0x01,0x01,0x06,0x05]
45 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; encoding: [0x22,0x01,0x87,0xbf]
46 ; GFX12-NEXT: v_dual_mov_b32 v9, v8 :: v_dual_mov_b32 v10, v8 ; encoding: [0x08,0x01,0x10,0xca,0x08,0x01,0x0a,0x09]
47 ; GFX12-NEXT: v_dual_mov_b32 v11, v8 :: v_dual_mov_b32 v12, v8 ; encoding: [0x08,0x01,0x10,0xca,0x08,0x01,0x0c,0x0b]
48 ; GFX12-NEXT: v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9 ; encoding: [0x08,0x01,0x10,0xca,0x09,0x01,0x00,0x00]
49 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) ; encoding: [0x92,0x01,0x87,0xbf]
50 ; GFX12-NEXT: v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11 ; encoding: [0x0a,0x01,0x10,0xca,0x0b,0x01,0x02,0x02]
51 ; GFX12-NEXT: v_mov_b32_e32 v4, v12 ; encoding: [0x0c,0x03,0x08,0x7e]
52 ; GFX12-NEXT: image_msaa_load v[0:4], [v7, v6, v5], s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe ; encoding: [0x0e,0x20,0x86,0xe4,0x00,0x01,0x00,0x00,0x07,0x06,0x05,0x00]
53 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
54 ; GFX12-NEXT: global_store_b32 v8, v4, s[8:9] ; encoding: [0x08,0x80,0x06,0xee,0x00,0x00,0x00,0x02,0x08,0x00,0x00,0x00]
55 ; GFX12-NEXT: ; return to shader part epilog
57 %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32i32.i32(i32 2, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
58 %v.vec = extractvalue {<4 x float>, i32} %v, 0
59 %v.err = extractvalue {<4 x float>, i32} %v, 1
60 store i32 %v.err, ptr addrspace(1) %out, align 4
61 ret <4 x float> %v.vec
64 define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
65 ; GFX11-LABEL: load_2darraymsaa:
66 ; GFX11: ; %bb.0: ; %main_body
67 ; GFX11-NEXT: image_msaa_load v[0:3], v[0:3], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; encoding: [0x9c,0x04,0x60,0xf0,0x00,0x00,0x00,0x00]
68 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
69 ; GFX11-NEXT: ; return to shader part epilog
71 ; GFX12-LABEL: load_2darraymsaa:
72 ; GFX12: ; %bb.0: ; %main_body
73 ; GFX12-NEXT: image_msaa_load v[0:3], [v0, v1, v2, v3], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; encoding: [0x07,0x20,0x06,0xe5,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x03]
74 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
75 ; GFX12-NEXT: ; return to shader part epilog
77 %v = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32(i32 4, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
81 define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
82 ; GFX11-LABEL: load_2darraymsaa_tfe:
83 ; GFX11: ; %bb.0: ; %main_body
84 ; GFX11-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v8, v3 ; encoding: [0x80,0x00,0x10,0xca,0x03,0x01,0x08,0x09]
85 ; GFX11-NEXT: v_dual_mov_b32 v7, v2 :: v_dual_mov_b32 v6, v1 ; encoding: [0x02,0x01,0x10,0xca,0x01,0x01,0x06,0x07]
86 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4) ; encoding: [0x42,0x02,0x87,0xbf]
87 ; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v10, v9 ; encoding: [0x00,0x01,0x10,0xca,0x09,0x01,0x0a,0x05]
88 ; GFX11-NEXT: v_mov_b32_e32 v11, v9 ; encoding: [0x09,0x03,0x16,0x7e]
89 ; GFX11-NEXT: v_mov_b32_e32 v12, v9 ; encoding: [0x09,0x03,0x18,0x7e]
90 ; GFX11-NEXT: v_mov_b32_e32 v13, v9 ; encoding: [0x09,0x03,0x1a,0x7e]
91 ; GFX11-NEXT: v_dual_mov_b32 v0, v9 :: v_dual_mov_b32 v1, v10 ; encoding: [0x09,0x01,0x10,0xca,0x0a,0x01,0x00,0x00]
92 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; encoding: [0x93,0x01,0x87,0xbf]
93 ; GFX11-NEXT: v_dual_mov_b32 v2, v11 :: v_dual_mov_b32 v3, v12 ; encoding: [0x0b,0x01,0x10,0xca,0x0c,0x01,0x02,0x02]
94 ; GFX11-NEXT: v_mov_b32_e32 v4, v13 ; encoding: [0x0d,0x03,0x08,0x7e]
95 ; GFX11-NEXT: image_msaa_load v[0:4], v[5:8], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ; encoding: [0x9c,0x08,0x60,0xf0,0x05,0x00,0x20,0x00]
96 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
97 ; GFX11-NEXT: global_store_b32 v9, v4, s[8:9] ; encoding: [0x00,0x00,0x6a,0xdc,0x09,0x04,0x08,0x00]
98 ; GFX11-NEXT: ; return to shader part epilog
100 ; GFX12-LABEL: load_2darraymsaa_tfe:
101 ; GFX12: ; %bb.0: ; %main_body
102 ; GFX12-NEXT: v_mov_b32_e32 v9, 0 ; encoding: [0x80,0x02,0x12,0x7e]
103 ; GFX12-NEXT: v_dual_mov_b32 v5, v3 :: v_dual_mov_b32 v6, v2 ; encoding: [0x03,0x01,0x10,0xca,0x02,0x01,0x06,0x05]
104 ; GFX12-NEXT: v_dual_mov_b32 v7, v1 :: v_dual_mov_b32 v8, v0 ; encoding: [0x01,0x01,0x10,0xca,0x00,0x01,0x08,0x07]
105 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; encoding: [0x23,0x01,0x87,0xbf]
106 ; GFX12-NEXT: v_dual_mov_b32 v10, v9 :: v_dual_mov_b32 v11, v9 ; encoding: [0x09,0x01,0x10,0xca,0x09,0x01,0x0a,0x0a]
107 ; GFX12-NEXT: v_dual_mov_b32 v12, v9 :: v_dual_mov_b32 v13, v9 ; encoding: [0x09,0x01,0x10,0xca,0x09,0x01,0x0c,0x0c]
108 ; GFX12-NEXT: v_dual_mov_b32 v0, v9 :: v_dual_mov_b32 v1, v10 ; encoding: [0x09,0x01,0x10,0xca,0x0a,0x01,0x00,0x00]
109 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) ; encoding: [0x92,0x01,0x87,0xbf]
110 ; GFX12-NEXT: v_dual_mov_b32 v2, v11 :: v_dual_mov_b32 v3, v12 ; encoding: [0x0b,0x01,0x10,0xca,0x0c,0x01,0x02,0x02]
111 ; GFX12-NEXT: v_mov_b32_e32 v4, v13 ; encoding: [0x0d,0x03,0x08,0x7e]
112 ; GFX12-NEXT: image_msaa_load v[0:4], [v8, v7, v6, v5], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ; encoding: [0x0f,0x20,0x06,0xe6,0x00,0x00,0x00,0x00,0x08,0x07,0x06,0x05]
113 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
114 ; GFX12-NEXT: global_store_b32 v9, v4, s[8:9] ; encoding: [0x08,0x80,0x06,0xee,0x00,0x00,0x00,0x02,0x09,0x00,0x00,0x00]
115 ; GFX12-NEXT: ; return to shader part epilog
117 %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32i32.i32(i32 8, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
118 %v.vec = extractvalue {<4 x float>, i32} %v, 0
119 %v.err = extractvalue {<4 x float>, i32} %v, 1
120 store i32 %v.err, ptr addrspace(1) %out, align 4
121 ret <4 x float> %v.vec
124 define amdgpu_ps <4 x float> @load_2dmsaa_glc(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
125 ; GFX11-LABEL: load_2dmsaa_glc:
126 ; GFX11: ; %bb.0: ; %main_body
127 ; GFX11-NEXT: image_msaa_load v[0:3], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc ; encoding: [0x98,0x41,0x60,0xf0,0x00,0x00,0x00,0x00]
128 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
129 ; GFX11-NEXT: ; return to shader part epilog
131 ; GFX12-LABEL: load_2dmsaa_glc:
132 ; GFX12: ; %bb.0: ; %main_body
133 ; GFX12-NEXT: image_msaa_load v[0:3], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm th:TH_LOAD_NT ; encoding: [0x06,0x20,0x46,0xe4,0x00,0x00,0x10,0x00,0x00,0x01,0x02,0x00]
134 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
135 ; GFX12-NEXT: ; return to shader part epilog
137 %v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 1)
141 define amdgpu_ps <4 x float> @load_2dmsaa_slc(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
142 ; GFX11-LABEL: load_2dmsaa_slc:
143 ; GFX11: ; %bb.0: ; %main_body
144 ; GFX11-NEXT: image_msaa_load v[0:3], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm slc ; encoding: [0x98,0x11,0x60,0xf0,0x00,0x00,0x00,0x00]
145 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
146 ; GFX11-NEXT: ; return to shader part epilog
148 ; GFX12-LABEL: load_2dmsaa_slc:
149 ; GFX12: ; %bb.0: ; %main_body
150 ; GFX12-NEXT: image_msaa_load v[0:3], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm th:TH_LOAD_HT ; encoding: [0x06,0x20,0x46,0xe4,0x00,0x00,0x20,0x00,0x00,0x01,0x02,0x00]
151 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
152 ; GFX12-NEXT: ; return to shader part epilog
154 %v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 2)
158 define amdgpu_ps <4 x float> @load_2dmsaa_glc_slc(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
159 ; GFX11-LABEL: load_2dmsaa_glc_slc:
160 ; GFX11: ; %bb.0: ; %main_body
161 ; GFX11-NEXT: image_msaa_load v[0:3], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc slc ; encoding: [0x98,0x51,0x60,0xf0,0x00,0x00,0x00,0x00]
162 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
163 ; GFX11-NEXT: ; return to shader part epilog
165 ; GFX12-LABEL: load_2dmsaa_glc_slc:
166 ; GFX12: ; %bb.0: ; %main_body
167 ; GFX12-NEXT: image_msaa_load v[0:3], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm th:TH_LOAD_LU ; encoding: [0x06,0x20,0x46,0xe4,0x00,0x00,0x30,0x00,0x00,0x01,0x02,0x00]
168 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
169 ; GFX12-NEXT: ; return to shader part epilog
171 %v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 3)
175 define amdgpu_ps <4 x half> @load_2dmsaa_d16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
176 ; GFX11-LABEL: load_2dmsaa_d16:
177 ; GFX11: ; %bb.0: ; %main_body
178 ; GFX11-NEXT: image_msaa_load v[0:1], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm d16 ; encoding: [0x98,0x01,0x62,0xf0,0x00,0x00,0x00,0x00]
179 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
180 ; GFX11-NEXT: ; return to shader part epilog
182 ; GFX12-LABEL: load_2dmsaa_d16:
183 ; GFX12: ; %bb.0: ; %main_body
184 ; GFX12-NEXT: image_msaa_load v[0:1], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm d16 ; encoding: [0x26,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x00]
185 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
186 ; GFX12-NEXT: ; return to shader part epilog
188 %v = call <4 x half> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
192 define amdgpu_ps <4 x half> @load_2dmsaa_tfe_d16(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
193 ; GFX11-LABEL: load_2dmsaa_tfe_d16:
194 ; GFX11: ; %bb.0: ; %main_body
195 ; GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v6, 0 ; encoding: [0x00,0x01,0x10,0xca,0x80,0x00,0x06,0x03]
196 ; GFX11-NEXT: v_dual_mov_b32 v5, v2 :: v_dual_mov_b32 v4, v1 ; encoding: [0x02,0x01,0x10,0xca,0x01,0x01,0x04,0x05]
197 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; encoding: [0x22,0x01,0x87,0xbf]
198 ; GFX11-NEXT: v_mov_b32_e32 v7, v6 ; encoding: [0x06,0x03,0x0e,0x7e]
199 ; GFX11-NEXT: v_mov_b32_e32 v8, v6 ; encoding: [0x06,0x03,0x10,0x7e]
200 ; GFX11-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7 ; encoding: [0x06,0x01,0x10,0xca,0x07,0x01,0x00,0x00]
201 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; encoding: [0x02,0x00,0x87,0xbf]
202 ; GFX11-NEXT: v_mov_b32_e32 v2, v8 ; encoding: [0x08,0x03,0x04,0x7e]
203 ; GFX11-NEXT: image_msaa_load v[0:2], v[3:5], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe d16 ; encoding: [0x98,0x01,0x62,0xf0,0x03,0x00,0x20,0x00]
204 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
205 ; GFX11-NEXT: global_store_b32 v6, v2, s[8:9] ; encoding: [0x00,0x00,0x6a,0xdc,0x06,0x02,0x08,0x00]
206 ; GFX11-NEXT: ; return to shader part epilog
208 ; GFX12-LABEL: load_2dmsaa_tfe_d16:
209 ; GFX12: ; %bb.0: ; %main_body
210 ; GFX12-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0 ; encoding: [0x00,0x01,0x10,0xca,0x80,0x00,0x06,0x05]
211 ; GFX12-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v4, v1 ; encoding: [0x02,0x01,0x10,0xca,0x01,0x01,0x04,0x03]
212 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; encoding: [0x92,0x00,0x87,0xbf]
213 ; GFX12-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v8, v6 ; encoding: [0x06,0x01,0x10,0xca,0x06,0x01,0x08,0x07]
214 ; GFX12-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7 ; encoding: [0x06,0x01,0x10,0xca,0x07,0x01,0x00,0x00]
215 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) ; encoding: [0x02,0x00,0x87,0xbf]
216 ; GFX12-NEXT: v_mov_b32_e32 v2, v8 ; encoding: [0x08,0x03,0x04,0x7e]
217 ; GFX12-NEXT: image_msaa_load v[0:2], [v5, v4, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe d16 ; encoding: [0x2e,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x05,0x04,0x03,0x00]
218 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
219 ; GFX12-NEXT: global_store_b32 v6, v2, s[8:9] ; encoding: [0x08,0x80,0x06,0xee,0x00,0x00,0x00,0x01,0x06,0x00,0x00,0x00]
220 ; GFX12-NEXT: ; return to shader part epilog
222 %v = call {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16i32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
223 %v.vec = extractvalue {<4 x half>, i32} %v, 0
224 %v.err = extractvalue {<4 x half>, i32} %v, 1
225 store i32 %v.err, ptr addrspace(1) %out, align 4
226 ret <4 x half> %v.vec
229 define amdgpu_ps <4 x half> @load_2darraymsaa_d16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
230 ; GFX11-LABEL: load_2darraymsaa_d16:
231 ; GFX11: ; %bb.0: ; %main_body
232 ; GFX11-NEXT: image_msaa_load v[0:1], v[0:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm d16 ; encoding: [0x9c,0x01,0x62,0xf0,0x00,0x00,0x00,0x00]
233 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
234 ; GFX11-NEXT: ; return to shader part epilog
236 ; GFX12-LABEL: load_2darraymsaa_d16:
237 ; GFX12: ; %bb.0: ; %main_body
238 ; GFX12-NEXT: image_msaa_load v[0:1], [v0, v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm d16 ; encoding: [0x27,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x03]
239 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
240 ; GFX12-NEXT: ; return to shader part epilog
242 %v = call <4 x half> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16.i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
246 define amdgpu_ps <4 x half> @load_2darraymsaa_tfe_d16(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
247 ; GFX11-LABEL: load_2darraymsaa_tfe_d16:
248 ; GFX11: ; %bb.0: ; %main_body
249 ; GFX11-NEXT: v_dual_mov_b32 v6, v0 :: v_dual_mov_b32 v7, 0 ; encoding: [0x00,0x01,0x10,0xca,0x80,0x00,0x06,0x06]
250 ; GFX11-NEXT: v_dual_mov_b32 v4, v2 :: v_dual_mov_b32 v5, v1 ; encoding: [0x02,0x01,0x10,0xca,0x01,0x01,0x04,0x04]
251 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; encoding: [0x22,0x01,0x87,0xbf]
252 ; GFX11-NEXT: v_mov_b32_e32 v8, v7 ; encoding: [0x07,0x03,0x10,0x7e]
253 ; GFX11-NEXT: v_mov_b32_e32 v9, v7 ; encoding: [0x07,0x03,0x12,0x7e]
254 ; GFX11-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8 ; encoding: [0x07,0x01,0x10,0xca,0x08,0x01,0x00,0x00]
255 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; encoding: [0x02,0x00,0x87,0xbf]
256 ; GFX11-NEXT: v_mov_b32_e32 v2, v9 ; encoding: [0x09,0x03,0x04,0x7e]
257 ; GFX11-NEXT: image_msaa_load v[0:2], [v6, v5, v4, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe d16 ; encoding: [0x9d,0x01,0x62,0xf0,0x06,0x00,0x20,0x00,0x05,0x04,0x03,0x00]
258 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
259 ; GFX11-NEXT: global_store_b32 v7, v2, s[8:9] ; encoding: [0x00,0x00,0x6a,0xdc,0x07,0x02,0x08,0x00]
260 ; GFX11-NEXT: ; return to shader part epilog
262 ; GFX12-LABEL: load_2darraymsaa_tfe_d16:
263 ; GFX12: ; %bb.0: ; %main_body
264 ; GFX12-NEXT: v_dual_mov_b32 v6, v0 :: v_dual_mov_b32 v7, 0 ; encoding: [0x00,0x01,0x10,0xca,0x80,0x00,0x06,0x06]
265 ; GFX12-NEXT: v_dual_mov_b32 v4, v2 :: v_dual_mov_b32 v5, v1 ; encoding: [0x02,0x01,0x10,0xca,0x01,0x01,0x04,0x04]
266 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; encoding: [0x92,0x00,0x87,0xbf]
267 ; GFX12-NEXT: v_dual_mov_b32 v8, v7 :: v_dual_mov_b32 v9, v7 ; encoding: [0x07,0x01,0x10,0xca,0x07,0x01,0x08,0x08]
268 ; GFX12-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8 ; encoding: [0x07,0x01,0x10,0xca,0x08,0x01,0x00,0x00]
269 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) ; encoding: [0x02,0x00,0x87,0xbf]
270 ; GFX12-NEXT: v_mov_b32_e32 v2, v9 ; encoding: [0x09,0x03,0x04,0x7e]
271 ; GFX12-NEXT: image_msaa_load v[0:2], [v6, v5, v4, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe d16 ; encoding: [0x2f,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x06,0x05,0x04,0x03]
272 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
273 ; GFX12-NEXT: global_store_b32 v7, v2, s[8:9] ; encoding: [0x08,0x80,0x06,0xee,0x00,0x00,0x00,0x01,0x07,0x00,0x00,0x00]
274 ; GFX12-NEXT: ; return to shader part epilog
276 %v = call {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16i32.i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
277 %v.vec = extractvalue {<4 x half>, i32} %v, 0
278 %v.err = extractvalue {<4 x half>, i32} %v, 1
279 store i32 %v.err, ptr addrspace(1) %out, align 4
280 ret <4 x half> %v.vec
283 define amdgpu_ps <4 x float> @load_2dmsaa_a16(<8 x i32> inreg %rsrc, i16 %s, i16 %t, i16 %fragid) {
284 ; GFX11-LABEL: load_2dmsaa_a16:
285 ; GFX11: ; %bb.0: ; %main_body
286 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; encoding: [0x00,0x00,0x44,0xd6,0x01,0x01,0xfe,0x03,0x00,0x01,0x04,0x05]
287 ; GFX11-NEXT: image_msaa_load v[0:3], [v0, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16 ; encoding: [0x99,0x01,0x61,0xf0,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
288 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
289 ; GFX11-NEXT: ; return to shader part epilog
291 ; GFX12-LABEL: load_2dmsaa_a16:
292 ; GFX12: ; %bb.0: ; %main_body
293 ; GFX12-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; encoding: [0x00,0x00,0x44,0xd6,0x01,0x01,0xfe,0x03,0x00,0x01,0x04,0x05]
294 ; GFX12-NEXT: image_msaa_load v[0:3], [v0, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16 ; encoding: [0x46,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00]
295 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
296 ; GFX12-NEXT: ; return to shader part epilog
298 %v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i16(i32 1, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
302 define amdgpu_ps <4 x float> @load_2darraymsaa_a16(<8 x i32> inreg %rsrc, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
303 ; GFX11-LABEL: load_2darraymsaa_a16:
304 ; GFX11: ; %bb.0: ; %main_body
305 ; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 ; encoding: [0x02,0x00,0x44,0xd6,0x03,0x05,0xfe,0x03,0x00,0x01,0x04,0x05]
306 ; GFX11-NEXT: v_perm_b32 v1, v1, v0, 0x5040100 ; encoding: [0x01,0x00,0x44,0xd6,0x01,0x01,0xfe,0x03,0x00,0x01,0x04,0x05]
307 ; GFX11-NEXT: image_msaa_load v[0:3], v[1:2], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 ; encoding: [0x9c,0x04,0x61,0xf0,0x01,0x00,0x00,0x00]
308 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
309 ; GFX11-NEXT: ; return to shader part epilog
311 ; GFX12-LABEL: load_2darraymsaa_a16:
312 ; GFX12: ; %bb.0: ; %main_body
313 ; GFX12-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 ; encoding: [0x02,0x00,0x44,0xd6,0x03,0x05,0xfe,0x03,0x00,0x01,0x04,0x05]
314 ; GFX12-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; encoding: [0x00,0x00,0x44,0xd6,0x01,0x01,0xfe,0x03,0x00,0x01,0x04,0x05]
315 ; GFX12-NEXT: image_msaa_load v[0:3], [v0, v2], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 ; encoding: [0x47,0x20,0x06,0xe5,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00]
316 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
317 ; GFX12-NEXT: ; return to shader part epilog
319 %v = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i16(i32 4, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
323 declare <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
324 declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
325 declare <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
326 declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
328 declare <4 x half> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
329 declare {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
330 declare <4 x half> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
331 declare {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
333 declare <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
334 declare <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
336 attributes #0 = { nounwind }
337 attributes #1 = { nounwind readonly }
338 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: