1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefixes=PREGFX10-UNPACKED %s
3 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
4 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
5 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX10-PACKED %s
6 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX11-PACKED %s
7 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX12-PACKED %s
9 define amdgpu_ps half @tbuffer_load_d16_x(<4 x i32> inreg %rsrc) {
10 ; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_x:
11 ; PREGFX10-UNPACKED: ; %bb.0: ; %main_body
12 ; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
13 ; PREGFX10-UNPACKED-NEXT: tbuffer_load_format_d16_x v0, v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen ; encoding: [0x00,0x20,0xb4,0xe8,0x00,0x00,0x00,0x80]
14 ; PREGFX10-UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
15 ; PREGFX10-UNPACKED-NEXT: ; return to shader part epilog
17 ; PREGFX10-PACKED-LABEL: tbuffer_load_d16_x:
18 ; PREGFX10-PACKED: ; %bb.0: ; %main_body
19 ; PREGFX10-PACKED-NEXT: v_mov_b32_e32 v0, 0
20 ; PREGFX10-PACKED-NEXT: tbuffer_load_format_d16_x v0, v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
21 ; PREGFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
22 ; PREGFX10-PACKED-NEXT: ; return to shader part epilog
24 ; GFX10-PACKED-LABEL: tbuffer_load_d16_x:
25 ; GFX10-PACKED: ; %bb.0: ; %main_body
26 ; GFX10-PACKED-NEXT: v_mov_b32_e32 v0, 0
27 ; GFX10-PACKED-NEXT: tbuffer_load_format_d16_x v0, v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
28 ; GFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
29 ; GFX10-PACKED-NEXT: ; return to shader part epilog
31 ; GFX11-PACKED-LABEL: tbuffer_load_d16_x:
32 ; GFX11-PACKED: ; %bb.0: ; %main_body
33 ; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, 0
34 ; GFX11-PACKED-NEXT: tbuffer_load_d16_format_x v0, v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
35 ; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
36 ; GFX11-PACKED-NEXT: ; return to shader part epilog
38 ; GFX12-PACKED-LABEL: tbuffer_load_d16_x:
39 ; GFX12-PACKED: ; %bb.0: ; %main_body
40 ; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, 0
41 ; GFX12-PACKED-NEXT: tbuffer_load_d16_format_x v0, v0, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen
42 ; GFX12-PACKED-NEXT: s_wait_loadcnt 0x0
43 ; GFX12-PACKED-NEXT: ; return to shader part epilog
45 %data = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
49 define amdgpu_ps half @tbuffer_load_d16_xy(<4 x i32> inreg %rsrc) {
50 ; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xy:
51 ; PREGFX10-UNPACKED: ; %bb.0: ; %main_body
52 ; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
53 ; PREGFX10-UNPACKED-NEXT: tbuffer_load_format_d16_xy v[0:1], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen ; encoding: [0x00,0xa0,0xb4,0xe8,0x00,0x00,0x00,0x80]
54 ; PREGFX10-UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
55 ; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, v1 ; encoding: [0x01,0x03,0x00,0x7e]
56 ; PREGFX10-UNPACKED-NEXT: ; return to shader part epilog
58 ; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xy:
59 ; PREGFX10-PACKED: ; %bb.0: ; %main_body
60 ; PREGFX10-PACKED-NEXT: v_mov_b32_e32 v0, 0
61 ; PREGFX10-PACKED-NEXT: tbuffer_load_format_d16_xy v0, v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
62 ; PREGFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
63 ; PREGFX10-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0
64 ; PREGFX10-PACKED-NEXT: ; return to shader part epilog
66 ; GFX10-PACKED-LABEL: tbuffer_load_d16_xy:
67 ; GFX10-PACKED: ; %bb.0: ; %main_body
68 ; GFX10-PACKED-NEXT: v_mov_b32_e32 v0, 0
69 ; GFX10-PACKED-NEXT: tbuffer_load_format_d16_xy v0, v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
70 ; GFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
71 ; GFX10-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0
72 ; GFX10-PACKED-NEXT: ; return to shader part epilog
74 ; GFX11-PACKED-LABEL: tbuffer_load_d16_xy:
75 ; GFX11-PACKED: ; %bb.0: ; %main_body
76 ; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, 0
77 ; GFX11-PACKED-NEXT: tbuffer_load_d16_format_xy v0, v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
78 ; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
79 ; GFX11-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0
80 ; GFX11-PACKED-NEXT: ; return to shader part epilog
82 ; GFX12-PACKED-LABEL: tbuffer_load_d16_xy:
83 ; GFX12-PACKED: ; %bb.0: ; %main_body
84 ; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, 0
85 ; GFX12-PACKED-NEXT: tbuffer_load_d16_format_xy v0, v0, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen
86 ; GFX12-PACKED-NEXT: s_wait_loadcnt 0x0
87 ; GFX12-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0
88 ; GFX12-PACKED-NEXT: ; return to shader part epilog
90 %data = call <2 x half> @llvm.amdgcn.struct.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
91 %elt = extractelement <2 x half> %data, i32 1
95 define amdgpu_ps half @tbuffer_load_d16_xyz(<4 x i32> inreg %rsrc) {
96 ; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xyz:
97 ; PREGFX10-UNPACKED: ; %bb.0: ; %main_body
98 ; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
99 ; PREGFX10-UNPACKED-NEXT: tbuffer_load_format_d16_xyz v[0:2], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen ; encoding: [0x00,0x20,0xb5,0xe8,0x00,0x00,0x00,0x80]
100 ; PREGFX10-UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
101 ; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, v2 ; encoding: [0x02,0x03,0x00,0x7e]
102 ; PREGFX10-UNPACKED-NEXT: ; return to shader part epilog
104 ; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xyz:
105 ; PREGFX10-PACKED: ; %bb.0: ; %main_body
106 ; PREGFX10-PACKED-NEXT: v_mov_b32_e32 v0, 0
107 ; PREGFX10-PACKED-NEXT: tbuffer_load_format_d16_xyz v[0:1], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
108 ; PREGFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
109 ; PREGFX10-PACKED-NEXT: v_mov_b32_e32 v0, v1
110 ; PREGFX10-PACKED-NEXT: ; return to shader part epilog
112 ; GFX10-PACKED-LABEL: tbuffer_load_d16_xyz:
113 ; GFX10-PACKED: ; %bb.0: ; %main_body
114 ; GFX10-PACKED-NEXT: v_mov_b32_e32 v0, 0
115 ; GFX10-PACKED-NEXT: tbuffer_load_format_d16_xyz v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
116 ; GFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
117 ; GFX10-PACKED-NEXT: v_mov_b32_e32 v0, v1
118 ; GFX10-PACKED-NEXT: ; return to shader part epilog
120 ; GFX11-PACKED-LABEL: tbuffer_load_d16_xyz:
121 ; GFX11-PACKED: ; %bb.0: ; %main_body
122 ; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, 0
123 ; GFX11-PACKED-NEXT: tbuffer_load_d16_format_xyz v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
124 ; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
125 ; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, v1
126 ; GFX11-PACKED-NEXT: ; return to shader part epilog
128 ; GFX12-PACKED-LABEL: tbuffer_load_d16_xyz:
129 ; GFX12-PACKED: ; %bb.0: ; %main_body
130 ; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, 0
131 ; GFX12-PACKED-NEXT: tbuffer_load_d16_format_xyz v[0:1], v0, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen
132 ; GFX12-PACKED-NEXT: s_wait_loadcnt 0x0
133 ; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, v1
134 ; GFX12-PACKED-NEXT: ; return to shader part epilog
136 %data = call <3 x half> @llvm.amdgcn.struct.tbuffer.load.v3f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
137 %elt = extractelement <3 x half> %data, i32 2
141 define amdgpu_ps half @tbuffer_load_d16_xyzw(<4 x i32> inreg %rsrc) {
142 ; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xyzw:
143 ; PREGFX10-UNPACKED: ; %bb.0: ; %main_body
144 ; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
145 ; PREGFX10-UNPACKED-NEXT: tbuffer_load_format_d16_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen ; encoding: [0x00,0xa0,0xb5,0xe8,0x00,0x00,0x00,0x80]
146 ; PREGFX10-UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
147 ; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, v3 ; encoding: [0x03,0x03,0x00,0x7e]
148 ; PREGFX10-UNPACKED-NEXT: ; return to shader part epilog
150 ; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xyzw:
151 ; PREGFX10-PACKED: ; %bb.0: ; %main_body
152 ; PREGFX10-PACKED-NEXT: v_mov_b32_e32 v0, 0
153 ; PREGFX10-PACKED-NEXT: tbuffer_load_format_d16_xyzw v[0:1], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
154 ; PREGFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
155 ; PREGFX10-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1
156 ; PREGFX10-PACKED-NEXT: ; return to shader part epilog
158 ; GFX10-PACKED-LABEL: tbuffer_load_d16_xyzw:
159 ; GFX10-PACKED: ; %bb.0: ; %main_body
160 ; GFX10-PACKED-NEXT: v_mov_b32_e32 v0, 0
161 ; GFX10-PACKED-NEXT: tbuffer_load_format_d16_xyzw v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
162 ; GFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
163 ; GFX10-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1
164 ; GFX10-PACKED-NEXT: ; return to shader part epilog
166 ; GFX11-PACKED-LABEL: tbuffer_load_d16_xyzw:
167 ; GFX11-PACKED: ; %bb.0: ; %main_body
168 ; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, 0
169 ; GFX11-PACKED-NEXT: tbuffer_load_d16_format_xyzw v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
170 ; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
171 ; GFX11-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1
172 ; GFX11-PACKED-NEXT: ; return to shader part epilog
174 ; GFX12-PACKED-LABEL: tbuffer_load_d16_xyzw:
175 ; GFX12-PACKED: ; %bb.0: ; %main_body
176 ; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, 0
177 ; GFX12-PACKED-NEXT: tbuffer_load_d16_format_xyzw v[0:1], v0, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen
178 ; GFX12-PACKED-NEXT: s_wait_loadcnt 0x0
179 ; GFX12-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1
180 ; GFX12-PACKED-NEXT: ; return to shader part epilog
182 %data = call <4 x half> @llvm.amdgcn.struct.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
183 %elt = extractelement <4 x half> %data, i32 3
187 declare half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32, i32)
188 declare <2 x half> @llvm.amdgcn.struct.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32, i32)
189 declare <3 x half> @llvm.amdgcn.struct.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32, i32)
190 declare <4 x half> @llvm.amdgcn.struct.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32, i32)