1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck --check-prefix=GFX7 %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck --check-prefix=GFX8 %s
4 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
5 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s
6 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11 %s
; Loads a single bfloat through the raw pointer-buffer load intrinsic, with all
; three i32 operands set to zero, and returns it.
; The intrinsic is overloaded on its result type, so the name suffix has to be
; the mangling of the scalar return type (.bf16), in line with the .v2bf16,
; .v4bf16 and .v8bf16 suffixes used by the vector tests in this file.
; Per the checks below, GFX7 shifts the loaded 16-bit value left by 16 before
; returning, while GFX8 through GFX11 return the load result unchanged.
; NOTE(review): the terminating `ret` and closing brace of this function are
; not visible in the reviewed excerpt; confirm they are present in the file.
8 define bfloat @raw_ptr_buffer_load_bf16(ptr addrspace(8) inreg %rsrc) {
9 ; GFX7-LABEL: raw_ptr_buffer_load_bf16:
11 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12 ; GFX7-NEXT: buffer_load_ushort v0, off, s[16:19], 0
13 ; GFX7-NEXT: s_waitcnt vmcnt(0)
14 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
15 ; GFX7-NEXT: s_setpc_b64 s[30:31]
17 ; GFX8-LABEL: raw_ptr_buffer_load_bf16:
19 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20 ; GFX8-NEXT: buffer_load_ushort v0, off, s[16:19], 0
21 ; GFX8-NEXT: s_waitcnt vmcnt(0)
22 ; GFX8-NEXT: s_setpc_b64 s[30:31]
24 ; GFX9-LABEL: raw_ptr_buffer_load_bf16:
26 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27 ; GFX9-NEXT: buffer_load_ushort v0, off, s[16:19], 0
28 ; GFX9-NEXT: s_waitcnt vmcnt(0)
29 ; GFX9-NEXT: s_setpc_b64 s[30:31]
31 ; GFX10-LABEL: raw_ptr_buffer_load_bf16:
33 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34 ; GFX10-NEXT: buffer_load_ushort v0, off, s[16:19], 0
35 ; GFX10-NEXT: s_waitcnt vmcnt(0)
36 ; GFX10-NEXT: s_setpc_b64 s[30:31]
38 ; GFX11-LABEL: raw_ptr_buffer_load_bf16:
40 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41 ; GFX11-NEXT: buffer_load_u16 v0, off, s[0:3], 0
42 ; GFX11-NEXT: s_waitcnt vmcnt(0)
43 ; GFX11-NEXT: s_setpc_b64 s[30:31]
44 %val = call bfloat @llvm.amdgcn.raw.ptr.buffer.load.bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
; Loads a <2 x bfloat> through the raw pointer-buffer load intrinsic, with all
; three i32 operands set to zero.
; Per the checks below, every target issues a single dword-sized load. GFX7
; then splits that dword across two registers: the low 16 bits are shifted up
; into v0 and the high 16 bits are kept (masked with 0xffff0000) in v1.
; GFX8 through GFX11 return the loaded dword in v0 unchanged.
; NOTE(review): the terminating `ret` and closing brace of this function are
; not visible in the reviewed excerpt; confirm they are present in the file.
48 define <2 x bfloat> @raw_ptr_buffer_load_v2bf16(ptr addrspace(8) inreg %rsrc) {
49 ; GFX7-LABEL: raw_ptr_buffer_load_v2bf16:
51 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52 ; GFX7-NEXT: buffer_load_dword v1, off, s[16:19], 0
53 ; GFX7-NEXT: s_waitcnt vmcnt(0)
54 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v1
55 ; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
56 ; GFX7-NEXT: s_setpc_b64 s[30:31]
58 ; GFX8-LABEL: raw_ptr_buffer_load_v2bf16:
60 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61 ; GFX8-NEXT: buffer_load_dword v0, off, s[16:19], 0
62 ; GFX8-NEXT: s_waitcnt vmcnt(0)
63 ; GFX8-NEXT: s_setpc_b64 s[30:31]
65 ; GFX9-LABEL: raw_ptr_buffer_load_v2bf16:
67 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68 ; GFX9-NEXT: buffer_load_dword v0, off, s[16:19], 0
69 ; GFX9-NEXT: s_waitcnt vmcnt(0)
70 ; GFX9-NEXT: s_setpc_b64 s[30:31]
72 ; GFX10-LABEL: raw_ptr_buffer_load_v2bf16:
74 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75 ; GFX10-NEXT: buffer_load_dword v0, off, s[16:19], 0
76 ; GFX10-NEXT: s_waitcnt vmcnt(0)
77 ; GFX10-NEXT: s_setpc_b64 s[30:31]
79 ; GFX11-LABEL: raw_ptr_buffer_load_v2bf16:
81 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82 ; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0
83 ; GFX11-NEXT: s_waitcnt vmcnt(0)
84 ; GFX11-NEXT: s_setpc_b64 s[30:31]
85 %val = call <2 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v2bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
; Loads a <4 x bfloat> through the raw pointer-buffer load intrinsic, with all
; three i32 operands set to zero, and returns it.
; Per the checks below, every target issues a single 64-bit load. GFX7 loads
; into v[2:3] and then expands each dword into two registers (low 16 bits
; shifted up, high 16 bits masked with 0xffff0000), filling v0 through v3.
; GFX8 through GFX11 return the loaded pair v[0:1] unchanged.
89 define <4 x bfloat> @raw_ptr_buffer_load_v4bf16(ptr addrspace(8) inreg %rsrc) {
90 ; GFX7-LABEL: raw_ptr_buffer_load_v4bf16:
92 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
93 ; GFX7-NEXT: buffer_load_dwordx2 v[2:3], off, s[16:19], 0
94 ; GFX7-NEXT: s_waitcnt vmcnt(0)
95 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v2
96 ; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
97 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v3
98 ; GFX7-NEXT: v_and_b32_e32 v3, 0xffff0000, v3
99 ; GFX7-NEXT: s_setpc_b64 s[30:31]
101 ; GFX8-LABEL: raw_ptr_buffer_load_v4bf16:
103 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104 ; GFX8-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0
105 ; GFX8-NEXT: s_waitcnt vmcnt(0)
106 ; GFX8-NEXT: s_setpc_b64 s[30:31]
108 ; GFX9-LABEL: raw_ptr_buffer_load_v4bf16:
110 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
111 ; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0
112 ; GFX9-NEXT: s_waitcnt vmcnt(0)
113 ; GFX9-NEXT: s_setpc_b64 s[30:31]
115 ; GFX10-LABEL: raw_ptr_buffer_load_v4bf16:
117 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0
119 ; GFX10-NEXT: s_waitcnt vmcnt(0)
120 ; GFX10-NEXT: s_setpc_b64 s[30:31]
122 ; GFX11-LABEL: raw_ptr_buffer_load_v4bf16:
124 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
125 ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
126 ; GFX11-NEXT: s_waitcnt vmcnt(0)
127 ; GFX11-NEXT: s_setpc_b64 s[30:31]
128 %val = call <4 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v4bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
129 ret <4 x bfloat> %val
133 ; define <6 x bfloat> @raw_ptr_buffer_load_v6bf16(ptr addrspace(8) inreg %rsrc) {
134 ; %val = call <6 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v6bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
135 ; ret <6 x bfloat> %val
; Loads an <8 x bfloat> through the raw pointer-buffer load intrinsic, with all
; three i32 operands set to zero, and returns it.
; Per the checks below, every target issues a single 128-bit load. GFX7 loads
; into v[4:7] and then expands each dword into two registers (low 16 bits
; shifted up, high 16 bits masked with 0xffff0000), filling v0 through v7;
; v4..v7 are overwritten in place only after their source dwords were consumed
; or as the final use of that dword.
; GFX8 through GFX11 return the loaded v[0:3] unchanged.
138 define <8 x bfloat> @raw_ptr_buffer_load_v8bf16(ptr addrspace(8) inreg %rsrc) {
139 ; GFX7-LABEL: raw_ptr_buffer_load_v8bf16:
141 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142 ; GFX7-NEXT: buffer_load_dwordx4 v[4:7], off, s[16:19], 0
143 ; GFX7-NEXT: s_waitcnt vmcnt(0)
144 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v4
145 ; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v4
146 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v5
147 ; GFX7-NEXT: v_and_b32_e32 v3, 0xffff0000, v5
148 ; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v6
149 ; GFX7-NEXT: v_and_b32_e32 v5, 0xffff0000, v6
150 ; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v7
151 ; GFX7-NEXT: v_and_b32_e32 v7, 0xffff0000, v7
152 ; GFX7-NEXT: s_setpc_b64 s[30:31]
154 ; GFX8-LABEL: raw_ptr_buffer_load_v8bf16:
156 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157 ; GFX8-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0
158 ; GFX8-NEXT: s_waitcnt vmcnt(0)
159 ; GFX8-NEXT: s_setpc_b64 s[30:31]
161 ; GFX9-LABEL: raw_ptr_buffer_load_v8bf16:
163 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0
165 ; GFX9-NEXT: s_waitcnt vmcnt(0)
166 ; GFX9-NEXT: s_setpc_b64 s[30:31]
168 ; GFX10-LABEL: raw_ptr_buffer_load_v8bf16:
170 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
171 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0
172 ; GFX10-NEXT: s_waitcnt vmcnt(0)
173 ; GFX10-NEXT: s_setpc_b64 s[30:31]
175 ; GFX11-LABEL: raw_ptr_buffer_load_v8bf16:
177 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
179 ; GFX11-NEXT: s_waitcnt vmcnt(0)
180 ; GFX11-NEXT: s_setpc_b64 s[30:31]
181 %val = call <8 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v8bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
182 ret <8 x bfloat> %val