[MachineScheduler] Fix physreg dependencies of ExitSU (#123541)
[llvm-project.git] / llvm / test / CodeGen / AMDGPU / llvm.amdgcn.raw.ptr.buffer.load.bf16.ll
blob3540468566147a8a35076423fe84bd27dbb59a3d
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck --check-prefix=GFX7 %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck --check-prefix=GFX8 %s
4 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
5 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s
6 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11 %s
; Raw pointer buffer load of a single bfloat, with all three i32 operands zero.
; Every checked target is expected to emit one 16-bit buffer load
; (buffer_load_ushort / buffer_load_u16); GFX7 additionally shifts the loaded
; value left by 16 before returning it in v0.
; The intrinsic is named with the .bf16 overload suffix so that it matches the
; bfloat result type of the call.
8 define bfloat @raw_ptr_buffer_load_bf16(ptr addrspace(8) inreg %rsrc) {
9 ; GFX7-LABEL: raw_ptr_buffer_load_bf16:
10 ; GFX7:       ; %bb.0:
11 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12 ; GFX7-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
13 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
14 ; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
15 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
17 ; GFX8-LABEL: raw_ptr_buffer_load_bf16:
18 ; GFX8:       ; %bb.0:
19 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20 ; GFX8-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
21 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
22 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
24 ; GFX9-LABEL: raw_ptr_buffer_load_bf16:
25 ; GFX9:       ; %bb.0:
26 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27 ; GFX9-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
28 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
29 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
31 ; GFX10-LABEL: raw_ptr_buffer_load_bf16:
32 ; GFX10:       ; %bb.0:
33 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34 ; GFX10-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
35 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
36 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
38 ; GFX11-LABEL: raw_ptr_buffer_load_bf16:
39 ; GFX11:       ; %bb.0:
40 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41 ; GFX11-NEXT:    buffer_load_u16 v0, off, s[0:3], 0
42 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
43 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
44   %val = call bfloat @llvm.amdgcn.raw.ptr.buffer.load.bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
45   ret bfloat %val
; Raw pointer buffer load of <2 x bfloat>, with all three i32 operands zero.
; Every checked target is expected to emit one dword load
; (buffer_load_dword / buffer_load_b32). GFX7 loads into v1 and then produces
; two result registers: v0 = v1 << 16 and v1 = v1 & 0xffff0000.
48 define <2 x bfloat> @raw_ptr_buffer_load_v2bf16(ptr addrspace(8) inreg %rsrc) {
49 ; GFX7-LABEL: raw_ptr_buffer_load_v2bf16:
50 ; GFX7:       ; %bb.0:
51 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52 ; GFX7-NEXT:    buffer_load_dword v1, off, s[16:19], 0
53 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
54 ; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
55 ; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff0000, v1
56 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
58 ; GFX8-LABEL: raw_ptr_buffer_load_v2bf16:
59 ; GFX8:       ; %bb.0:
60 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61 ; GFX8-NEXT:    buffer_load_dword v0, off, s[16:19], 0
62 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
63 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
65 ; GFX9-LABEL: raw_ptr_buffer_load_v2bf16:
66 ; GFX9:       ; %bb.0:
67 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68 ; GFX9-NEXT:    buffer_load_dword v0, off, s[16:19], 0
69 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
70 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
72 ; GFX10-LABEL: raw_ptr_buffer_load_v2bf16:
73 ; GFX10:       ; %bb.0:
74 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75 ; GFX10-NEXT:    buffer_load_dword v0, off, s[16:19], 0
76 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
77 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
79 ; GFX11-LABEL: raw_ptr_buffer_load_v2bf16:
80 ; GFX11:       ; %bb.0:
81 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82 ; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
83 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
84 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
85   %val = call <2 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v2bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
86   ret <2 x bfloat> %val
; Raw pointer buffer load of <4 x bfloat>, with all three i32 operands zero.
; Every checked target is expected to emit one 64-bit load
; (buffer_load_dwordx2 / buffer_load_b64). GFX7 loads into v[2:3] and then
; expands each loaded dword into two result registers (shift left by 16, and
; mask with 0xffff0000), filling v0..v3.
89 define <4 x bfloat> @raw_ptr_buffer_load_v4bf16(ptr addrspace(8) inreg %rsrc) {
90 ; GFX7-LABEL: raw_ptr_buffer_load_v4bf16:
91 ; GFX7:       ; %bb.0:
92 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
93 ; GFX7-NEXT:    buffer_load_dwordx2 v[2:3], off, s[16:19], 0
94 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
95 ; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v2
96 ; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff0000, v2
97 ; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
98 ; GFX7-NEXT:    v_and_b32_e32 v3, 0xffff0000, v3
99 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
101 ; GFX8-LABEL: raw_ptr_buffer_load_v4bf16:
102 ; GFX8:       ; %bb.0:
103 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104 ; GFX8-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
105 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
106 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
108 ; GFX9-LABEL: raw_ptr_buffer_load_v4bf16:
109 ; GFX9:       ; %bb.0:
110 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
111 ; GFX9-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
112 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
113 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
115 ; GFX10-LABEL: raw_ptr_buffer_load_v4bf16:
116 ; GFX10:       ; %bb.0:
117 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118 ; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
119 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
120 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
122 ; GFX11-LABEL: raw_ptr_buffer_load_v4bf16:
123 ; GFX11:       ; %bb.0:
124 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
125 ; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
126 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
127 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
128   %val = call <4 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v4bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
129   ret <4 x bfloat> %val
132 ; FIXME: The <6 x bfloat> case below is commented out, so it is not currently tested.
133 ; define <6 x bfloat> @raw_ptr_buffer_load_v6bf16(ptr addrspace(8) inreg %rsrc) {
134 ;   %val = call <6 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v6bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
135 ;   ret <6 x bfloat> %val
136 ; }
; Raw pointer buffer load of <8 x bfloat>, with all three i32 operands zero.
; Every checked target is expected to emit one 128-bit load
; (buffer_load_dwordx4 / buffer_load_b128). GFX7 loads into v[4:7] and then
; expands each loaded dword into two result registers (shift left by 16, and
; mask with 0xffff0000), filling v0..v7.
138 define <8 x bfloat> @raw_ptr_buffer_load_v8bf16(ptr addrspace(8) inreg %rsrc) {
139 ; GFX7-LABEL: raw_ptr_buffer_load_v8bf16:
140 ; GFX7:       ; %bb.0:
141 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142 ; GFX7-NEXT:    buffer_load_dwordx4 v[4:7], off, s[16:19], 0
143 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
144 ; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v4
145 ; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff0000, v4
146 ; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
147 ; GFX7-NEXT:    v_and_b32_e32 v3, 0xffff0000, v5
148 ; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 16, v6
149 ; GFX7-NEXT:    v_and_b32_e32 v5, 0xffff0000, v6
150 ; GFX7-NEXT:    v_lshlrev_b32_e32 v6, 16, v7
151 ; GFX7-NEXT:    v_and_b32_e32 v7, 0xffff0000, v7
152 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
154 ; GFX8-LABEL: raw_ptr_buffer_load_v8bf16:
155 ; GFX8:       ; %bb.0:
156 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157 ; GFX8-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
158 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
159 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
161 ; GFX9-LABEL: raw_ptr_buffer_load_v8bf16:
162 ; GFX9:       ; %bb.0:
163 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164 ; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
165 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
166 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
168 ; GFX10-LABEL: raw_ptr_buffer_load_v8bf16:
169 ; GFX10:       ; %bb.0:
170 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
171 ; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
172 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
173 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
175 ; GFX11-LABEL: raw_ptr_buffer_load_v8bf16:
176 ; GFX11:       ; %bb.0:
177 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178 ; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
179 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
180 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
181   %val = call <8 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v8bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
182   ret <8 x bfloat> %val