1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX900
3 ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX90A
4 ; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX940
5 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX10
6 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX900-GISEL
; Intrinsic under test. Operands, in order: global (addrspace 1) source
; pointer, LDS (addrspace 3) destination pointer, i32 size, i32 offset and
; i32 aux. The tests below always pass size/offset/aux as constants.
8 declare void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* nocapture %gptr, i8 addrspace(3)* nocapture %lptr, i32 %size, i32 %offset, i32 %aux)
; Size 4, offset 16, aux 1, with %gptr passed as a plain (non-inreg) argument.
; The checks expect a dword load addressed through v[0:1] with "off" as the
; scalar base and offset:16. The cache modifier is spelled "glc" on the
; gfx900, gfx90a and gfx1010 runs and "sc0" on the gfx940 run, which also uses
; the global_load_lds_dword mnemonic instead of a trailing "lds".
; m0 is written from v2 before the load (presumably %lptr -- confirm).
10 define amdgpu_ps void @global_load_lds_dword_vaddr(i8 addrspace(1)* nocapture %gptr, i8 addrspace(3)* nocapture %lptr) {
11 ; GFX900-LABEL: global_load_lds_dword_vaddr:
12 ; GFX900: ; %bb.0: ; %main_body
13 ; GFX900-NEXT: v_readfirstlane_b32 s0, v2
14 ; GFX900-NEXT: s_mov_b32 m0, s0
15 ; GFX900-NEXT: s_nop 0
16 ; GFX900-NEXT: global_load_dword v[0:1], off offset:16 glc lds
17 ; GFX900-NEXT: s_endpgm
19 ; GFX90A-LABEL: global_load_lds_dword_vaddr:
20 ; GFX90A: ; %bb.0: ; %main_body
21 ; GFX90A-NEXT: v_readfirstlane_b32 s0, v2
22 ; GFX90A-NEXT: s_mov_b32 m0, s0
23 ; GFX90A-NEXT: s_nop 0
24 ; GFX90A-NEXT: global_load_dword v[0:1], off offset:16 glc lds
25 ; GFX90A-NEXT: s_endpgm
27 ; GFX940-LABEL: global_load_lds_dword_vaddr:
28 ; GFX940: ; %bb.0: ; %main_body
29 ; GFX940-NEXT: v_readfirstlane_b32 s0, v2
30 ; GFX940-NEXT: s_mov_b32 m0, s0
31 ; GFX940-NEXT: s_nop 0
32 ; GFX940-NEXT: global_load_lds_dword v[0:1], off offset:16 sc0
33 ; GFX940-NEXT: s_endpgm
35 ; GFX10-LABEL: global_load_lds_dword_vaddr:
36 ; GFX10: ; %bb.0: ; %main_body
37 ; GFX10-NEXT: v_readfirstlane_b32 s0, v2
38 ; GFX10-NEXT: s_mov_b32 m0, s0
39 ; GFX10-NEXT: global_load_dword v[0:1], off offset:16 glc lds
40 ; GFX10-NEXT: s_endpgm
42 ; GFX900-GISEL-LABEL: global_load_lds_dword_vaddr:
43 ; GFX900-GISEL: ; %bb.0: ; %main_body
44 ; GFX900-GISEL-NEXT: v_readfirstlane_b32 m0, v2
45 ; GFX900-GISEL-NEXT: s_nop 4
46 ; GFX900-GISEL-NEXT: global_load_dword v[0:1], off offset:16 glc lds
47 ; GFX900-GISEL-NEXT: s_endpgm
49 call void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* %gptr, i8 addrspace(3)* %lptr, i32 4, i32 16, i32 1)
; Size 4, offset 32, aux 2, with %gptr marked inreg.
; The checks expect the load to use s[0:1] as the scalar base together with a
; VGPR that is set to 0 by a preceding v_mov_b32, and offset:32. The cache
; modifier is spelled "slc" on the gfx900, gfx90a and gfx1010 runs and "nt" on
; the gfx940 run. m0 is written from v0 before the load (presumably %lptr --
; confirm).
53 define amdgpu_ps void @global_load_lds_dword_saddr(i8 addrspace(1)* nocapture inreg %gptr, i8 addrspace(3)* nocapture %lptr) {
54 ; GFX900-LABEL: global_load_lds_dword_saddr:
55 ; GFX900: ; %bb.0: ; %main_body
56 ; GFX900-NEXT: v_readfirstlane_b32 s2, v0
57 ; GFX900-NEXT: v_mov_b32_e32 v1, 0
58 ; GFX900-NEXT: s_mov_b32 m0, s2
59 ; GFX900-NEXT: s_nop 0
60 ; GFX900-NEXT: global_load_dword v1, s[0:1] offset:32 slc lds
61 ; GFX900-NEXT: s_endpgm
63 ; GFX90A-LABEL: global_load_lds_dword_saddr:
64 ; GFX90A: ; %bb.0: ; %main_body
65 ; GFX90A-NEXT: v_readfirstlane_b32 s2, v0
66 ; GFX90A-NEXT: v_mov_b32_e32 v1, 0
67 ; GFX90A-NEXT: s_mov_b32 m0, s2
68 ; GFX90A-NEXT: s_nop 0
69 ; GFX90A-NEXT: global_load_dword v1, s[0:1] offset:32 slc lds
70 ; GFX90A-NEXT: s_endpgm
72 ; GFX940-LABEL: global_load_lds_dword_saddr:
73 ; GFX940: ; %bb.0: ; %main_body
74 ; GFX940-NEXT: v_readfirstlane_b32 s2, v0
75 ; GFX940-NEXT: v_mov_b32_e32 v1, 0
76 ; GFX940-NEXT: s_mov_b32 m0, s2
77 ; GFX940-NEXT: s_nop 0
78 ; GFX940-NEXT: global_load_lds_dword v1, s[0:1] offset:32 nt
79 ; GFX940-NEXT: s_endpgm
81 ; GFX10-LABEL: global_load_lds_dword_saddr:
82 ; GFX10: ; %bb.0: ; %main_body
83 ; GFX10-NEXT: v_readfirstlane_b32 s2, v0
84 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
85 ; GFX10-NEXT: s_mov_b32 m0, s2
86 ; GFX10-NEXT: global_load_dword v0, s[0:1] offset:32 slc lds
87 ; GFX10-NEXT: s_endpgm
89 ; GFX900-GISEL-LABEL: global_load_lds_dword_saddr:
90 ; GFX900-GISEL: ; %bb.0: ; %main_body
91 ; GFX900-GISEL-NEXT: v_readfirstlane_b32 m0, v0
92 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0
93 ; GFX900-GISEL-NEXT: s_nop 3
94 ; GFX900-GISEL-NEXT: global_load_dword v0, s[0:1] offset:32 slc lds
95 ; GFX900-GISEL-NEXT: s_endpgm
97 call void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* %gptr, i8 addrspace(3)* %lptr, i32 4, i32 32, i32 2)
; Size 4, offset 48, aux 16. The source address is the inreg %gptr plus a
; zero-extended 32-bit %voffset.
; The checks expect a single load of the form "v1, s[0:1] offset:48", with no
; separate address arithmetic before it. The only cache modifiers expected are
; "scc" on the gfx90a run and "sc1" on the gfx940 run; the gfx900 and gfx1010
; runs expect none.
101 define amdgpu_ps void @global_load_lds_dword_saddr_and_vaddr(i8 addrspace(1)* nocapture inreg %gptr, i8 addrspace(3)* nocapture %lptr, i32 %voffset) {
102 ; GFX900-LABEL: global_load_lds_dword_saddr_and_vaddr:
103 ; GFX900: ; %bb.0: ; %main_body
104 ; GFX900-NEXT: v_readfirstlane_b32 s2, v0
105 ; GFX900-NEXT: s_mov_b32 m0, s2
106 ; GFX900-NEXT: s_nop 0
107 ; GFX900-NEXT: global_load_dword v1, s[0:1] offset:48 lds
108 ; GFX900-NEXT: s_endpgm
110 ; GFX90A-LABEL: global_load_lds_dword_saddr_and_vaddr:
111 ; GFX90A: ; %bb.0: ; %main_body
112 ; GFX90A-NEXT: v_readfirstlane_b32 s2, v0
113 ; GFX90A-NEXT: s_mov_b32 m0, s2
114 ; GFX90A-NEXT: s_nop 0
115 ; GFX90A-NEXT: global_load_dword v1, s[0:1] offset:48 scc lds
116 ; GFX90A-NEXT: s_endpgm
118 ; GFX940-LABEL: global_load_lds_dword_saddr_and_vaddr:
119 ; GFX940: ; %bb.0: ; %main_body
120 ; GFX940-NEXT: v_readfirstlane_b32 s2, v0
121 ; GFX940-NEXT: s_mov_b32 m0, s2
122 ; GFX940-NEXT: s_nop 0
123 ; GFX940-NEXT: global_load_lds_dword v1, s[0:1] offset:48 sc1
124 ; GFX940-NEXT: s_endpgm
126 ; GFX10-LABEL: global_load_lds_dword_saddr_and_vaddr:
127 ; GFX10: ; %bb.0: ; %main_body
128 ; GFX10-NEXT: v_readfirstlane_b32 s2, v0
129 ; GFX10-NEXT: s_mov_b32 m0, s2
130 ; GFX10-NEXT: global_load_dword v1, s[0:1] offset:48 lds
131 ; GFX10-NEXT: s_endpgm
133 ; GFX900-GISEL-LABEL: global_load_lds_dword_saddr_and_vaddr:
134 ; GFX900-GISEL: ; %bb.0: ; %main_body
135 ; GFX900-GISEL-NEXT: v_readfirstlane_b32 m0, v0
136 ; GFX900-GISEL-NEXT: s_nop 4
137 ; GFX900-GISEL-NEXT: global_load_dword v1, s[0:1] offset:48 lds
138 ; GFX900-GISEL-NEXT: s_endpgm
; Widen the 32-bit offset to 64 bits and add it to the inreg base pointer.
140 %voffset.64 = zext i32 %voffset to i64
141 %gep = getelementptr i8, i8 addrspace(1)* %gptr, i64 %voffset.64
142 call void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* %gep, i8 addrspace(3)* %lptr, i32 4, i32 48, i32 16)
; Size 2, offset 0, aux 4, with %gptr passed as a plain (non-inreg) argument.
; The checks expect a ushort load addressed through v[0:1] with "off" as the
; scalar base and no offset field. Only the gfx1010 run expects a cache
; modifier ("dlc"); every other run expects none. The gfx940 run uses the
; global_load_lds_ushort mnemonic.
146 define amdgpu_ps void @global_load_lds_ushort_vaddr(i8 addrspace(1)* nocapture %gptr, i8 addrspace(3)* nocapture %lptr) {
147 ; GFX900-LABEL: global_load_lds_ushort_vaddr:
148 ; GFX900: ; %bb.0: ; %main_body
149 ; GFX900-NEXT: v_readfirstlane_b32 s0, v2
150 ; GFX900-NEXT: s_mov_b32 m0, s0
151 ; GFX900-NEXT: s_nop 0
152 ; GFX900-NEXT: global_load_ushort v[0:1], off lds
153 ; GFX900-NEXT: s_endpgm
155 ; GFX90A-LABEL: global_load_lds_ushort_vaddr:
156 ; GFX90A: ; %bb.0: ; %main_body
157 ; GFX90A-NEXT: v_readfirstlane_b32 s0, v2
158 ; GFX90A-NEXT: s_mov_b32 m0, s0
159 ; GFX90A-NEXT: s_nop 0
160 ; GFX90A-NEXT: global_load_ushort v[0:1], off lds
161 ; GFX90A-NEXT: s_endpgm
163 ; GFX940-LABEL: global_load_lds_ushort_vaddr:
164 ; GFX940: ; %bb.0: ; %main_body
165 ; GFX940-NEXT: v_readfirstlane_b32 s0, v2
166 ; GFX940-NEXT: s_mov_b32 m0, s0
167 ; GFX940-NEXT: s_nop 0
168 ; GFX940-NEXT: global_load_lds_ushort v[0:1], off
169 ; GFX940-NEXT: s_endpgm
171 ; GFX10-LABEL: global_load_lds_ushort_vaddr:
172 ; GFX10: ; %bb.0: ; %main_body
173 ; GFX10-NEXT: v_readfirstlane_b32 s0, v2
174 ; GFX10-NEXT: s_mov_b32 m0, s0
175 ; GFX10-NEXT: global_load_ushort v[0:1], off dlc lds
176 ; GFX10-NEXT: s_endpgm
178 ; GFX900-GISEL-LABEL: global_load_lds_ushort_vaddr:
179 ; GFX900-GISEL: ; %bb.0: ; %main_body
180 ; GFX900-GISEL-NEXT: v_readfirstlane_b32 m0, v2
181 ; GFX900-GISEL-NEXT: s_nop 4
182 ; GFX900-GISEL-NEXT: global_load_ushort v[0:1], off lds
183 ; GFX900-GISEL-NEXT: s_endpgm
185 call void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* %gptr, i8 addrspace(3)* %lptr, i32 2, i32 0, i32 4)
; Size 1, offset 0, aux 0, with %gptr passed as a plain (non-inreg) argument.
; The checks expect a ubyte load addressed through v[0:1] with "off" as the
; scalar base, no offset field and no cache modifiers on any run. The gfx940
; run uses the global_load_lds_ubyte mnemonic.
189 define amdgpu_ps void @global_load_lds_ubyte_vaddr(i8 addrspace(1)* nocapture %gptr, i8 addrspace(3)* nocapture %lptr) {
190 ; GFX900-LABEL: global_load_lds_ubyte_vaddr:
191 ; GFX900: ; %bb.0: ; %main_body
192 ; GFX900-NEXT: v_readfirstlane_b32 s0, v2
193 ; GFX900-NEXT: s_mov_b32 m0, s0
194 ; GFX900-NEXT: s_nop 0
195 ; GFX900-NEXT: global_load_ubyte v[0:1], off lds
196 ; GFX900-NEXT: s_endpgm
198 ; GFX90A-LABEL: global_load_lds_ubyte_vaddr:
199 ; GFX90A: ; %bb.0: ; %main_body
200 ; GFX90A-NEXT: v_readfirstlane_b32 s0, v2
201 ; GFX90A-NEXT: s_mov_b32 m0, s0
202 ; GFX90A-NEXT: s_nop 0
203 ; GFX90A-NEXT: global_load_ubyte v[0:1], off lds
204 ; GFX90A-NEXT: s_endpgm
206 ; GFX940-LABEL: global_load_lds_ubyte_vaddr:
207 ; GFX940: ; %bb.0: ; %main_body
208 ; GFX940-NEXT: v_readfirstlane_b32 s0, v2
209 ; GFX940-NEXT: s_mov_b32 m0, s0
210 ; GFX940-NEXT: s_nop 0
211 ; GFX940-NEXT: global_load_lds_ubyte v[0:1], off
212 ; GFX940-NEXT: s_endpgm
214 ; GFX10-LABEL: global_load_lds_ubyte_vaddr:
215 ; GFX10: ; %bb.0: ; %main_body
216 ; GFX10-NEXT: v_readfirstlane_b32 s0, v2
217 ; GFX10-NEXT: s_mov_b32 m0, s0
218 ; GFX10-NEXT: global_load_ubyte v[0:1], off lds
219 ; GFX10-NEXT: s_endpgm
221 ; GFX900-GISEL-LABEL: global_load_lds_ubyte_vaddr:
222 ; GFX900-GISEL: ; %bb.0: ; %main_body
223 ; GFX900-GISEL-NEXT: v_readfirstlane_b32 m0, v2
224 ; GFX900-GISEL-NEXT: s_nop 4
225 ; GFX900-GISEL-NEXT: global_load_ubyte v[0:1], off lds
226 ; GFX900-GISEL-NEXT: s_endpgm
228 call void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* %gptr, i8 addrspace(3)* %lptr, i32 1, i32 0, i32 0)