1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
4 ; Test that the localizer did something and that we don't materialize all
5 ; constants in SGPRs in the entry block.
; Kernel that branches on %cond into one of two blocks, each performing six
; volatile i32 stores of the same constants (123, 456, 999, 1000, 455, 23526)
; in a different order. The GFX9 checks expect every constant to be
; materialized with v_mov_b32 directly before the store that uses it, inside
; the block that uses it (%bb1 / %bb0), rather than up front in the entry
; block.
; NOTE(review): the block labels and the function terminator are not visible
; in this excerpt; block names below are taken from the GFX9 check comments.
7 define amdgpu_kernel void @localize_constants(i1 %cond) {
8 ; GFX9-LABEL: localize_constants:
9 ; GFX9: ; %bb.0: ; %entry
10 ; GFX9-NEXT: s_load_dword s1, s[4:5], 0x0
11 ; GFX9-NEXT: s_mov_b32 s0, 1
12 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
13 ; GFX9-NEXT: s_xor_b32 s1, s1, 1
14 ; GFX9-NEXT: s_and_b32 s1, s1, 1
15 ; GFX9-NEXT: s_cmp_lg_u32 s1, 0
16 ; GFX9-NEXT: s_cbranch_scc0 .LBB0_2
17 ; GFX9-NEXT: ; %bb.1: ; %bb1
18 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x5be6
19 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
20 ; GFX9-NEXT: s_waitcnt vmcnt(0)
21 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c7
22 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
23 ; GFX9-NEXT: s_waitcnt vmcnt(0)
24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e8
25 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
26 ; GFX9-NEXT: s_waitcnt vmcnt(0)
27 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c8
28 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
29 ; GFX9-NEXT: s_waitcnt vmcnt(0)
30 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e7
31 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
32 ; GFX9-NEXT: s_waitcnt vmcnt(0)
33 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
34 ; GFX9-NEXT: s_mov_b32 s0, 0
35 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
36 ; GFX9-NEXT: s_waitcnt vmcnt(0)
37 ; GFX9-NEXT: .LBB0_2: ; %Flow
38 ; GFX9-NEXT: s_xor_b32 s0, s0, 1
39 ; GFX9-NEXT: s_and_b32 s0, s0, 1
40 ; GFX9-NEXT: s_cmp_lg_u32 s0, 0
41 ; GFX9-NEXT: s_cbranch_scc1 .LBB0_4
42 ; GFX9-NEXT: ; %bb.3: ; %bb0
43 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
44 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
45 ; GFX9-NEXT: s_waitcnt vmcnt(0)
46 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c8
47 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
48 ; GFX9-NEXT: s_waitcnt vmcnt(0)
49 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e7
50 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
51 ; GFX9-NEXT: s_waitcnt vmcnt(0)
52 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e8
53 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
54 ; GFX9-NEXT: s_waitcnt vmcnt(0)
55 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c7
56 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
57 ; GFX9-NEXT: s_waitcnt vmcnt(0)
58 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x5be6
59 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
60 ; GFX9-NEXT: s_waitcnt vmcnt(0)
61 ; GFX9-NEXT: .LBB0_4: ; %bb2
64 br i1 %cond, label %bb0, label %bb1
; First store group: same order as the immediates checked under %bb0
; (0x7b, 0x1c8, 0x3e7, 0x3e8, 0x1c7, 0x5be6).
67 store volatile i32 123, ptr addrspace(1) undef
68 store volatile i32 456, ptr addrspace(1) undef
69 store volatile i32 999, ptr addrspace(1) undef
70 store volatile i32 1000, ptr addrspace(1) undef
71 store volatile i32 455, ptr addrspace(1) undef
72 store volatile i32 23526, ptr addrspace(1) undef
; Second store group: same order as the immediates checked under %bb1
; (0x5be6, 0x1c7, 0x3e8, 0x1c8, 0x3e7, 0x7b).
76 store volatile i32 23526, ptr addrspace(1) undef
77 store volatile i32 455, ptr addrspace(1) undef
78 store volatile i32 1000, ptr addrspace(1) undef
79 store volatile i32 456, ptr addrspace(1) undef
80 store volatile i32 999, ptr addrspace(1) undef
81 store volatile i32 123, ptr addrspace(1) undef
88 ; FIXME: These aren't localized because they were legalized before
89 ; the localizer, and are no longer G_GLOBAL_VALUE.
; Globals stored to by @localize_globals; the GFX9 checks address them
; through @gotpcrel32 relocations followed by an s_load_dwordx2.
90 @gv0 = addrspace(1) global i32 undef, align 4
91 @gv1 = addrspace(1) global i32 undef, align 4
92 @gv2 = addrspace(1) global i32 undef, align 4
93 @gv3 = addrspace(1) global i32 undef, align 4
; Kernel that branches on %cond; one path stores 0 to @gv0 and 1 to @gv1, the
; other stores 0 to @gv2 and 1 to @gv3 (all volatile). The GFX9 checks expect
; each global's address to be formed (s_getpc_b64 + @gotpcrel32 offsets, then
; s_load_dwordx2) inside the block that performs the stores: %bb1 for
; gv2/gv3 and %bb0 for gv0/gv1.
; NOTE(review): the block labels and the function terminator are not visible
; in this excerpt; block names below are taken from the GFX9 check comments.
95 define amdgpu_kernel void @localize_globals(i1 %cond) {
96 ; GFX9-LABEL: localize_globals:
97 ; GFX9: ; %bb.0: ; %entry
98 ; GFX9-NEXT: s_load_dword s1, s[4:5], 0x0
99 ; GFX9-NEXT: s_mov_b32 s0, 1
100 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
101 ; GFX9-NEXT: s_xor_b32 s1, s1, 1
102 ; GFX9-NEXT: s_and_b32 s1, s1, 1
103 ; GFX9-NEXT: s_cmp_lg_u32 s1, 0
104 ; GFX9-NEXT: s_cbranch_scc0 .LBB1_2
105 ; GFX9-NEXT: ; %bb.1: ; %bb1
106 ; GFX9-NEXT: s_getpc_b64 s[0:1]
107 ; GFX9-NEXT: s_add_u32 s0, s0, gv2@gotpcrel32@lo+4
108 ; GFX9-NEXT: s_addc_u32 s1, s1, gv2@gotpcrel32@hi+12
109 ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
110 ; GFX9-NEXT: s_getpc_b64 s[0:1]
111 ; GFX9-NEXT: s_add_u32 s0, s0, gv3@gotpcrel32@lo+4
112 ; GFX9-NEXT: s_addc_u32 s1, s1, gv3@gotpcrel32@hi+12
113 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
114 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
115 ; GFX9-NEXT: v_mov_b32_e32 v1, 1
116 ; GFX9-NEXT: s_mov_b32 s0, 0
117 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
118 ; GFX9-NEXT: global_store_dword v0, v0, s[2:3]
119 ; GFX9-NEXT: s_waitcnt vmcnt(0)
120 ; GFX9-NEXT: global_store_dword v0, v1, s[4:5]
121 ; GFX9-NEXT: s_waitcnt vmcnt(0)
122 ; GFX9-NEXT: .LBB1_2: ; %Flow
123 ; GFX9-NEXT: s_xor_b32 s0, s0, 1
124 ; GFX9-NEXT: s_and_b32 s0, s0, 1
125 ; GFX9-NEXT: s_cmp_lg_u32 s0, 0
126 ; GFX9-NEXT: s_cbranch_scc1 .LBB1_4
127 ; GFX9-NEXT: ; %bb.3: ; %bb0
128 ; GFX9-NEXT: s_getpc_b64 s[0:1]
129 ; GFX9-NEXT: s_add_u32 s0, s0, gv0@gotpcrel32@lo+4
130 ; GFX9-NEXT: s_addc_u32 s1, s1, gv0@gotpcrel32@hi+12
131 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
132 ; GFX9-NEXT: s_getpc_b64 s[2:3]
133 ; GFX9-NEXT: s_add_u32 s2, s2, gv1@gotpcrel32@lo+4
134 ; GFX9-NEXT: s_addc_u32 s3, s3, gv1@gotpcrel32@hi+12
135 ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
136 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
137 ; GFX9-NEXT: v_mov_b32_e32 v1, 1
138 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
139 ; GFX9-NEXT: global_store_dword v0, v0, s[0:1]
140 ; GFX9-NEXT: s_waitcnt vmcnt(0)
141 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3]
142 ; GFX9-NEXT: s_waitcnt vmcnt(0)
143 ; GFX9-NEXT: .LBB1_4: ; %bb2
144 ; GFX9-NEXT: s_endpgm
146 br i1 %cond, label %bb0, label %bb1
; Stores checked under %bb0 (gv0/gv1).
149 store volatile i32 0, ptr addrspace(1) @gv0
150 store volatile i32 1, ptr addrspace(1) @gv1
; Stores checked under %bb1 (gv2/gv3).
154 store volatile i32 0, ptr addrspace(1) @gv2
155 store volatile i32 1, ptr addrspace(1) @gv3
; Internal-linkage globals stored to by @localize_internal_globals; the GFX9
; checks address them with @rel32 relocations (no GOT load).
162 @static.gv0 = internal addrspace(1) global i32 undef, align 4
163 @static.gv1 = internal addrspace(1) global i32 undef, align 4
164 @static.gv2 = internal addrspace(1) global i32 undef, align 4
165 @static.gv3 = internal addrspace(1) global i32 undef, align 4
; Non-kernel function that branches on %cond; one path stores 0 to
; @static.gv0 and 1 to @static.gv1, the other stores 0 to @static.gv2 and 1 to
; @static.gv3 (all volatile). The GFX9 checks expect the branch to be
; implemented with exec masking (s_and_saveexec_b64 / s_andn2_saveexec_b64)
; and each global's address to be computed with s_getpc_b64 + @rel32 offsets
; immediately before the store that uses it, inside %bb1 / %bb0.
; NOTE(review): the block labels and the function terminator are not visible
; in this excerpt; block names below are taken from the GFX9 check comments.
167 define void @localize_internal_globals(i1 %cond) {
168 ; GFX9-LABEL: localize_internal_globals:
169 ; GFX9: ; %bb.0: ; %entry
170 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
171 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
172 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
173 ; GFX9-NEXT: s_xor_b64 s[4:5], vcc, -1
174 ; GFX9-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
175 ; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
176 ; GFX9-NEXT: s_cbranch_execnz .LBB2_3
177 ; GFX9-NEXT: ; %bb.1: ; %Flow
178 ; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
179 ; GFX9-NEXT: s_cbranch_execnz .LBB2_4
180 ; GFX9-NEXT: .LBB2_2: ; %bb2
181 ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
182 ; GFX9-NEXT: s_setpc_b64 s[30:31]
183 ; GFX9-NEXT: .LBB2_3: ; %bb1
184 ; GFX9-NEXT: s_getpc_b64 s[6:7]
185 ; GFX9-NEXT: s_add_u32 s6, s6, static.gv2@rel32@lo+4
186 ; GFX9-NEXT: s_addc_u32 s7, s7, static.gv2@rel32@hi+12
187 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
188 ; GFX9-NEXT: global_store_dword v0, v0, s[6:7]
189 ; GFX9-NEXT: s_waitcnt vmcnt(0)
190 ; GFX9-NEXT: s_getpc_b64 s[6:7]
191 ; GFX9-NEXT: s_add_u32 s6, s6, static.gv3@rel32@lo+4
192 ; GFX9-NEXT: s_addc_u32 s7, s7, static.gv3@rel32@hi+12
193 ; GFX9-NEXT: v_mov_b32_e32 v1, 1
194 ; GFX9-NEXT: global_store_dword v0, v1, s[6:7]
195 ; GFX9-NEXT: s_waitcnt vmcnt(0)
196 ; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
197 ; GFX9-NEXT: s_cbranch_execz .LBB2_2
198 ; GFX9-NEXT: .LBB2_4: ; %bb0
199 ; GFX9-NEXT: s_getpc_b64 s[6:7]
200 ; GFX9-NEXT: s_add_u32 s6, s6, static.gv0@rel32@lo+4
201 ; GFX9-NEXT: s_addc_u32 s7, s7, static.gv0@rel32@hi+12
202 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
203 ; GFX9-NEXT: global_store_dword v0, v0, s[6:7]
204 ; GFX9-NEXT: s_waitcnt vmcnt(0)
205 ; GFX9-NEXT: s_getpc_b64 s[6:7]
206 ; GFX9-NEXT: s_add_u32 s6, s6, static.gv1@rel32@lo+4
207 ; GFX9-NEXT: s_addc_u32 s7, s7, static.gv1@rel32@hi+12
208 ; GFX9-NEXT: v_mov_b32_e32 v1, 1
209 ; GFX9-NEXT: global_store_dword v0, v1, s[6:7]
210 ; GFX9-NEXT: s_waitcnt vmcnt(0)
211 ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
212 ; GFX9-NEXT: s_setpc_b64 s[30:31]
214 br i1 %cond, label %bb0, label %bb1
; Stores checked under %bb0 (static.gv0/static.gv1).
217 store volatile i32 0, ptr addrspace(1) @static.gv0
218 store volatile i32 1, ptr addrspace(1) @static.gv1
; Stores checked under %bb1 (static.gv2/static.gv3).
222 store volatile i32 0, ptr addrspace(1) @static.gv2
223 store volatile i32 1, ptr addrspace(1) @static.gv3
230 ; This would crash from using the wrong insert point
231 define void @sink_null_insert_pt(ptr addrspace(4) %arg0) {
232 ; GFX9-LABEL: sink_null_insert_pt:
233 ; GFX9: ; %bb.0: ; %entry
234 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
235 ; GFX9-NEXT: s_mov_b32 s16, s33
236 ; GFX9-NEXT: s_mov_b32 s33, s32
237 ; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
238 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
239 ; GFX9-NEXT: s_mov_b64 exec, s[18:19]
240 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
241 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
242 ; GFX9-NEXT: global_load_dword v0, v[0:1], off glc
243 ; GFX9-NEXT: s_waitcnt vmcnt(0)
244 ; GFX9-NEXT: v_writelane_b32 v40, s16, 2
245 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
246 ; GFX9-NEXT: s_addk_i32 s32, 0x400
247 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
248 ; GFX9-NEXT: s_swappc_b64 s[30:31], 0
249 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
250 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
251 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2
252 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
253 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
254 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
255 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
256 ; GFX9-NEXT: s_mov_b32 s33, s4
257 ; GFX9-NEXT: s_waitcnt vmcnt(0)
258 ; GFX9-NEXT: s_setpc_b64 s[30:31]
260 %load0 = load volatile i32, ptr addrspace(1) null, align 4