1 # RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
4 define amdgpu_ps void @test_strict_wwm_scc() {
7 define amdgpu_ps void @test_strict_wwm_scc2() {
10 define amdgpu_ps void @no_cfg() {
13 define amdgpu_ps void @copy_exec() {
16 define amdgpu_ps void @scc_always_live() {
19 define amdgpu_ps void @test_wwm_set_inactive_propagation() {
22 define amdgpu_ps void @test_wqm_lr_phi() {
25 define amdgpu_cs void @no_wqm_in_cs() {
28 define amdgpu_es void @no_wqm_in_es() {
31 define amdgpu_gs void @no_wqm_in_gs() {
34 define amdgpu_hs void @no_wqm_in_hs() {
37 define amdgpu_ls void @no_wqm_in_ls() {
40 define amdgpu_vs void @no_wqm_in_vs() {
47 # Check for awareness that s_or_saveexec_b64 clobbers SCC
49 #CHECK: ENTER_STRICT_WWM
52 name: test_strict_wwm_scc
54 exposesReturnsTwice: false
56 regBankSelected: false
58 tracksRegLiveness: true
60 - { id: 0, class: sgpr_32, preferred-register: '' }
61 - { id: 1, class: sgpr_32, preferred-register: '' }
62 - { id: 2, class: sgpr_32, preferred-register: '' }
63 - { id: 3, class: vgpr_32, preferred-register: '' }
64 - { id: 4, class: vgpr_32, preferred-register: '' }
65 - { id: 5, class: sgpr_32, preferred-register: '' }
66 - { id: 6, class: vgpr_32, preferred-register: '' }
67 - { id: 7, class: vgpr_32, preferred-register: '' }
68 - { id: 8, class: sreg_32_xm0, preferred-register: '' }
69 - { id: 9, class: sreg_32, preferred-register: '' }
70 - { id: 10, class: sreg_32, preferred-register: '' }
71 - { id: 11, class: vgpr_32, preferred-register: '' }
72 - { id: 12, class: vgpr_32, preferred-register: '' }
74 - { reg: '$sgpr0', virtual-reg: '%0' }
75 - { reg: '$sgpr1', virtual-reg: '%1' }
76 - { reg: '$sgpr2', virtual-reg: '%2' }
77 - { reg: '$vgpr0', virtual-reg: '%3' }
80 liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
86 S_CMP_LT_I32 0, %0, implicit-def $scc
87 %12 = V_ADD_CO_U32_e32 %3, %3, implicit-def $vcc, implicit $exec
88 %5 = S_CSELECT_B32 %2, %1, implicit $scc
89 %11 = V_ADD_CO_U32_e32 %5, %12, implicit-def $vcc, implicit $exec
90 $vgpr0 = STRICT_WWM %11, implicit $exec
91 SI_RETURN_TO_EPILOG $vgpr0
96 # Second test for awareness that s_or_saveexec_b64 clobbers SCC,
97 # needed because the entry block is treated differently.
102 #CHECK: ENTER_STRICT_WWM
104 #CHECK: S_CSELECT_B32
105 name: test_strict_wwm_scc2
106 tracksRegLiveness: true
109 liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
111 %3:vgpr_32 = COPY $vgpr0
112 %2:sgpr_32 = COPY $sgpr2
113 %1:sgpr_32 = COPY $sgpr1
114 %0:sgpr_32 = COPY $sgpr0
115 %13:sgpr_128 = IMPLICIT_DEF
118 S_CMP_LT_I32 0, %0:sgpr_32, implicit-def $scc
119 %10:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %3:vgpr_32, %13:sgpr_128, 0, 0, 0, 0, implicit $exec
120 %12:vgpr_32 = V_ADD_CO_U32_e32 %3:vgpr_32, %3:vgpr_32, implicit-def $vcc, implicit $exec
121 %5:sgpr_32 = S_CSELECT_B32 %2:sgpr_32, %1:sgpr_32, implicit $scc
122 %11:vgpr_32 = V_ADD_CO_U32_e32 %5:sgpr_32, %12:vgpr_32, implicit-def $vcc, implicit $exec
123 $vgpr0 = STRICT_WWM %11:vgpr_32, implicit $exec
124 $vgpr1 = COPY %10:vgpr_32
125 SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
130 # V_SET_INACTIVE, when its second operand is undef, is replaced by a
131 # COPY by si-wqm. Ensure the instruction is removed.
132 #CHECK-NOT: V_SET_INACTIVE
135 exposesReturnsTwice: false
137 regBankSelected: false
140 tracksRegLiveness: true
143 - { id: 0, class: sgpr_32, preferred-register: '' }
144 - { id: 1, class: sgpr_32, preferred-register: '' }
145 - { id: 2, class: sgpr_32, preferred-register: '' }
146 - { id: 3, class: sgpr_32, preferred-register: '' }
147 - { id: 4, class: sgpr_32, preferred-register: '' }
148 - { id: 5, class: sgpr_128, preferred-register: '' }
149 - { id: 6, class: sgpr_128, preferred-register: '' }
150 - { id: 7, class: sreg_32, preferred-register: '' }
151 - { id: 8, class: vreg_64, preferred-register: '' }
152 - { id: 9, class: sreg_32, preferred-register: '' }
153 - { id: 10, class: vgpr_32, preferred-register: '' }
154 - { id: 11, class: vgpr_32, preferred-register: '' }
155 - { id: 12, class: sreg_32, preferred-register: '' }
156 - { id: 13, class: vgpr_32, preferred-register: '' }
157 - { id: 14, class: vgpr_32, preferred-register: '' }
158 - { id: 15, class: vgpr_32, preferred-register: '' }
159 - { id: 16, class: vgpr_32, preferred-register: '' }
161 - { reg: '$sgpr0', virtual-reg: '%0' }
162 - { reg: '$sgpr1', virtual-reg: '%1' }
163 - { reg: '$sgpr2', virtual-reg: '%2' }
164 - { reg: '$sgpr3', virtual-reg: '%3' }
167 liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
169 %3:sgpr_32 = COPY $sgpr3
170 %2:sgpr_32 = COPY $sgpr2
171 %1:sgpr_32 = COPY $sgpr1
172 %0:sgpr_32 = COPY $sgpr0
173 %6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
174 %5:sgpr_128 = COPY %6
175 %7:sreg_32 = S_MOV_B32 0
176 %8:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %6, %7, 0, 0, 0, implicit $exec
177 %16:vgpr_32 = COPY %8.sub1
178 %11:vgpr_32 = COPY %16
179 %10:vgpr_32 = V_SET_INACTIVE_B32 %11, undef %12:sreg_32, implicit $exec, implicit-def $scc
180 %14:vgpr_32 = COPY %7
181 %13:vgpr_32 = V_MOV_B32_dpp %14, killed %10, 323, 12, 15, 0, implicit $exec
182 early-clobber %15:vgpr_32 = STRICT_WWM killed %13, implicit $exec
183 BUFFER_STORE_DWORD_OFFSET_exact killed %15, %6, %7, 4, 0, 0, implicit $exec
189 # Ensure that strict_wwm is not put around an EXEC copy
190 #CHECK-LABEL: name: copy_exec
191 #CHECK: %7:sreg_64 = COPY $exec
192 #CHECK-NEXT: %14:sreg_64 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
193 #CHECK-NEXT: %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
194 #CHECK-NEXT: $exec = EXIT_STRICT_WWM %14
195 #CHECK-NEXT: %9:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %7.sub0, 0, implicit $exec
197 tracksRegLiveness: true
200 liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
202 %3:sgpr_32 = COPY $sgpr3
203 %2:sgpr_32 = COPY $sgpr2
204 %1:sgpr_32 = COPY $sgpr1
205 %0:sgpr_32 = COPY $sgpr0
206 %4:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
207 %5:sreg_32 = S_MOV_B32 0
208 %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %4, %5, 0, 0, 0, implicit $exec
210 %8:sreg_64 = COPY $exec
211 %9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
212 %10:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %8.sub0:sreg_64, 0, implicit $exec
213 %11:vgpr_32 = V_MOV_B32_dpp %9:vgpr_32, %10:vgpr_32, 312, 15, 15, 0, implicit $exec
214 %12:sreg_32 = V_READLANE_B32 %11:vgpr_32, 63
215 early-clobber %13:sreg_32 = STRICT_WWM %9:vgpr_32, implicit $exec
217 %14:vgpr_32 = COPY %13
218 BUFFER_STORE_DWORD_OFFSET_exact killed %14, %4, %5, 4, 0, 0, implicit $exec
224 # Check exit of WQM is still inserted correctly when SCC is live until block end.
225 # Critically, this tests that compilation does not fail.
226 #CHECK-LABEL: name: scc_always_live
227 #CHECK: %8:vreg_128 = IMAGE_SAMPLE_V4_V2 %7
228 #CHECK-NEXT: S_CMP_EQ_U32 %2, 0, implicit-def $scc
229 #CHECK-NEXT: undef %9.sub0:vreg_64 = nsz arcp nofpexcept V_ADD_F32_e64
230 #CHECK-NEXT: %9.sub1:vreg_64 = nsz arcp nofpexcept V_MUL_F32_e32
231 #CHECK-NEXT: %14:sreg_32_xm0 = COPY $scc
232 #CHECK-NEXT: $exec = S_AND_B64 $exec, %13, implicit-def $scc
233 #CHECK-NEXT: $scc = COPY %14
234 #CHECK-NEXT: %10:vgpr_32 = nsz arcp nofpexcept V_ADD_F32_e64
235 #CHECK-NEXT: %11:vreg_128 = IMAGE_SAMPLE_V4_V2
236 #CHECK-NEXT: S_CBRANCH_SCC0 %bb.2
237 name: scc_always_live
238 tracksRegLiveness: true
241 liveins: $sgpr1, $sgpr2, $vgpr1, $vgpr2
244 %0:vgpr_32 = COPY $vgpr1
245 %1:vgpr_32 = COPY $vgpr2
246 %8:sgpr_32 = COPY $sgpr2
247 %100:sgpr_256 = IMPLICIT_DEF
248 %101:sgpr_128 = IMPLICIT_DEF
250 %2:vgpr_32 = V_INTERP_P1_F32 %0:vgpr_32, 3, 2, implicit $mode, implicit $m0, implicit $exec
251 %3:vgpr_32 = V_INTERP_P1_F32 %1:vgpr_32, 3, 2, implicit $mode, implicit $m0, implicit $exec
253 undef %7.sub0:vreg_64 = COPY %2:vgpr_32
254 %7.sub1:vreg_64 = COPY %3:vgpr_32
256 %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %7:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
257 S_CMP_EQ_U32 %8:sgpr_32, 0, implicit-def $scc
259 undef %5.sub0:vreg_64 = nsz arcp nofpexcept V_ADD_F32_e64 0, %4.sub0:vreg_128, 0, %3:vgpr_32, 1, 0, implicit $mode, implicit $exec
260 %5.sub1:vreg_64 = nsz arcp nofpexcept V_MUL_F32_e32 %2, %3, implicit $mode, implicit $exec
261 %6:vgpr_32 = nsz arcp nofpexcept V_ADD_F32_e64 0, %2:vgpr_32, 0, %3:vgpr_32, 1, 0, implicit $mode, implicit $exec
263 %9:vreg_128 = IMAGE_SAMPLE_V4_V2 %5:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
265 S_CBRANCH_SCC0 %bb.2, implicit $scc
268 %10:sreg_32 = S_MOV_B32 0
269 BUFFER_STORE_DWORD_OFFSET_exact %6:vgpr_32, %101:sgpr_128, %10:sreg_32, 4, 0, 0, implicit $exec
273 $vgpr0 = COPY %4.sub0:vreg_128
274 $vgpr1 = COPY %4.sub1:vreg_128
275 $vgpr2 = COPY %9.sub0:vreg_128
276 $vgpr3 = COPY %9.sub1:vreg_128
277 SI_RETURN_TO_EPILOG $vgpr0, $vgpr1, $vgpr2, $vgpr3
281 # Check that unnecessary instructions do not get marked for WWM
283 #CHECK-NOT: ENTER_STRICT_WWM
284 #CHECK: BUFFER_LOAD_DWORDX2
285 #CHECK-NOT: ENTER_STRICT_WWM
286 #CHECK: V_SET_INACTIVE_B32
287 #CHECK: V_SET_INACTIVE_B32
288 #CHECK: ENTER_STRICT_WWM
290 name: test_wwm_set_inactive_propagation
291 tracksRegLiveness: true
294 liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
295 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
296 %1:vgpr_32 = COPY $vgpr0
297 %2:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN %1:vgpr_32, %0:sgpr_128, 0, 0, 0, 0, implicit $exec
298 %2.sub0:vreg_64 = V_SET_INACTIVE_B32 %2.sub0:vreg_64, 0, implicit $exec, implicit-def $scc
299 %2.sub1:vreg_64 = V_SET_INACTIVE_B32 %2.sub1:vreg_64, 0, implicit $exec, implicit-def $scc
300 %3:vreg_64 = nnan nsz arcp contract reassoc nofpexcept V_MAX_F64_e64 0, %2:vreg_64, 0, %2:vreg_64, 0, 0, implicit $mode, implicit $exec
301 $vgpr0 = STRICT_WWM %3.sub0:vreg_64, implicit $exec
302 $vgpr1 = STRICT_WWM %3.sub1:vreg_64, implicit $exec
303 SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
307 # Check that WQM marking occurs correctly through phi nodes in live range graph.
308 # If not, the initial V_MOV will not be in WQM.
310 #CHECK-LABEL: name: test_wqm_lr_phi
313 #CHECK-NEXT: V_MOV_B32_e32 -10
314 #CHECK-NEXT: V_MOV_B32_e32 0
315 name: test_wqm_lr_phi
316 tracksRegLiveness: true
319 undef %0.sub0:vreg_64 = V_MOV_B32_e32 -10, implicit $exec
320 %0.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
321 %1:sreg_64 = S_GETPC_B64
322 %2:sgpr_256 = S_LOAD_DWORDX8_IMM %1:sreg_64, 32, 0
325 $vcc = V_CMP_LT_U32_e64 4, 4, implicit $exec
326 S_CBRANCH_VCCNZ %bb.3, implicit $vcc
330 %0.sub0:vreg_64 = V_ADD_U32_e32 1, %0.sub1, implicit $exec
334 %0.sub1:vreg_64 = V_ADD_U32_e32 1, %0.sub1, implicit $exec
338 %3:sgpr_128 = IMPLICIT_DEF
339 %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %2:sgpr_256, %3:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
340 $vgpr0 = COPY %4.sub0:vreg_128
341 $vgpr1 = COPY %4.sub1:vreg_128
342 SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
346 #CHECK-LABEL: name: no_wqm_in_cs
349 tracksRegLiveness: true
352 liveins: $vgpr1, $vgpr2
354 undef %0.sub0:vreg_64 = COPY $vgpr1
355 %0.sub1:vreg_64 = COPY $vgpr2
356 %100:sgpr_256 = IMPLICIT_DEF
357 %101:sgpr_128 = IMPLICIT_DEF
359 %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
363 #CHECK-LABEL: name: no_wqm_in_es
366 tracksRegLiveness: true
369 liveins: $vgpr1, $vgpr2
371 undef %0.sub0:vreg_64 = COPY $vgpr1
372 %0.sub1:vreg_64 = COPY $vgpr2
373 %100:sgpr_256 = IMPLICIT_DEF
374 %101:sgpr_128 = IMPLICIT_DEF
376 %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
380 #CHECK-LABEL: name: no_wqm_in_gs
383 tracksRegLiveness: true
386 liveins: $vgpr1, $vgpr2
388 undef %0.sub0:vreg_64 = COPY $vgpr1
389 %0.sub1:vreg_64 = COPY $vgpr2
390 %100:sgpr_256 = IMPLICIT_DEF
391 %101:sgpr_128 = IMPLICIT_DEF
393 %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
397 #CHECK-LABEL: name: no_wqm_in_hs
400 tracksRegLiveness: true
403 liveins: $vgpr1, $vgpr2
405 undef %0.sub0:vreg_64 = COPY $vgpr1
406 %0.sub1:vreg_64 = COPY $vgpr2
407 %100:sgpr_256 = IMPLICIT_DEF
408 %101:sgpr_128 = IMPLICIT_DEF
410 %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
414 #CHECK-LABEL: name: no_wqm_in_ls
417 tracksRegLiveness: true
420 liveins: $vgpr1, $vgpr2
422 undef %0.sub0:vreg_64 = COPY $vgpr1
423 %0.sub1:vreg_64 = COPY $vgpr2
424 %100:sgpr_256 = IMPLICIT_DEF
425 %101:sgpr_128 = IMPLICIT_DEF
427 %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
431 #CHECK-LABEL: name: no_wqm_in_vs
434 tracksRegLiveness: true
437 liveins: $vgpr1, $vgpr2
439 undef %0.sub0:vreg_64 = COPY $vgpr1
440 %0.sub1:vreg_64 = COPY $vgpr2
441 %100:sgpr_256 = IMPLICIT_DEF
442 %101:sgpr_128 = IMPLICIT_DEF
444 %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)