1 # RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s -check-prefixes=CHECK,SI
2 # RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=gfx900 -o - %s | FileCheck %s -check-prefixes=CHECK,GFX9
3 # RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -o - %s | FileCheck %s
4 # RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -o - %s | FileCheck %s
# Branch on vccz right after a V_CMP writes vcc, with an S_LOAD_DWORDX2_IMM
# earlier in the instruction stream. The tahiti run (SI prefix) expects an
# S_WAITCNT 127 and a `$vcc = S_MOV_B64 $vcc` between the compare and the branch;
# every other run expects the branch on the line right after the compare.
# NOTE(review): the input branch names %bb.1 while the expected output names
# %bb.2 - presumably blocks are renumbered on output; the block labels are not
# visible here, so confirm.
6 # CHECK-LABEL: name: vccz_corrupt_workaround
7 # CHECK: $vcc = V_CMP_EQ_F32
8 # SI-NEXT: S_WAITCNT 127
9 # SI-NEXT: $vcc = S_MOV_B64 $vcc
10 # CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit killed $vcc
12 name: vccz_corrupt_workaround
13 tracksRegLiveness: true
18 $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0
19 $sgpr7 = S_MOV_B32 61440
21 $vcc = V_CMP_EQ_F32_e64 0, 0, 0, undef $sgpr2, 0, implicit $mode, implicit $exec
22 S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
25 liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
27 $vgpr0 = V_MOV_B32_e32 9, implicit $exec
28 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec
29 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
33 liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
35 $vgpr0 = V_MOV_B32_e32 100, implicit $exec
36 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec
37 $vgpr0 = V_MOV_B32_e32 1, implicit $exec
40 liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
42 $sgpr3 = S_MOV_B32 61440
44 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
# Here the vccz branch reads an undef $vcc. The checks only pin what follows the
# first BUFFER_STORE_DWORD_OFFSET in the output, namely an S_WAITCNT 3855 on the
# tahiti run (SI prefix) and then a V_MOV_B32_e32 write of $vgpr0 on every run.
49 # CHECK-LABEL: name: vccz_corrupt_undef_vcc
50 # CHECK: BUFFER_STORE_DWORD_OFFSET
51 # SI-NEXT: S_WAITCNT 3855
52 # CHECK-NEXT: $vgpr0 = V_MOV_B32_e32
54 name: vccz_corrupt_undef_vcc
55 tracksRegLiveness: true
60 $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0
61 $sgpr7 = S_MOV_B32 61440
63 S_CBRANCH_VCCZ %bb.1, implicit undef $vcc
66 liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
68 $vgpr0 = V_MOV_B32_e32 9, implicit $exec
69 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec
70 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
74 liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
76 $vgpr0 = V_MOV_B32_e32 100, implicit $exec
77 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec
78 $vgpr0 = V_MOV_B32_e32 1, implicit $exec
81 liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
83 $sgpr3 = S_MOV_B32 61440
85 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
90 # Test that after reloading vcc spilled to a vgpr, we insert any necessary
91 # instructions to fix vccz.
# The tahiti and gfx900 runs (SI and GFX9 prefixes) expect a
# `$vcc = S_MOV_B64 $vcc` immediately before the branch; the gfx1010 and gfx1100
# runs expect the branch on the line right after the second V_READLANE_B32.
93 # CHECK-LABEL: name: reload_vcc_from_vgpr
94 # CHECK: $vcc_lo = V_READLANE_B32 $vgpr0, 8, implicit-def $vcc
95 # CHECK: $vcc_hi = V_READLANE_B32 $vgpr0, 9
96 # SI: $vcc = S_MOV_B64 $vcc
97 # GFX9: $vcc = S_MOV_B64 $vcc
98 # CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
100 name: reload_vcc_from_vgpr
103 $vcc_lo = V_READLANE_B32 $vgpr0, 8, implicit-def $vcc
104 $vcc_hi = V_READLANE_B32 $vgpr0, 9
105 S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
110 # Test that after reloading vcc spilled to memory, we insert any necessary
111 # instructions to fix vccz.
# Each half of vcc is loaded into $vgpr0 with BUFFER_LOAD_DWORD_OFFSET and moved
# over with V_READFIRSTLANE_B32. The tahiti and gfx900 runs (SI and GFX9
# prefixes) expect a `$vcc = S_MOV_B64 $vcc` immediately before the branch; the
# gfx1010 and gfx1100 runs expect the branch on the line right after the second
# V_READFIRSTLANE_B32.
113 # CHECK-LABEL: name: reload_vcc_from_mem
114 # CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec
115 # CHECK: $vcc_lo = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
116 # CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, implicit $exec
117 # CHECK: $vcc_hi = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
118 # SI: $vcc = S_MOV_B64 $vcc
119 # GFX9: $vcc = S_MOV_B64 $vcc
120 # CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
122 name: reload_vcc_from_mem
125 $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec
126 $vcc_lo = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
127 $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, implicit $exec
128 $vcc_hi = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
129 S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
134 # Test that after inline asm that defines vcc_lo, we insert any necessary
135 # instructions to fix vccz.
# The tahiti and gfx900 runs (SI and GFX9 prefixes) expect a
# `$vcc = S_MOV_B64 $vcc` immediately before the branch; the gfx1010 and gfx1100
# runs expect the branch on the line right after the INLINEASM.
137 # CHECK-LABEL: name: inlineasm_def_vcc_lo
138 # CHECK: INLINEASM &"; def vcc_lo", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vcc_lo
139 # SI: $vcc = S_MOV_B64 $vcc
140 # GFX9: $vcc = S_MOV_B64 $vcc
141 # CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
143 name: inlineasm_def_vcc_lo
146 INLINEASM &"; def vcc_lo", 1, 10, implicit-def $vcc_lo
147 S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
152 # Test that after inline asm that defines vcc, no unnecessary instructions are
153 # inserted to fix vccz.
# The INLINEASM here carries an implicit-def of the whole $vcc, and every run
# expects the branch on the line right after it.
155 # CHECK-LABEL: name: inlineasm_def_vcc
156 # CHECK: INLINEASM &"; def vcc", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vcc
157 # CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
159 name: inlineasm_def_vcc
162 INLINEASM &"; def vcc", 1, 10, implicit-def $vcc
163 S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
168 # Test vcc definition in a previous basic block.
# The tahiti and gfx900 runs (SI and GFX9 prefixes) expect a
# `$vcc = S_MOV_B64 $vcc` somewhere before the S_CBRANCH_VCCZ; the gfx1010 and
# gfx1100 runs only check that the branch is present.
# NOTE(review): the vcc definition and the block layout are not visible in this
# excerpt - confirm against the full test body.
170 # CHECK-LABEL: name: vcc_def_pred
172 # SI: $vcc = S_MOV_B64 $vcc
173 # GFX9: $vcc = S_MOV_B64 $vcc
174 # CHECK: S_CBRANCH_VCCZ %bb.2, implicit $vcc
181 S_CBRANCH_VCCZ %bb.2, implicit $vcc
186 # Test various ways that the live range of vccz can overlap with the live range
187 # of an outstanding smem load.
# Expected order on the tahiti run (SI prefix) is load, S_WAITCNT 127,
# `$vcc = S_MOV_B64 0`, branch, on consecutive lines. No `$vcc = S_MOV_B64 $vcc`
# is expected between the vcc definition and the branch. The other runs check
# only the function label.
190 # CHECK-LABEL: name: load_wait_def_use
192 # SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
193 # SI-NEXT: S_WAITCNT 127
194 # SI-NEXT: $vcc = S_MOV_B64 0
195 # SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
196 name: load_wait_def_use
199 $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
202 S_CBRANCH_VCCZ %bb.1, implicit $vcc
# Expected on the tahiti run (SI prefix) is the load followed by S_WAITCNT 127,
# and later `$vcc = S_MOV_B64 0` followed directly by the branch, with no
# `$vcc = S_MOV_B64 $vcc` in between.
# NOTE(review): the listing numbers suggest one check line between the wait and
# the vcc definition is missing from this excerpt (presumably the nop the test
# name refers to) - confirm against the full file.
207 # CHECK-LABEL: name: load_wait_nop_def_use
209 # SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
210 # SI-NEXT: S_WAITCNT 127
212 # SI-NEXT: $vcc = S_MOV_B64 0
213 # SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
214 name: load_wait_nop_def_use
217 $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
221 S_CBRANCH_VCCZ %bb.1, implicit $vcc
# Expected order on the tahiti run (SI prefix) is load, `$vcc = S_MOV_B64 0`,
# S_WAITCNT 127, `$vcc = S_MOV_B64 $vcc`, branch, on consecutive lines. Here the
# vcc definition precedes the wait, and a vcc self-copy is expected between the
# wait and the branch.
226 # CHECK-LABEL: name: load_def_wait_use
228 # SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
229 # SI-NEXT: $vcc = S_MOV_B64 0
230 # SI-NEXT: S_WAITCNT 127
231 # SI-NEXT: $vcc = S_MOV_B64 $vcc
232 # SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
233 name: load_def_wait_use
236 $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
239 S_CBRANCH_VCCZ %bb.1, implicit $vcc
# Expected on the tahiti run (SI prefix) is load, `$vcc = S_MOV_B64 0`,
# S_WAITCNT 127, and later a `$vcc = S_MOV_B64 $vcc` followed directly by the
# branch.
# NOTE(review): the listing numbers suggest one check line between the wait and
# the vcc self-copy is missing from this excerpt (presumably the nop the test
# name refers to) - confirm against the full file.
243 # CHECK-LABEL: name: load_def_wait_nop_use
245 # SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
246 # SI-NEXT: $vcc = S_MOV_B64 0
247 # SI-NEXT: S_WAITCNT 127
249 # SI-NEXT: $vcc = S_MOV_B64 $vcc
250 # SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
251 name: load_def_wait_nop_use
254 $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
258 S_CBRANCH_VCCZ %bb.1, implicit $vcc
# Expected order on the tahiti run (SI prefix) is load, `$vcc = S_MOV_B64 0`,
# S_WAITCNT 127, `$vcc = S_MOV_B64 $vcc`, branch, on consecutive lines. The other
# runs check only the function label.
263 # CHECK-LABEL: name: load_def_use
265 # SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
266 # SI-NEXT: $vcc = S_MOV_B64 0
267 # SI-NEXT: S_WAITCNT 127
268 # SI-NEXT: $vcc = S_MOV_B64 $vcc
269 # SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
273 $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
275 S_CBRANCH_VCCZ %bb.1, implicit $vcc
# Expected order on the tahiti run (SI prefix) is `$vcc = S_MOV_B64 0`, load,
# S_WAITCNT 127, `$vcc = S_MOV_B64 $vcc`, branch, on consecutive lines. Here the
# vcc definition precedes the load, and a vcc self-copy is still expected
# between the wait and the branch.
280 # CHECK-LABEL: name: def_load_wait_use
282 # SI-NEXT: $vcc = S_MOV_B64 0
283 # SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
284 # SI-NEXT: S_WAITCNT 127
285 # SI-NEXT: $vcc = S_MOV_B64 $vcc
286 # SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
287 name: def_load_wait_use
291 $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
293 S_CBRANCH_VCCZ %bb.1, implicit $vcc
# Expected on the tahiti run (SI prefix) is `$vcc = S_MOV_B64 0`, load,
# S_WAITCNT 127, and later a `$vcc = S_MOV_B64 $vcc` followed directly by the
# branch.
# NOTE(review): the listing numbers suggest one check line between the wait and
# the vcc self-copy is missing from this excerpt (presumably the nop the test
# name refers to) - confirm against the full file.
298 # CHECK-LABEL: name: def_load_wait_nop_use
300 # SI-NEXT: $vcc = S_MOV_B64 0
301 # SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
302 # SI-NEXT: S_WAITCNT 127
304 # SI-NEXT: $vcc = S_MOV_B64 $vcc
305 # SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
306 name: def_load_wait_nop_use
310 $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
313 S_CBRANCH_VCCZ %bb.1, implicit $vcc
# Expected order on the tahiti run (SI prefix) is `$vcc = S_MOV_B64 0`, load,
# S_WAITCNT 127, `$vcc = S_MOV_B64 $vcc`, branch, on consecutive lines.
# NOTE(review): the visible input appears to place the branch on the line right
# after the load, so the wait and the vcc self-copy look like they must be
# inserted by the pass under test - confirm against the full test body.
318 # CHECK-LABEL: name: def_load_use
320 # SI-NEXT: $vcc = S_MOV_B64 0
321 # SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
322 # SI-NEXT: S_WAITCNT 127
323 # SI-NEXT: $vcc = S_MOV_B64 $vcc
324 # SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
329 $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
330 S_CBRANCH_VCCZ %bb.1, implicit $vcc