1 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass si-insert-waits %s -o - | FileCheck %s
4 define amdgpu_kernel void @flat_zero_waitcnt(i32 addrspace(1)* %global4,
5 <4 x i32> addrspace(1)* %global16,
6 i32 addrspace(4)* %flat4,
7 <4 x i32> addrspace(4)* %flat16) {
11 define amdgpu_kernel void @single_fallthrough_successor_no_end_block_wait() {
15 define amdgpu_kernel void @single_branch_successor_not_next_block() {
22 # CHECK-LABEL: name: flat_zero_waitcnt
25 # CHECK: FLAT_LOAD_DWORD
26 # CHECK: FLAT_LOAD_DWORDX4
27 # Global loads will return in order so we should:
28 # s_waitcnt vmcnt(1) lgkmcnt(0)
29 # CHECK-NEXT: S_WAITCNT 113
32 # CHECK: FLAT_LOAD_DWORD
33 # CHECK: S_WAITCNT 3952
34 # CHECK: FLAT_LOAD_DWORDX4
35 # The first load has no mem operand, so we should assume it accesses the flat address space.
37 # s_waitcnt lgkmcnt(0) (vmcnt(0) is already covered by the S_WAITCNT 3952 above)
38 # CHECK-NEXT: S_WAITCNT 127
41 # CHECK: FLAT_LOAD_DWORD
42 # CHECK: S_WAITCNT 3952
43 # CHECK: FLAT_LOAD_DWORDX4
45 # One outstanding load accesses the flat address space.
46 # s_waitcnt lgkmcnt(0) (vmcnt(0) is already covered by the S_WAITCNT 3952 above)
47 # CHECK-NEXT: S_WAITCNT 127
49 name: flat_zero_waitcnt
54 %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.global4)
55 %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16)
56 %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
61 %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr
62 %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16)
63 %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
67 %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.flat4)
68 %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.flat16)
69 %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
73 # There is only a single fallthrough successor block, so there's no
74 # need to wait immediately.
76 # CHECK-LABEL: name: single_fallthrough_successor_no_end_block_wait
77 # CHECK: %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2
78 # CHECK-NOT: S_WAITCNT
81 # CHECK-NEXT: V_LSHLREV_B64
82 # CHECK-NEXT: S_WAITCNT 112
83 # CHECK-NEXT: FLAT_STORE_DWORD
84 name: single_fallthrough_successor_no_end_block_wait
89 %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr
92 %vgpr3_vgpr4 = V_LSHLREV_B64 4, %vgpr7_vgpr8, implicit %exec
93 FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
97 # The block has a single predecessor with a single successor, but it
98 # is not the next block so it's non-obvious that the wait is not needed.
101 # CHECK-LABEL: name: single_branch_successor_not_next_block
102 # CHECK: %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2
103 # CHECK-NEXT: S_WAITCNT 112
106 # CHECK-NEXT: FLAT_STORE_DWORD
107 # CHECK-NEXT: S_ENDPGM
110 # CHECK-NEXT: V_LSHLREV_B64
111 # CHECK-NEXT: FLAT_STORE_DWORD
112 name: single_branch_successor_not_next_block
117 %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr
121 FLAT_STORE_DWORD %vgpr8_vgpr9, %vgpr10, 0, 0, 0, implicit %exec, implicit %flat_scr
125 %vgpr3_vgpr4 = V_LSHLREV_B64 4, %vgpr7_vgpr8, implicit %exec
126 FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr