1 ; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs -amdgpu-s-branch-bits=7 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1030 %s
2 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-s-branch-bits=7 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1010 %s
4 ; On gfx1010 a branch whose offset is 0x3f needs a workaround, so branch
5 ; relaxation overestimates the branch size there in case a nop must be inserted.
7 ; GCN-LABEL: {{^}}long_forward_scc_branch_3f_offset_bug:
8 ; GFX1030: s_cmp_lg_u32
9 ; GFX1030-NEXT: s_cbranch_scc1 [[ENDBB:BB[0-9]+_[0-9]+]]
11 ; GFX1010: s_cmp_lg_u32
12 ; GFX1010-NEXT: s_cbranch_scc0 [[RELAX_BB:BB[0-9]+_[0-9]+]]
13 ; GFX1010: s_getpc_b64
14 ; GFX1010-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
15 ; GFX1010-NEXT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB:BB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
16 ; GFX1010-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB]]-[[POST_GETPC]])>>32
17 ; GFX1010: [[RELAX_BB]]:
24 ; GCN: global_store_dword
; Kernel variant of the test. The inline asm result uses the "=s" constraint
; and feeds the second compare, so the branch under test is the scalar
; s_cbranch_scc* form matched by the checks preceding this definition.
; The trailing byte counts on the body lines track the accumulated code size:
; the final branch costs 4 bytes on gfx1030 and 8 bytes on gfx1010 once the
; offset workaround is accounted for.
25 define amdgpu_kernel void @long_forward_scc_branch_3f_offset_bug(i32 addrspace(1)* %arg, i32 %cnd0) #0 {
27 %cmp0 = icmp eq i32 %cnd0, 0
28 br i1 %cmp0, label %bb2, label %bb3
31 %val = call i32 asm sideeffect
43 v_nop_e64", "=s"() ; 20 * 12 = 240
44 call void @llvm.amdgcn.s.sleep(i32 0) ; +4 = 244
45 %cmp1 = icmp eq i32 %val, 0 ; +4 = 248
46 br i1 %cmp1, label %bb2, label %bb3 ; +4 (gfx1030), +8 with workaround (gfx1010)
49 store volatile i32 %cnd0, i32 addrspace(1)* %arg
53 ; GCN-LABEL: {{^}}long_forward_exec_branch_3f_offset_bug:
54 ; GFX1030: v_cmp_eq_u32
55 ; GFX1030: s_and_saveexec_b32
56 ; GFX1030-NEXT: s_cbranch_execnz [[RELAX_BB:BB[0-9]+_[0-9]+]]
58 ; GFX1010: v_cmp_eq_u32
59 ; GFX1010: s_and_saveexec_b32
60 ; GFX1010-NEXT: s_cbranch_execnz [[RELAX_BB:BB[0-9]+_[0-9]+]]
63 ; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
64 ; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB:BB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
65 ; GCN-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB]]-[[POST_GETPC]])>>32
70 ; GCN: s_cbranch_execz
73 ; GCN: global_store_dword
; Non-kernel variant of the test. The inline asm result uses the "=v"
; constraint and feeds the second compare, so the branch under test is the
; exec-mask form (s_and_saveexec_b32 followed by s_cbranch_exec*) matched by
; the checks preceding this definition.
; The trailing byte counts on the body lines track the accumulated code size:
; the final branch costs 4 bytes on gfx1030 and 8 bytes on gfx1010 once the
; offset workaround is accounted for.
74 define void @long_forward_exec_branch_3f_offset_bug(i32 addrspace(1)* %arg, i32 %cnd0) #0 {
76 %cmp0 = icmp eq i32 %cnd0, 0
77 br i1 %cmp0, label %bb2, label %bb3
80 %val = call i32 asm sideeffect
92 v_nop_e64", "=v"() ; 20 * 12 = 240
93 call void @llvm.amdgcn.s.sleep(i32 0) ; +4 = 244
94 %cmp1 = icmp eq i32 %val, 0 ; +4 = 248
95 br i1 %cmp1, label %bb2, label %bb3 ; +4 (gfx1030), +8 with workaround (gfx1010)
98 store volatile i32 %cnd0, i32 addrspace(1)* %arg
; Sleep intrinsic called once in each test body, where the size annotations
; count it as 4 bytes of code. Its argument must be an immediate (immarg).
102 declare void @llvm.amdgcn.s.sleep(i32 immarg)