1 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=branch-relaxation -amdgpu-s-branch-bits=4 -o - %s | FileCheck -check-prefix=GCN %s
3 # Make sure there's no assert if the DBG_VALUE ends up in the same
4 # block as the branch expansion.
6 # GCN-LABEL: long_branch_dbg_value:
8 # GCN-NEXT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- [DW_OP_plus_uconst 12, DW_OP_stack_value]
9 # GCN-NEXT: .loc 1 0 42 is_stmt 0 ; /tmp/test_debug_value.cl:0:42
10 # GCN-NEXT: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
11 # GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
12 # GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], (.LBB0_4-[[POST_GETPC]])&4294967295
13 # GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], (.LBB0_4-[[POST_GETPC]])>>32
14 # GCN-NEXT: s_setpc_b64
; IR kernel referenced by the machine function's memory operands and debug info.
; It loads a global pointer (kernarg offset 0) and a float (kernarg offset 8),
; stores arg1 * arg1 to element 3 of the pointer, then branches on that value.
18 define amdgpu_kernel void @long_branch_dbg_value(ptr addrspace(1) nocapture %arg, float %arg1) #1 !dbg !5 {
; Kernarg segment base; both arguments are read through it.
20 %long_branch_dbg_value.kernarg.segment = call nonnull align 16 dereferenceable(12) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; %arg: pointer argument at byte offset 0 of the kernarg segment.
21 %arg.kernarg.offset = getelementptr inbounds i8, ptr addrspace(4) %long_branch_dbg_value.kernarg.segment, i64 0
22 %arg.kernarg.offset.cast = bitcast ptr addrspace(4) %arg.kernarg.offset to ptr addrspace(4), !amdgpu.uniform !2, !amdgpu.noclobber !2
23 %arg.load = load ptr addrspace(1), ptr addrspace(4) %arg.kernarg.offset.cast, align 16, !invariant.load !2
; %arg1: float argument at byte offset 8 of the kernarg segment.
24 %arg1.kernarg.offset = getelementptr inbounds i8, ptr addrspace(4) %long_branch_dbg_value.kernarg.segment, i64 8
25 %arg1.kernarg.offset.cast = bitcast ptr addrspace(4) %arg1.kernarg.offset to ptr addrspace(4), !amdgpu.uniform !2, !amdgpu.noclobber !2
26 %arg1.load = load float, ptr addrspace(4) %arg1.kernarg.offset.cast, align 8, !invariant.load !2
27 %tmp = fmul float %arg1.load, %arg1.load
; %tmp2 = &arg[3], i.e. 12 bytes past the loaded pointer (float index 3).
28 %tmp2 = getelementptr inbounds float, ptr addrspace(1) %arg.load, i64 3
; Bind %tmp2 to the debug variable !11; the dbg.value and the store share location !12.
29 call void @llvm.dbg.value(metadata ptr addrspace(1) %tmp2, metadata !11, metadata !DIExpression()) #5, !dbg !12
30 store float %tmp, ptr addrspace(1) %tmp2, align 4, !dbg !12
; 0x3810000000000000 is 2^-126 written as a double-precision hex literal.
31 %tmp3 = fcmp olt float %tmp, 0x3810000000000000
; Branch on the inverted compare: go to %bb4 when %tmp is NOT below the threshold, else to %bb8.
32 %tmp3.inv = xor i1 %tmp3, true
33 br i1 %tmp3.inv, label %bb4, label %bb8, !amdgpu.uniform !2
; Volatile load from an undef address, compared for equality with +infinity
; (0x7FF0000000000000); equal goes to %bb7, otherwise to %Flow.
36 %tmp5 = load volatile float, ptr addrspace(1) undef, align 4
37 %tmp6 = fcmp oeq float %tmp5, 0x7FF0000000000000
38 br i1 %tmp6, label %bb7, label %Flow, !amdgpu.uniform !2
41 br label %Flow, !amdgpu.uniform !2
43 Flow: ; preds = %bb7, %bb4
44 br label %bb8, !amdgpu.uniform !2
46 bb8: ; preds = %bb, %Flow
; Intrinsics called by @long_branch_dbg_value: the kernarg segment pointer
; accessor and the debug-value marker.
50 declare align 4 ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #2
51 declare void @llvm.dbg.value(metadata, metadata, metadata) #0
; Attribute groups. #0 is used by the llvm.dbg.value declaration, #1 by the
; kernel definition, #2 by the kernarg.segment.ptr declaration, and #5 by the
; llvm.dbg.value call site.
; NOTE(review): #3 and #4 do not appear to be referenced by this test --
; confirm before removing them.
53 attributes #0 = { nounwind readnone speculatable willreturn }
54 attributes #1 = { nounwind writeonly }
55 attributes #2 = { nounwind readnone speculatable willreturn }
56 attributes #3 = { convergent nounwind willreturn }
57 attributes #4 = { convergent nounwind readnone willreturn }
58 attributes #5 = { nounwind }
; Debug-info metadata backing the DEBUG_VALUE and .loc lines checked by this test.
; Module flags are !3 (Dwarf Version 4) and !4 (Debug Info Version 3).
61 !llvm.module.flags = !{!3, !4}
; Compile unit and the source file it refers to.
63 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 244715) (llvm/trunk 244718)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
64 !1 = !DIFile(filename: "/tmp/test_debug_value.cl", directory: "/Users/matt/src/llvm/build_debug")
66 !3 = !{i32 2, !"Dwarf Version", i32 4}
67 !4 = !{i32 2, !"Debug Info Version", i32 3}
; Subprogram attached to the kernel via !dbg !5. Its name "test_debug_value"
; is the function name printed in the expected DEBUG_VALUE comment.
68 !5 = distinct !DISubprogram(name: "test_debug_value", scope: !1, file: !1, line: 1, type: !6, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !10)
69 !6 = !DISubroutineType(types: !7)
; Type of the tracked variable: a 64-bit pointer to a 32-bit signed "int".
71 !8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, align: 32)
72 !9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
; !11 is the variable described by the dbg.value / DBG_VALUE (first argument,
; named "globalptr_arg"); !12 is the source location (line 1, column 42) used
; by the dbg.value, the store and the DBG_VALUE.
74 !11 = !DILocalVariable(name: "globalptr_arg", arg: 1, scope: !5, file: !1, line: 1, type: !8)
75 !12 = !DILocation(line: 1, column: 42, scope: !5)
# Machine function fed to llc starting at the branch-relaxation pass (see the
# RUN line). Its entry block holds a DBG_VALUE and ends in a conditional
# branch to %bb.4.
79 name: long_branch_dbg_value
80 tracksRegLiveness: true
; Entry block: two successors with equal branch weight.
84 successors: %bb.1(0x40000000), %bb.4(0x40000000)
; Load the kernel arguments: the 64-bit pointer (kernarg offset 0) into
; sgpr6_sgpr7 and the float (kernarg offset 8) into sgpr4.
87 renamable $sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset.cast, align 16, addrspace 4)
88 renamable $sgpr4 = S_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5, 8, 0 :: (dereferenceable invariant load (s32) from %ir.arg1.kernarg.offset.cast, align 8, addrspace 4)
; vgpr0 = arg1 * arg1
90 renamable $vgpr0 = nofpexcept V_MUL_F32_e64 0, killed $sgpr4, 0, $sgpr4, 0, 0, implicit $mode, implicit $exec
; The DBG_VALUE under test: variable !11 is described as sgpr6_sgpr7 + 12.
; It sits in the same block as the conditional branch at the end of the block.
91 DBG_VALUE renamable $sgpr6_sgpr7, $noreg, !11, !DIExpression(DW_OP_plus_uconst, 12, DW_OP_stack_value), debug-location !12
; Copy the pointer into vgpr1_vgpr2 and store the product at byte offset 12.
92 $vgpr1 = V_MOV_B32_e32 $sgpr6, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr6_sgpr7
93 $vgpr2 = V_MOV_B32_e32 $sgpr7, implicit $exec, implicit killed $sgpr6_sgpr7, implicit $exec
94 GLOBAL_STORE_DWORD killed renamable $vgpr1_vgpr2, renamable $vgpr0, 12, 0, implicit $exec, debug-location !12 :: (store (s32) into %ir.tmp2, addrspace 1)
; 8388608 = 0x00800000, the f32 bit pattern of 2^-126. The compare result is
; masked with exec into vcc, and the block branches to %bb.4 when vcc is
; non-zero.
; NOTE(review): presumably this is the branch that gets expanded into the
; s_getpc_b64/s_setpc_b64 sequence targeting .LBB0_4 in the CHECK lines,
; given -amdgpu-s-branch-bits=4 on the RUN line -- confirm.
95 renamable $sgpr4 = S_MOV_B32 8388608
96 renamable $sgpr4_sgpr5 = nofpexcept V_CMP_GT_F32_e64 0, killed $sgpr4, 0, killed $vgpr0, 0, implicit $mode, implicit $exec
97 renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
98 S_CBRANCH_VCCNZ %bb.4, implicit killed $vcc
; Second block: volatile load through an IMPLICIT_DEF (undef) address, then a
; not-equal compare against 2139095040 (0x7F800000, the f32 bit pattern of
; +infinity); branches to %bb.3 when vcc is non-zero.
101 successors: %bb.2(0x40000000), %bb.3(0x40000000)
103 renamable $sgpr4_sgpr5 = IMPLICIT_DEF
104 $vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr4_sgpr5
105 $vgpr1 = V_MOV_B32_e32 $sgpr5, implicit $exec, implicit killed $sgpr4_sgpr5, implicit $exec
106 renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`, addrspace 1)
107 renamable $sgpr4 = S_MOV_B32 2139095040
109 renamable $sgpr4_sgpr5 = nofpexcept V_CMP_NEQ_F32_e64 0, killed $sgpr4, 0, killed $vgpr0, 0, implicit $mode, implicit $exec
110 renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
111 S_CBRANCH_VCCNZ %bb.3, implicit killed $vcc
; Remaining successor lists: one block with %bb.3 as its only successor and
; one with %bb.4 as its only successor.
; NOTE(review): presumably these belong to %bb.2 and %bb.3 respectively -- confirm.
114 successors: %bb.3(0x80000000)
118 successors: %bb.4(0x80000000)