1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -stop-after=amdgpu-unify-divergent-exit-nodes | FileCheck %s --check-prefix=UNIFY
3 ; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs | FileCheck %s
5 declare void @llvm.trap()
6 declare i32 @llvm.amdgcn.workitem.id.x()
8 define amdgpu_kernel void @kernel(i32 %a, ptr addrspace(1) %x, i32 noundef %n) {
9 ; This used to bypass structurization because the structurizer cannot handle a
10 ; CFG with multiple exits. The CFG should now be structurized correctly.
11 ; CHECK-LABEL: kernel:
12 ; CHECK: ; %bb.0: ; %entry
13 ; CHECK-NEXT: s_load_dword s0, s[8:9], 0x10
14 ; CHECK-NEXT: s_load_dword s10, s[8:9], 0x0
15 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
16 ; CHECK-NEXT: s_cmpk_lg_i32 s0, 0x100
17 ; CHECK-NEXT: s_cbranch_scc0 .LBB0_6
18 ; CHECK-NEXT: ; %bb.1: ; %if.else
19 ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 10, v0
20 ; CHECK-NEXT: s_mov_b64 s[4:5], 0
21 ; CHECK-NEXT: s_mov_b64 s[2:3], 0
22 ; CHECK-NEXT: s_mov_b64 s[0:1], 0
23 ; CHECK-NEXT: s_and_saveexec_b64 s[6:7], vcc
24 ; CHECK-NEXT: s_cbranch_execz .LBB0_5
25 ; CHECK-NEXT: ; %bb.2: ; %if.then3
26 ; CHECK-NEXT: s_cmp_lg_u32 s10, 0
27 ; CHECK-NEXT: s_cbranch_scc1 .LBB0_14
28 ; CHECK-NEXT: ; %bb.3:
29 ; CHECK-NEXT: s_mov_b64 s[0:1], -1
30 ; CHECK-NEXT: .LBB0_4: ; %Flow3
31 ; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], exec
32 ; CHECK-NEXT: s_and_b64 s[2:3], s[2:3], exec
33 ; CHECK-NEXT: .LBB0_5: ; %Flow2
34 ; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
35 ; CHECK-NEXT: s_and_b64 vcc, exec, s[4:5]
36 ; CHECK-NEXT: s_cbranch_vccz .LBB0_8
37 ; CHECK-NEXT: s_branch .LBB0_7
38 ; CHECK-NEXT: .LBB0_6:
39 ; CHECK-NEXT: s_mov_b64 s[2:3], 0
40 ; CHECK-NEXT: s_mov_b64 s[0:1], 0
41 ; CHECK-NEXT: s_cbranch_execz .LBB0_8
42 ; CHECK-NEXT: .LBB0_7: ; %if.then
43 ; CHECK-NEXT: s_cmp_lg_u32 s10, 0
44 ; CHECK-NEXT: s_mov_b64 s[0:1], -1
45 ; CHECK-NEXT: s_cbranch_scc1 .LBB0_13
46 ; CHECK-NEXT: .LBB0_8: ; %Flow4
47 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
48 ; CHECK-NEXT: .LBB0_9: ; %UnifiedUnreachableBlock
49 ; CHECK-NEXT: ; divergent unreachable
50 ; CHECK-NEXT: .LBB0_10: ; %Flow6
51 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
52 ; CHECK-NEXT: s_and_saveexec_b64 s[2:3], s[0:1]
53 ; CHECK-NEXT: s_cbranch_execz .LBB0_12
54 ; CHECK-NEXT: ; %bb.11: ; %if.end6.sink.split
55 ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x8
56 ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
57 ; CHECK-NEXT: v_mov_b32_e32 v1, s10
58 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
59 ; CHECK-NEXT: global_store_dword v0, v1, s[0:1]
60 ; CHECK-NEXT: .LBB0_12: ; %UnifiedReturnBlock
61 ; CHECK-NEXT: s_endpgm
62 ; CHECK-NEXT: .LBB0_13: ; %cond.false
63 ; CHECK-NEXT: s_mov_b64 s[0:1], 0
64 ; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], exec
65 ; CHECK-NEXT: s_trap 2
66 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
67 ; CHECK-NEXT: s_cbranch_execnz .LBB0_9
68 ; CHECK-NEXT: s_branch .LBB0_10
69 ; CHECK-NEXT: .LBB0_14: ; %cond.false.i8
70 ; CHECK-NEXT: s_mov_b64 s[2:3], -1
71 ; CHECK-NEXT: s_trap 2
72 ; CHECK-NEXT: s_branch .LBB0_4
; Read the work-item id; it drives the per-lane branch and the store index below.
76 %tid = call i32 @llvm.amdgcn.workitem.id.x()
; Branch on kernel argument %n: if.then when %n == 256, otherwise if.else.
77 %cmp = icmp eq i32 %n, 256
78 br i1 %cmp, label %if.then, label %if.else
; On the %n == 256 side: %a == 0 goes to the store block, anything else to cond.false.
81 %cmp1 = icmp eq i32 %a, 0
82 br i1 %cmp1, label %if.end6.sink.split, label %cond.false
; First trap path (reached when %a != 0 on the %n == 256 side).
85 call void @llvm.trap()
; Branch on the work-item id: ids below 10 go to if.then3, the rest to if.end6.
; Because the condition depends on %tid, it can differ between lanes.
89 %cmp2 = icmp ult i32 %tid, 10
90 br i1 %cmp2, label %if.then3, label %if.end6
; Same %a == 0 test, now on the %tid < 10 side; the failing case goes to cond.false.i8.
93 %cmp1.i7 = icmp eq i32 %a, 0
94 br i1 %cmp1.i7, label %if.end6.sink.split, label %cond.false.i8
; Second trap path (reached when %a != 0 on the %tid < 10 side).
97 call void @llvm.trap()
; Store %a into element %tid of the i32 array at %x (addrspace(1)).
101 %x1 = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %tid
102 store i32 %a, ptr addrspace(1) %x1, align 4
108 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: