1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,WAVE64 %s
3 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,WAVE32 %s
5 ; End-to-end tests for scalar vs. vector boolean legalization strategies.
; Select between %b and %c, where the i1 condition is bit 0 of the `inreg`
; argument %a; the chosen i32 is then bitcast to float (the declared return
; type).
; Per the generated checks below, the expected code masks the condition with
; s_and_b32 (..., 1), compares it against 0 with v_cmp_ne_u32 into vcc (wave64
; run) or vcc_lo (wave32 run), and feeds that to v_cndmask_b32.
7 define amdgpu_ps float @select_vgpr_sgpr_trunc_cond(i32 inreg %a, i32 %b, i32 %c) {
8 ; WAVE64-LABEL: select_vgpr_sgpr_trunc_cond:
10 ; WAVE64-NEXT: s_and_b32 s0, 1, s0
11 ; WAVE64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
12 ; WAVE64-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
13 ; WAVE64-NEXT: ; return to shader part epilog
15 ; WAVE32-LABEL: select_vgpr_sgpr_trunc_cond:
17 ; WAVE32-NEXT: s_and_b32 s0, 1, s0
18 ; WAVE32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
19 ; WAVE32-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
20 ; WAVE32-NEXT: ; return to shader part epilog
; Only the low bit of %a is kept as the select condition.
21 %cc = trunc i32 %a to i1
22 %r = select i1 %cc, i32 %b, i32 %c
; Reinterpret the selected integer as float to match the function's return type.
23 %r.f = bitcast i32 %r to float
; Same shape as a select on a truncated `inreg` condition, but the i1
; condition is the AND of bit 0 of two `inreg` arguments (%a.0, %a.1).
; Per the generated checks below, the expected code ANDs the two 32-bit scalar
; inputs first (s_and_b32 s0, s0, s1), masks the result with 1 once, then
; compares into vcc (wave64 run) or vcc_lo (wave32 run) for v_cndmask_b32.
27 define amdgpu_ps float @select_vgpr_sgpr_trunc_and_cond(i32 inreg %a.0, i32 inreg %a.1, i32 %b, i32 %c) {
28 ; WAVE64-LABEL: select_vgpr_sgpr_trunc_and_cond:
30 ; WAVE64-NEXT: s_and_b32 s0, s0, s1
31 ; WAVE64-NEXT: s_and_b32 s0, 1, s0
32 ; WAVE64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
33 ; WAVE64-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
34 ; WAVE64-NEXT: ; return to shader part epilog
36 ; WAVE32-LABEL: select_vgpr_sgpr_trunc_and_cond:
38 ; WAVE32-NEXT: s_and_b32 s0, s0, s1
39 ; WAVE32-NEXT: s_and_b32 s0, 1, s0
40 ; WAVE32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
41 ; WAVE32-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
42 ; WAVE32-NEXT: ; return to shader part epilog
; Each condition input contributes only its low bit.
43 %cc.0 = trunc i32 %a.0 to i1
44 %cc.1 = trunc i32 %a.1 to i1
; The combined i1 is what drives the select.
45 %and = and i1 %cc.0, %cc.1
46 %r = select i1 %and, i32 %b, i32 %c
; Reinterpret the selected integer as float to match the function's return type.
47 %r.f = bitcast i32 %r to float
; Select where every argument is `inreg` and the result type is i32: the
; condition is the AND of bit 0 of %a.0 and %a.1, choosing between %b and %c.
; Both RUN configurations share one set of expectations (the common GCN
; prefix): per the checks below, only scalar instructions are expected --
; two s_and_b32, an s_cmp_lg_u32 against 0, and an s_cselect_b32.
51 define amdgpu_ps i32 @select_sgpr_trunc_and_cond(i32 inreg %a.0, i32 inreg %a.1, i32 inreg %b, i32 inreg %c) {
52 ; GCN-LABEL: select_sgpr_trunc_and_cond:
54 ; GCN-NEXT: s_and_b32 s0, s0, s1
55 ; GCN-NEXT: s_and_b32 s0, s0, 1
56 ; GCN-NEXT: s_cmp_lg_u32 s0, 0
57 ; GCN-NEXT: s_cselect_b32 s0, s2, s3
58 ; GCN-NEXT: ; return to shader part epilog
; Each condition input contributes only its low bit.
59 %cc.0 = trunc i32 %a.0 to i1
60 %cc.1 = trunc i32 %a.1 to i1
61 %and = and i1 %cc.0, %cc.1
62 %r = select i1 %and, i32 %b, i32 %c
; Kernel that branches on bit 0 of its i32 argument %cond: the true edge goes
; to %bb0 (volatile store of 0), the false edge to %bb1 (volatile store of 1).
; Per the generated checks below, the expected code loads the argument with
; s_load_dword, inverts bit 0 (s_xor_b32 ..., 1), masks with 1, compares with
; s_cmp_lg_u32 and uses s_cbranch_scc1 to jump to the %bb1 label; %bb0 is the
; fall-through block. The two RUN configurations differ only in the wait
; emitted after each store (s_waitcnt vmcnt(0) vs. s_waitcnt_vscnt null, 0x0).
66 define amdgpu_kernel void @sgpr_trunc_brcond(i32 %cond) {
67 ; WAVE64-LABEL: sgpr_trunc_brcond:
68 ; WAVE64: ; %bb.0: ; %entry
69 ; WAVE64-NEXT: s_load_dword s0, s[0:1], 0x24
70 ; WAVE64-NEXT: s_waitcnt lgkmcnt(0)
71 ; WAVE64-NEXT: s_xor_b32 s0, s0, 1
72 ; WAVE64-NEXT: s_and_b32 s0, s0, 1
73 ; WAVE64-NEXT: s_cmp_lg_u32 s0, 0
74 ; WAVE64-NEXT: s_cbranch_scc1 .LBB3_2
75 ; WAVE64-NEXT: ; %bb.1: ; %bb0
76 ; WAVE64-NEXT: v_mov_b32_e32 v0, 0
77 ; WAVE64-NEXT: global_store_dword v[0:1], v0, off
78 ; WAVE64-NEXT: s_waitcnt vmcnt(0)
79 ; WAVE64-NEXT: .LBB3_2: ; %bb1
80 ; WAVE64-NEXT: v_mov_b32_e32 v0, 1
81 ; WAVE64-NEXT: global_store_dword v[0:1], v0, off
82 ; WAVE64-NEXT: s_waitcnt vmcnt(0)
84 ; WAVE32-LABEL: sgpr_trunc_brcond:
85 ; WAVE32: ; %bb.0: ; %entry
86 ; WAVE32-NEXT: s_load_dword s0, s[0:1], 0x24
87 ; WAVE32-NEXT: s_waitcnt lgkmcnt(0)
88 ; WAVE32-NEXT: s_xor_b32 s0, s0, 1
89 ; WAVE32-NEXT: s_and_b32 s0, s0, 1
90 ; WAVE32-NEXT: s_cmp_lg_u32 s0, 0
91 ; WAVE32-NEXT: s_cbranch_scc1 .LBB3_2
92 ; WAVE32-NEXT: ; %bb.1: ; %bb0
93 ; WAVE32-NEXT: v_mov_b32_e32 v0, 0
94 ; WAVE32-NEXT: global_store_dword v[0:1], v0, off
95 ; WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
96 ; WAVE32-NEXT: .LBB3_2: ; %bb1
97 ; WAVE32-NEXT: v_mov_b32_e32 v0, 1
98 ; WAVE32-NEXT: global_store_dword v[0:1], v0, off
99 ; WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; Only the low bit of %cond decides the branch.
101 %trunc = trunc i32 %cond to i1
102 br i1 %trunc, label %bb0, label %bb1
; The stored constants (0 and 1) make the two successors distinguishable in the
; checks; `volatile` keeps each store as its own memory access.
; NOTE(review): the store address is `undef`; newer LLVM tests tend to prefer
; `poison` -- confirm the generated checks are unchanged before switching.
105 store volatile i32 0, ptr addrspace(1) undef
109 store volatile i32 1, ptr addrspace(1) undef
; Kernel that branches on the AND of bit 0 of its two i32 arguments: the true
; edge goes to %bb0 (volatile store of 0), the false edge to %bb1 (volatile
; store of 1).
; Per the generated checks below, both arguments are loaded together with
; s_load_dwordx2 and ANDed as full 32-bit scalars (s_and_b32 s0, s0, s1); the
; result then has bit 0 inverted (s_xor_b32 ..., 1), is masked with 1 and
; compared with s_cmp_lg_u32, and s_cbranch_scc1 jumps to the %bb1 label, with
; %bb0 as the fall-through block. The two RUN configurations differ only in
; the wait emitted after each store (s_waitcnt vmcnt(0) vs.
; s_waitcnt_vscnt null, 0x0).
113 define amdgpu_kernel void @brcond_sgpr_trunc_and(i32 %cond0, i32 %cond1) {
114 ; WAVE64-LABEL: brcond_sgpr_trunc_and:
115 ; WAVE64: ; %bb.0: ; %entry
116 ; WAVE64-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
117 ; WAVE64-NEXT: s_waitcnt lgkmcnt(0)
118 ; WAVE64-NEXT: s_and_b32 s0, s0, s1
119 ; WAVE64-NEXT: s_xor_b32 s0, s0, 1
120 ; WAVE64-NEXT: s_and_b32 s0, s0, 1
121 ; WAVE64-NEXT: s_cmp_lg_u32 s0, 0
122 ; WAVE64-NEXT: s_cbranch_scc1 .LBB4_2
123 ; WAVE64-NEXT: ; %bb.1: ; %bb0
124 ; WAVE64-NEXT: v_mov_b32_e32 v0, 0
125 ; WAVE64-NEXT: global_store_dword v[0:1], v0, off
126 ; WAVE64-NEXT: s_waitcnt vmcnt(0)
127 ; WAVE64-NEXT: .LBB4_2: ; %bb1
128 ; WAVE64-NEXT: v_mov_b32_e32 v0, 1
129 ; WAVE64-NEXT: global_store_dword v[0:1], v0, off
130 ; WAVE64-NEXT: s_waitcnt vmcnt(0)
132 ; WAVE32-LABEL: brcond_sgpr_trunc_and:
133 ; WAVE32: ; %bb.0: ; %entry
134 ; WAVE32-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
135 ; WAVE32-NEXT: s_waitcnt lgkmcnt(0)
136 ; WAVE32-NEXT: s_and_b32 s0, s0, s1
137 ; WAVE32-NEXT: s_xor_b32 s0, s0, 1
138 ; WAVE32-NEXT: s_and_b32 s0, s0, 1
139 ; WAVE32-NEXT: s_cmp_lg_u32 s0, 0
140 ; WAVE32-NEXT: s_cbranch_scc1 .LBB4_2
141 ; WAVE32-NEXT: ; %bb.1: ; %bb0
142 ; WAVE32-NEXT: v_mov_b32_e32 v0, 0
143 ; WAVE32-NEXT: global_store_dword v[0:1], v0, off
144 ; WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
145 ; WAVE32-NEXT: .LBB4_2: ; %bb1
146 ; WAVE32-NEXT: v_mov_b32_e32 v0, 1
147 ; WAVE32-NEXT: global_store_dword v[0:1], v0, off
148 ; WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; Each condition input contributes only its low bit.
150 %trunc0 = trunc i32 %cond0 to i1
151 %trunc1 = trunc i32 %cond1 to i1
; The branch is taken on the conjunction of the two truncated bits.
152 %and = and i1 %trunc0, %trunc1
153 br i1 %and, label %bb0, label %bb1
; The stored constants (0 and 1) make the two successors distinguishable in the
; checks; `volatile` keeps each store as its own memory access.
; NOTE(review): the store address is `undef`; newer LLVM tests tend to prefer
; `poison` -- confirm the generated checks are unchanged before switching.
156 store volatile i32 0, ptr addrspace(1) undef
160 store volatile i32 1, ptr addrspace(1) undef