1 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1010 %s
2 ; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1030 %s
; Signed "less than" case. The IR branches on icmp slt %x, 16 and takes the
; %if edge when that compare is false.
; On gfx1010 the checks expect a v_cmp_lt_i32 that writes vcc_lo, immediately
; followed by an s_and_saveexec_b32 that reads vcc_lo.
; On gfx1030 the checks expect an s_mov_b32 copy of exec_lo, immediately
; followed by v_cmpx_lt_i32.
; The checked compare uses the constant 15 with "lt" (15 < x), which is the
; x >= 16 condition under which the %if edge is taken.
4 ; GCN-LABEL: {{^}}test_insert_vcmpx_pattern_lt:
5 ; GFX1010: v_cmp_lt_i32_e32 vcc_lo, 15, v{{.*}}
6 ; GFX1010-NEXT: s_and_saveexec_b32 s{{.*}}, vcc_lo
7 ; GFX1030: s_mov_b32 s{{.*}}, exec_lo
8 ; GFX1030-NEXT: v_cmpx_lt_i32_e32 15, v{{.*}}
9 define i32 @test_insert_vcmpx_pattern_lt(i32 %x) {
11 %bc = icmp slt i32 %x, 16
12 br i1 %bc, label %endif, label %if
; Signed "greater than" case. The IR branches on icmp sgt %x, 16 and takes
; the %if edge when that compare is false.
; On gfx1010 the checks expect a v_cmp_gt_i32 that writes vcc_lo, immediately
; followed by an s_and_saveexec_b32 that reads vcc_lo.
; On gfx1030 the checks expect an s_mov_b32 copy of exec_lo, immediately
; followed by v_cmpx_gt_i32.
; The checked compare uses the constant 17 with "gt" (17 > x), which is the
; x <= 16 condition under which the %if edge is taken.
22 ; GCN-LABEL: {{^}}test_insert_vcmpx_pattern_gt:
23 ; GFX1010: v_cmp_gt_i32_e32 vcc_lo, 17, v{{.*}}
24 ; GFX1010-NEXT: s_and_saveexec_b32 s{{.*}}, vcc_lo
25 ; GFX1030: s_mov_b32 s{{.*}}, exec_lo
26 ; GFX1030-NEXT: v_cmpx_gt_i32_e32 17, v{{.*}}
27 define i32 @test_insert_vcmpx_pattern_gt(i32 %x) {
29 %bc = icmp sgt i32 %x, 16
30 br i1 %bc, label %endif, label %if
; Equality case. The IR branches on icmp eq %x, 16 and takes the %if edge
; when that compare is false.
; On gfx1010 the checks expect a v_cmp_ne_u32 that writes vcc_lo, immediately
; followed by an s_and_saveexec_b32 that reads vcc_lo.
; On gfx1030 the checks expect an s_mov_b32 copy of exec_lo, immediately
; followed by v_cmpx_ne_u32.
; The checked compare is "ne" against 16, i.e. the x != 16 condition under
; which the %if edge is taken.
40 ; GCN-LABEL: {{^}}test_insert_vcmpx_pattern_eq:
41 ; GFX1010: v_cmp_ne_u32_e32 vcc_lo, 16, v{{.*}}
42 ; GFX1010-NEXT: s_and_saveexec_b32 s{{.*}}, vcc_lo
43 ; GFX1030: s_mov_b32 s{{.*}}, exec_lo
44 ; GFX1030-NEXT: v_cmpx_ne_u32_e32 16, v{{.*}}
45 define i32 @test_insert_vcmpx_pattern_eq(i32 %x) {
47 %bc = icmp eq i32 %x, 16
48 br i1 %bc, label %endif, label %if
; Inequality case. The IR branches on icmp ne %x, 16 and takes the %if edge
; when that compare is false.
; On gfx1010 the checks expect a v_cmp_eq_u32 that writes vcc_lo, immediately
; followed by an s_and_saveexec_b32 that reads vcc_lo.
; On gfx1030 the checks expect an s_mov_b32 copy of exec_lo, immediately
; followed by v_cmpx_eq_u32.
; The checked compare is "eq" against 16, i.e. the x == 16 condition under
; which the %if edge is taken.
58 ; GCN-LABEL: {{^}}test_insert_vcmpx_pattern_ne:
59 ; GFX1010: v_cmp_eq_u32_e32 vcc_lo, 16, v{{.*}}
60 ; GFX1010-NEXT: s_and_saveexec_b32 s{{.*}}, vcc_lo
61 ; GFX1030: s_mov_b32 s{{.*}}, exec_lo
62 ; GFX1030-NEXT: v_cmpx_eq_u32_e32 16, v{{.*}}
63 define i32 @test_insert_vcmpx_pattern_ne(i32 %x) {
65 %bc = icmp ne i32 %x, 16
66 br i1 %bc, label %endif, label %if
; Signed "less than or equal" case. The IR branches on icmp sle %x, 16 and
; takes the %if edge when that compare is false.
; On gfx1010 the checks expect a v_cmp_lt_i32 that writes vcc_lo, immediately
; followed by an s_and_saveexec_b32 that reads vcc_lo.
; On gfx1030 the checks expect an s_mov_b32 copy of exec_lo, immediately
; followed by v_cmpx_lt_i32.
; The checked compare uses the constant 16 with "lt" (16 < x), which is the
; x > 16 condition under which the %if edge is taken.
76 ; GCN-LABEL: {{^}}test_insert_vcmpx_pattern_le:
77 ; GFX1010: v_cmp_lt_i32_e32 vcc_lo, 16, v{{.*}}
78 ; GFX1010-NEXT: s_and_saveexec_b32 s{{.*}}, vcc_lo
79 ; GFX1030: s_mov_b32 s{{.*}}, exec_lo
80 ; GFX1030-NEXT: v_cmpx_lt_i32_e32 16, v{{.*}}
81 define i32 @test_insert_vcmpx_pattern_le(i32 %x) {
83 %bc = icmp sle i32 %x, 16
84 br i1 %bc, label %endif, label %if
; Signed "greater than or equal" case. The IR branches on icmp sge %x, 16 and
; takes the %if edge when that compare is false.
; On gfx1010 the checks expect a v_cmp_gt_i32 that writes vcc_lo, immediately
; followed by an s_and_saveexec_b32 that reads vcc_lo.
; On gfx1030 the checks expect an s_mov_b32 copy of exec_lo, immediately
; followed by v_cmpx_gt_i32.
; The checked compare uses the constant 16 with "gt" (16 > x), which is the
; x < 16 condition under which the %if edge is taken.
94 ; GCN-LABEL: {{^}}test_insert_vcmpx_pattern_ge:
95 ; GFX1010: v_cmp_gt_i32_e32 vcc_lo, 16, v{{.*}}
96 ; GFX1010-NEXT: s_and_saveexec_b32 s{{.*}}, vcc_lo
97 ; GFX1030: s_mov_b32 s{{.*}}, exec_lo
98 ; GFX1030-NEXT: v_cmpx_gt_i32_e32 16, v{{.*}}
99 define i32 @test_insert_vcmpx_pattern_ge(i32 %x) {
101 %bc = icmp sge i32 %x, 16
102 br i1 %bc, label %endif, label %if
; External helper that receives the i64 compare mask in check_live_outs.
112 declare amdgpu_gfx void @check_live_outs_helper(i64) #0
114 ; In cases where the output operand cannot be safely removed,
115 ; don't apply the v_cmpx transformation.
; Negative test. Both targets are expected to keep the same two instructions,
; a v_cmp_eq_u32_e64 that writes an SGPR and a later s_and_saveexec_b32 that
; reads an SGPR. Neither check uses the -NEXT form, so other instructions may
; appear between them, and no v_cmpx is expected even on gfx1030.
; The IR compares %a and %b twice: once as an i1 for the branch, and once via
; llvm.amdgcn.icmp with predicate code 32 (presumably ICMP_EQ; confirm against
; the CmpInst predicate enum). The i64 result of the intrinsic is passed to
; the helper, so the compare result stays live after the branch.
117 ; GCN-LABEL: {{^}}check_live_outs:
118 ; GFX1010: v_cmp_eq_u32_e64 s{{.*}}, v{{.*}}, v{{.*}}
119 ; GFX1010: s_and_saveexec_b32 s{{.*}}, s{{.*}}
120 ; GFX1030: v_cmp_eq_u32_e64 s{{.*}}, v{{.*}}, v{{.*}}
121 ; GFX1030: s_and_saveexec_b32 s{{.*}}, s{{.*}}
122 define amdgpu_cs void @check_live_outs(i32 %a, i32 %b) {
123 %cond = icmp eq i32 %a, %b
124 %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
125 br i1 %cond, label %l1, label %l2
; Use of the compare mask in a successor block (its label line is not visible
; in this excerpt).
127 call amdgpu_gfx void @check_live_outs_helper(i64 %result)
133 ; Omit the transformation if the s_and_saveexec instruction overwrites
134 ; any of the v_cmp source operands.
; Negative test. The checks capture the SGPR number read by v_cmp_ge_i32 into
; the FileCheck variable A and then require that the very same SGPR is the
; destination of s_and_saveexec_b32. Both targets must therefore keep the
; v_cmp + v_mov + s_and_saveexec sequence, and no v_cmpx is expected on
; gfx1030 either.
; The label directive is anchored with {{^}} like every other label directive
; in this file, so it can only match at the start of an output line.
; NOTE(review): the block label below hard-codes .LBB7_3. The 7 is presumably
; this function's zero-based position among the functions in the file, so the
; directive needs updating if functions are added or removed above.
136 ; GCN-LABEL: {{^}}check_saveexec_overwrites_vcmp_source:
137 ; GCN: .LBB7_3: ; %then
138 ; GFX1010: v_cmp_ge_i32_e32 vcc_lo, s[[A:[0-9]+]], v{{.*}}
139 ; GFX1010-NEXT: v_mov_b32_e32 {{.*}}, s[[A]]
140 ; GFX1010-NEXT: s_and_saveexec_b32 s[[A]], vcc_lo
141 ; GFX1030: v_cmp_ge_i32_e32 vcc_lo, s[[A:[0-9]+]], v{{.*}}
142 ; GFX1030-NEXT: v_mov_b32_e32 {{.*}}, s[[A]]
143 ; GFX1030-NEXT: s_and_saveexec_b32 s[[A]], vcc_lo
144 define i32 @check_saveexec_overwrites_vcmp_source(i32 inreg %a, i32 inreg %b) {
146 %0 = icmp sge i32 %a, 0
147 br i1 %0, label %if, label %then
; The low 32 bits of the compare mask are compared against %b (signed "less
; than") to pick between %after and %end.
155 %3 = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
156 %4 = trunc i64 %3 to i32
157 %5 = icmp slt i32 %4, %b
158 br i1 %5, label %after, label %end
; Intrinsic used by check_live_outs and check_saveexec_overwrites_vcmp_source
; to obtain the compare result as an i64 value.
167 declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0