1 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3 ; Make sure to test with f32 and i32 compares. If we have to use float
4 ; compares, we always have multiple condition registers. If we can do
5 ; scalar compares, we don't want to use multiple condition registers.
; Case: two integer compares, combined with AND, driving an i32 select.
; The kernel computes (a != b) and (a != c), ANDs the two i1 values, and
; stores x to %out when the conjunction is true, y otherwise.
; Expected code, per the check lines below: each compare is an s_cmp_lg_u32
; whose result is materialized as a -1/0 mask in a 64-bit s-register pair
; (s_cselect_b64); the two masks are combined with s_and_b64, the result is
; ANDed with exec, and a single s_cselect_b32 picks the value, which is then
; copied into a v-register and written with buffer_store_dword.
7 ; GCN-LABEL: {{^}}opt_select_i32_and_cmp_i32:
8 ; GCN-DAG: s_cmp_lg_u32
9 ; GCN: s_cselect_b64 [[CMP1:s\[[0-9]+:[0-9]+\]]], -1, 0
10 ; GCN-DAG: s_cmp_lg_u32
11 ; GCN: s_cselect_b64 [[CMP2:s\[[0-9]+:[0-9]+\]]], -1, 0
12 ; GCN: s_and_b64 [[AND1:s\[[0-9]+:[0-9]+\]]], [[CMP1]], [[CMP2]]
13 ; GCN: s_and_b64 [[AND2:s\[[0-9]+:[0-9]+\]]], [[AND1]], exec
14 ; GCN: s_cselect_b32 [[RESULT:s[0-9]+]]
15 ; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
16 ; GCN: buffer_store_dword [[VRESULT]]
; Inputs: %out is an addrspace(1) pointer; %a/%b/%c are compared; %x/%y are
; the two candidate results.
17 define amdgpu_kernel void @opt_select_i32_and_cmp_i32(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 {
18 %icmp0 = icmp ne i32 %a, %b
19 %icmp1 = icmp ne i32 %a, %c
; Both compares share %a; the select condition is their conjunction.
20 %and = and i1 %icmp0, %icmp1
21 %select = select i1 %and, i32 %x, i32 %y
22 store i32 %select, ptr addrspace(1) %out
; Case: two float compares, combined with AND, driving an i32 select.
; The kernel computes ordered-not-equal (fcmp one) of a vs b and a vs c,
; ANDs the two i1 values, and stores x to %out when true, y otherwise.
; Expected code, per the check lines below: one compare is the e32 form
; writing vcc and the other is the e64 form writing an s-register pair
; (captured as CMP1). The s_and_b64 that merges them must write back into
; that same CMP1 pair (the check reuses the capture as the destination rather
; than binding a new one). The merged mask is then ANDed with exec, followed
; by a single s_cselect_b32, a copy to a v-register and buffer_store_dword.
26 ; GCN-LABEL: {{^}}opt_select_i32_and_cmp_f32:
27 ; GCN-DAG: v_cmp_lg_f32_e32 vcc
28 ; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
29 ; GCN: s_and_b64 [[CMP1]], vcc, [[CMP1]]
30 ; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP1]], exec
31 ; GCN: s_cselect_b32 [[RESULT:s[0-9]+]]
32 ; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
33 ; GCN: buffer_store_dword [[VRESULT]]
; Inputs: %out is an addrspace(1) pointer; %a/%b/%c are the float operands of
; the compares; %x/%y are the two candidate i32 results.
34 define amdgpu_kernel void @opt_select_i32_and_cmp_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 {
35 %fcmp0 = fcmp one float %a, %b
36 %fcmp1 = fcmp one float %a, %c
; Both compares share %a; the select condition is their conjunction.
37 %and = and i1 %fcmp0, %fcmp1
38 %select = select i1 %and, i32 %x, i32 %y
39 store i32 %select, ptr addrspace(1) %out
; Case: two integer compares, combined with AND, driving an i64 select.
; The kernel computes (a != b) and (a != c), ANDs the two i1 values, and
; stores the 64-bit x to %out when the conjunction is true, y otherwise.
; Expected code, per the check lines below: the condition is formed exactly
; as in the i32 variant (two s_cmp_lg_u32 + s_cselect_b64 masks, s_and_b64,
; then AND with exec), but the value is chosen with two s_cselect_b32
; instructions, each copied to a v-register, and written with one
; buffer_store_dwordx2.
; Note on capture names: VRESULT1 is bound on the move of RESULT0 and
; VRESULT0 on the move of RESULT1; the store check only requires that the
; stored range is v[VRESULT0:VRESULT1].
43 ; GCN-LABEL: {{^}}opt_select_i64_and_cmp_i32:
44 ; GCN-DAG: s_cmp_lg_u32
45 ; GCN: s_cselect_b64 [[CMP1:s\[[0-9]+:[0-9]+\]]], -1, 0
46 ; GCN-DAG: s_cmp_lg_u32
47 ; GCN: s_cselect_b64 [[CMP2:s\[[0-9]+:[0-9]+\]]], -1, 0
48 ; GCN: s_and_b64 [[AND1:s\[[0-9]+:[0-9]+\]]], [[CMP1]], [[CMP2]]
49 ; GCN: s_and_b64 [[AND2:s\[[0-9]+:[0-9]+\]]], [[AND1]], exec
50 ; GCN-DAG: s_cselect_b32 [[RESULT0:s[0-9]+]]
51 ; GCN-DAG: s_cselect_b32 [[RESULT1:s[0-9]+]]
52 ; GCN-DAG: v_mov_b32_e32 v[[VRESULT1:[0-9]+]], [[RESULT0]]
53 ; GCN-DAG: v_mov_b32_e32 v[[VRESULT0:[0-9]+]], [[RESULT1]]
54 ; GCN: buffer_store_dwordx2 v[[[VRESULT0]]:[[VRESULT1]]]
; Inputs: %out is an addrspace(1) pointer; %a/%b/%c are compared; %x/%y are
; the two candidate i64 results.
55 define amdgpu_kernel void @opt_select_i64_and_cmp_i32(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
56 %icmp0 = icmp ne i32 %a, %b
57 %icmp1 = icmp ne i32 %a, %c
; Both compares share %a; the select condition is their conjunction.
58 %and = and i1 %icmp0, %icmp1
59 %select = select i1 %and, i64 %x, i64 %y
60 store i64 %select, ptr addrspace(1) %out
; Case: two float compares, combined with AND, driving an i64 select.
; The kernel computes ordered-not-equal (fcmp one) of a vs b and a vs c,
; ANDs the two i1 values, and stores the 64-bit x to %out when true, y
; otherwise.
; Expected code, per the check lines below: one v_cmp_lg_f32 in e32 form
; writing vcc, one in e64 form writing an s-register pair, merged by
; s_and_b64 into a freshly captured pair (AND1), ANDed with exec, then two
; s_cselect_b32 results copied to v-registers and written with one
; buffer_store_dwordx2.
; Note on capture names: VRESULT1 is bound on the move of RESULT0 and
; VRESULT0 on the move of RESULT1; the store check only requires that the
; stored range is v[VRESULT0:VRESULT1].
64 ; GCN-LABEL: {{^}}opt_select_i64_and_cmp_f32:
65 ; GCN-DAG: v_cmp_lg_f32_e32 vcc,
66 ; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
67 ; GCN: s_and_b64 [[AND1:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP1]]
68 ; GCN: s_and_b64 [[AND2:s\[[0-9]+:[0-9]+\]]], [[AND1]], exec
69 ; GCN-DAG: s_cselect_b32 [[RESULT0:s[0-9]+]]
70 ; GCN-DAG: s_cselect_b32 [[RESULT1:s[0-9]+]]
71 ; GCN-DAG: v_mov_b32_e32 v[[VRESULT1:[0-9]+]], [[RESULT0]]
72 ; GCN-DAG: v_mov_b32_e32 v[[VRESULT0:[0-9]+]], [[RESULT1]]
73 ; GCN: buffer_store_dwordx2 v[[[VRESULT0]]:[[VRESULT1]]]
; Inputs: %out is an addrspace(1) pointer; %a/%b/%c are the float operands of
; the compares; %x/%y are the two candidate i64 results.
74 define amdgpu_kernel void @opt_select_i64_and_cmp_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
75 %fcmp0 = fcmp one float %a, %b
76 %fcmp1 = fcmp one float %a, %c
; Both compares share %a; the select condition is their conjunction.
77 %and = and i1 %fcmp0, %fcmp1
78 %select = select i1 %and, i64 %x, i64 %y
79 store i64 %select, ptr addrspace(1) %out
; Case: two integer compares, combined with OR, driving an i32 select.
; The kernel computes (a != b) and (a != c), ORs the two i1 values, and
; stores x to %out when the disjunction is true, y otherwise.
; Expected code, per the check lines below: each compare is an s_cmp_lg_u32
; materialized as a -1/0 mask with s_cselect_b64; the masks are merged with
; s_or_b64, the result is ANDed with exec, and a single s_cselect_b32 picks
; the value, which is copied to a v-register and written with
; buffer_store_dword.
83 ; GCN-LABEL: {{^}}opt_select_i32_or_cmp_i32:
84 ; GCN-DAG: s_cmp_lg_u32
85 ; GCN: s_cselect_b64 [[CMP1:s\[[0-9]+:[0-9]+\]]], -1, 0
86 ; GCN-DAG: s_cmp_lg_u32
87 ; GCN: s_cselect_b64 [[CMP2:s\[[0-9]+:[0-9]+\]]], -1, 0
88 ; GCN: s_or_b64 [[OR:s\[[0-9]+:[0-9]+\]]], [[CMP1]], [[CMP2]]
89 ; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[OR]], exec
90 ; GCN-DAG: s_cselect_b32 [[RESULT:s[0-9]+]]
91 ; GCN-DAG: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
92 ; GCN: buffer_store_dword [[VRESULT]]
; Inputs: %out is an addrspace(1) pointer; %a/%b/%c are compared; %x/%y are
; the two candidate results.
94 define amdgpu_kernel void @opt_select_i32_or_cmp_i32(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 {
95 %icmp0 = icmp ne i32 %a, %b
96 %icmp1 = icmp ne i32 %a, %c
; Both compares share %a; the select condition is their disjunction.
97 %or = or i1 %icmp0, %icmp1
98 %select = select i1 %or, i32 %x, i32 %y
99 store i32 %select, ptr addrspace(1) %out
; Case: two float compares, combined with OR, driving an i32 select.
; The kernel computes ordered-not-equal (fcmp one) of a vs b and a vs c,
; ORs the two i1 values, and stores x to %out when true, y otherwise.
; Expected code, per the check lines below: one v_cmp_lg_f32 in e32 form
; writing vcc and one in e64 form writing an s-register pair, merged with
; s_or_b64, ANDed with exec, then a single s_cselect_b32 whose result is
; copied to a v-register and written with buffer_store_dword.
103 ; GCN-LABEL: {{^}}opt_select_i32_or_cmp_f32:
104 ; GCN-DAG: v_cmp_lg_f32_e32 vcc
105 ; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
106 ; GCN: s_or_b64 [[OR:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP1]]
107 ; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[OR]], exec
108 ; GCN-DAG: s_cselect_b32 [[RESULT:s[0-9]+]]
109 ; GCN-DAG: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
110 ; GCN: buffer_store_dword [[VRESULT]]
; Inputs: %out is an addrspace(1) pointer; %a/%b/%c are the float operands of
; the compares; %x/%y are the two candidate i32 results.
111 define amdgpu_kernel void @opt_select_i32_or_cmp_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 {
112 %fcmp0 = fcmp one float %a, %b
113 %fcmp1 = fcmp one float %a, %c
; Both compares share %a; the select condition is their disjunction.
114 %or = or i1 %fcmp0, %fcmp1
115 %select = select i1 %or, i32 %x, i32 %y
116 store i32 %select, ptr addrspace(1) %out
; Case: two integer compares, combined with OR, driving an i64 select.
; The kernel computes (a != b) and (a != c), ORs the two i1 values, and
; stores the 64-bit x to %out when the disjunction is true, y otherwise.
; Expected code, per the check lines below: two s_cmp_lg_u32 compares, each
; materialized as a -1/0 mask with s_cselect_b64, merged with s_or_b64 and
; ANDed with exec; the value is chosen with two s_cselect_b32 instructions,
; each copied to a v-register, and written with one buffer_store_dwordx2.
; Note on capture names: VRESULT1 is bound on the move of RESULT0 and
; VRESULT0 on the move of RESULT1; the store check only requires that the
; stored range is v[VRESULT0:VRESULT1].
120 ; GCN-LABEL: {{^}}opt_select_i64_or_cmp_i32:
121 ; GCN-DAG: s_cmp_lg_u32
122 ; GCN: s_cselect_b64 [[CMP1:s\[[0-9]+:[0-9]+\]]], -1, 0
123 ; GCN-DAG: s_cmp_lg_u32
124 ; GCN: s_cselect_b64 [[CMP2:s\[[0-9]+:[0-9]+\]]], -1, 0
125 ; GCN: s_or_b64 [[OR:s\[[0-9]+:[0-9]+\]]], [[CMP1]], [[CMP2]]
126 ; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[OR]], exec
127 ; GCN-DAG: s_cselect_b32 [[RESULT0:s[0-9]+]]
128 ; GCN-DAG: s_cselect_b32 [[RESULT1:s[0-9]+]]
129 ; GCN-DAG: v_mov_b32_e32 v[[VRESULT1:[0-9]+]], [[RESULT0]]
130 ; GCN-DAG: v_mov_b32_e32 v[[VRESULT0:[0-9]+]], [[RESULT1]]
131 ; GCN: buffer_store_dwordx2 v[[[VRESULT0]]:[[VRESULT1]]]
; Inputs: %out is an addrspace(1) pointer; %a/%b/%c are compared; %x/%y are
; the two candidate i64 results.
132 define amdgpu_kernel void @opt_select_i64_or_cmp_i32(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
133 %icmp0 = icmp ne i32 %a, %b
134 %icmp1 = icmp ne i32 %a, %c
; Both compares share %a; the select condition is their disjunction.
135 %or = or i1 %icmp0, %icmp1
136 %select = select i1 %or, i64 %x, i64 %y
137 store i64 %select, ptr addrspace(1) %out
; Case: two float compares, combined with OR, driving an i64 select.
; The kernel computes ordered-not-equal (fcmp one) of a vs b and a vs c,
; ORs the two i1 values, and stores the 64-bit x to %out when true, y
; otherwise.
; Expected code, per the check lines below: one v_cmp_lg_f32 in e32 form
; writing vcc and one in e64 form writing an s-register pair, merged with
; s_or_b64 and ANDed with exec; the value is chosen with two s_cselect_b32
; instructions, each copied to a v-register, and written with one
; buffer_store_dwordx2.
; Note on capture names: VRESULT1 is bound on the move of RESULT0 and
; VRESULT0 on the move of RESULT1; the store check only requires that the
; stored range is v[VRESULT0:VRESULT1].
141 ; GCN-LABEL: {{^}}opt_select_i64_or_cmp_f32:
142 ; GCN-DAG: v_cmp_lg_f32_e32 vcc,
143 ; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
144 ; GCN: s_or_b64 [[OR:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP1]]
145 ; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[OR]], exec
146 ; GCN-DAG: s_cselect_b32 [[RESULT0:s[0-9]+]]
147 ; GCN-DAG: s_cselect_b32 [[RESULT1:s[0-9]+]]
148 ; GCN-DAG: v_mov_b32_e32 v[[VRESULT1:[0-9]+]], [[RESULT0]]
149 ; GCN-DAG: v_mov_b32_e32 v[[VRESULT0:[0-9]+]], [[RESULT1]]
150 ; GCN: buffer_store_dwordx2 v[[[VRESULT0]]:[[VRESULT1]]]
; Inputs: %out is an addrspace(1) pointer; %a/%b/%c are the float operands of
; the compares; %x/%y are the two candidate i64 results.
151 define amdgpu_kernel void @opt_select_i64_or_cmp_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
152 %fcmp0 = fcmp one float %a, %b
153 %fcmp1 = fcmp one float %a, %c
; Both compares share %a; the select condition is their disjunction.
154 %or = or i1 %fcmp0, %fcmp1
155 %select = select i1 %or, i64 %x, i64 %y
156 store i64 %select, ptr addrspace(1) %out
; Control-flow case: the select condition is a phi over three predecessors
; instead of a straight-line and/or of two compares.
; The only expectation checked here is that a v_cmp_neq_f32_e64 comparing an
; s-register against the constant 1.0 and writing an s-register pair is
; emitted somewhere after the function label.
; NOTE(review): the name suggests this guards a previously fixed bug, but
; nothing visible here identifies which one - confirm against history.
160 ; GCN-LABEL: {{^}}regression:
161 ; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 1.0
; Inputs: %out is an addrspace(1) pointer receiving the float result; %c0 and
; %c1 are each compared for ordered equality against a constant.
; NOTE(review): the block labels named by the branches and the phi (%entry,
; %if0, %if1, %endif) are not visible among the lines shown here; the
; per-line notes below assume each branch is followed by the block it names
; first - confirm against the full file.
163 define amdgpu_kernel void @regression(ptr addrspace(1) %out, float %c0, float %c1) #0 {
; First test: c0 == 1.0 (ordered). True goes to %if0, false to %endif.
165 %cmp0 = fcmp oeq float %c0, 1.0
166 br i1 %cmp0, label %if0, label %endif
; Second test: c1 == 0.0 (ordered). True goes to %if1, false to %endif.
169 %cmp1 = fcmp oeq float %c1, 0.0
170 br i1 %cmp1, label %if1, label %endif
; Logical negation of the second compare result.
173 %cmp2 = xor i1 %cmp1, true
; Merge: true when arriving from %entry, the negated second compare when
; arriving from %if1, false when arriving from %if0.
177 %tmp0 = phi i1 [ true, %entry ], [ %cmp2, %if1 ], [ false, %if0 ]
; The merged condition picks 4.0 or 0.0, which is stored to %out.
178 %tmp2 = select i1 %tmp0, float 4.0, float 0.0
179 store float %tmp2, ptr addrspace(1) %out
; Attribute group #0: applied to every kernel definition visible above via
; the trailing "#0" on each define line; it marks them nounwind.
183 attributes #0 = { nounwind }