1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3 ; Make sure to test with f32 and i32 compares. If we have to use float
4 ; compares, we always have multiple condition registers. If we can do
5 ; scalar compares, we don't want to use multiple condition registers.
; i32 select whose condition is the AND of two integer `icmp ne` results.
; Expected output: both compares are scalar (s_cmp_lg_u32), each result is
; materialized into an SGPR pair by s_cselect_b64, the two masks are ANDed
; into vcc, and a single v_cndmask_b32 produces the stored dword.
; GCN-LABEL: {{^}}opt_select_i32_and_cmp_i32:
; GCN-DAG: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CMP1:s\[[0-9]+:[0-9]+\]]], -1, 0
; GCN-DAG: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CMP2:s\[[0-9]+:[0-9]+\]]], -1, 0
; GCN: s_and_b64 vcc, [[CMP1]], [[CMP2]]
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @opt_select_i32_and_cmp_i32(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 {
  %icmp0 = icmp ne i32 %a, %b
  %icmp1 = icmp ne i32 %a, %c
  %and = and i1 %icmp0, %icmp1
  %select = select i1 %and, i32 %x, i32 %y
  store i32 %select, i32 addrspace(1)* %out
  ; A basic block must end in a terminator instruction.
  ret void
}
; i32 select whose condition is the AND of two `fcmp one` results.
; Expected output: two vector float compares, one writing vcc (e32 form) and
; one writing an SGPR pair (e64 form), combined by s_and_b64 into vcc, then a
; single v_cndmask_b32 produces the stored dword.
; GCN-LABEL: {{^}}opt_select_i32_and_cmp_f32:
; GCN-DAG: v_cmp_lg_f32_e32 vcc
; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_and_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @opt_select_i32_and_cmp_f32(i32 addrspace(1)* %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 {
  %fcmp0 = fcmp one float %a, %b
  %fcmp1 = fcmp one float %a, %c
  %and = and i1 %fcmp0, %fcmp1
  %select = select i1 %and, i32 %x, i32 %y
  store i32 %select, i32 addrspace(1)* %out
  ; A basic block must end in a terminator instruction.
  ret void
}
; i64 select whose condition is the AND of two integer `icmp ne` results.
; Expected output: scalar compares materialized with s_cselect_b64 and ANDed
; into vcc; the 64-bit select becomes two v_cndmask_b32 instructions whose
; results form the register pair stored by buffer_store_dwordx2.
; GCN-LABEL: {{^}}opt_select_i64_and_cmp_i32:
; GCN-DAG: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CMP1:s\[[0-9]+:[0-9]+\]]], -1, 0
; GCN-DAG: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CMP2:s\[[0-9]+:[0-9]+\]]], -1, 0
; GCN: s_and_b64 vcc, [[CMP1]], [[CMP2]]
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dwordx2 v[[[RESULT0]]:[[RESULT1]]]
define amdgpu_kernel void @opt_select_i64_and_cmp_i32(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
  %icmp0 = icmp ne i32 %a, %b
  %icmp1 = icmp ne i32 %a, %c
  %and = and i1 %icmp0, %icmp1
  %select = select i1 %and, i64 %x, i64 %y
  store i64 %select, i64 addrspace(1)* %out
  ; A basic block must end in a terminator instruction.
  ret void
}
; i64 select whose condition is the AND of two `fcmp one` results.
; Expected output: one float compare writes vcc, the other an SGPR pair; they
; are ANDed into vcc, and the 64-bit select becomes two v_cndmask_b32
; instructions feeding buffer_store_dwordx2.
; GCN-LABEL: {{^}}opt_select_i64_and_cmp_f32:
; GCN-DAG: v_cmp_lg_f32_e32 vcc,
; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_and_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dwordx2 v[[[RESULT0]]:[[RESULT1]]]
define amdgpu_kernel void @opt_select_i64_and_cmp_f32(i64 addrspace(1)* %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
  %fcmp0 = fcmp one float %a, %b
  %fcmp1 = fcmp one float %a, %c
  %and = and i1 %fcmp0, %fcmp1
  %select = select i1 %and, i64 %x, i64 %y
  store i64 %select, i64 addrspace(1)* %out
  ; A basic block must end in a terminator instruction.
  ret void
}
; i32 select whose condition is the OR of two integer `icmp ne` results.
; Expected output: both compares are scalar (s_cmp_lg_u32), each result is
; materialized into an SGPR pair by s_cselect_b64, the two masks are ORed
; into vcc, and a single v_cndmask_b32 produces the stored dword.
; GCN-LABEL: {{^}}opt_select_i32_or_cmp_i32:
; GCN-DAG: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CMP1:s\[[0-9]+:[0-9]+\]]], -1, 0
; GCN-DAG: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CMP2:s\[[0-9]+:[0-9]+\]]], -1, 0
; GCN: s_or_b64 vcc, [[CMP1]], [[CMP2]]
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @opt_select_i32_or_cmp_i32(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 {
  %icmp0 = icmp ne i32 %a, %b
  %icmp1 = icmp ne i32 %a, %c
  %or = or i1 %icmp0, %icmp1
  %select = select i1 %or, i32 %x, i32 %y
  store i32 %select, i32 addrspace(1)* %out
  ; A basic block must end in a terminator instruction.
  ret void
}
; i32 select whose condition is the OR of two `fcmp one` results.
; Expected output: two vector float compares, one writing vcc (e32 form) and
; one writing an SGPR pair (e64 form), combined by s_or_b64 into vcc, then a
; single v_cndmask_b32 produces the stored dword.
; GCN-LABEL: {{^}}opt_select_i32_or_cmp_f32:
; GCN-DAG: v_cmp_lg_f32_e32 vcc
; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_or_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @opt_select_i32_or_cmp_f32(i32 addrspace(1)* %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 {
  %fcmp0 = fcmp one float %a, %b
  %fcmp1 = fcmp one float %a, %c
  %or = or i1 %fcmp0, %fcmp1
  %select = select i1 %or, i32 %x, i32 %y
  store i32 %select, i32 addrspace(1)* %out
  ; A basic block must end in a terminator instruction.
  ret void
}
; i64 select whose condition is the OR of two integer `icmp ne` results.
; Expected output: scalar compares materialized with s_cselect_b64 and ORed
; into vcc; the 64-bit select becomes two v_cndmask_b32 instructions whose
; results form the register pair stored by buffer_store_dwordx2.
; GCN-LABEL: {{^}}opt_select_i64_or_cmp_i32:
; GCN-DAG: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CMP1:s\[[0-9]+:[0-9]+\]]], -1, 0
; GCN-DAG: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CMP2:s\[[0-9]+:[0-9]+\]]], -1, 0
; GCN: s_or_b64 vcc, [[CMP1]], [[CMP2]]
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dwordx2 v[[[RESULT0]]:[[RESULT1]]]
define amdgpu_kernel void @opt_select_i64_or_cmp_i32(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
  %icmp0 = icmp ne i32 %a, %b
  %icmp1 = icmp ne i32 %a, %c
  %or = or i1 %icmp0, %icmp1
  %select = select i1 %or, i64 %x, i64 %y
  store i64 %select, i64 addrspace(1)* %out
  ; A basic block must end in a terminator instruction.
  ret void
}
; i64 select whose condition is the OR of two `fcmp one` results.
; Expected output: one float compare writes vcc, the other an SGPR pair; they
; are ORed into vcc, and the 64-bit select becomes two v_cndmask_b32
; instructions feeding buffer_store_dwordx2.
; GCN-LABEL: {{^}}opt_select_i64_or_cmp_f32:
; GCN-DAG: v_cmp_lg_f32_e32 vcc,
; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_or_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dwordx2 v[[[RESULT0]]:[[RESULT1]]]
define amdgpu_kernel void @opt_select_i64_or_cmp_f32(i64 addrspace(1)* %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
  %fcmp0 = fcmp one float %a, %b
  %fcmp1 = fcmp one float %a, %c
  %or = or i1 %fcmp0, %fcmp1
  %select = select i1 %or, i64 %x, i64 %y
  store i64 %select, i64 addrspace(1)* %out
  ; A basic block must end in a terminator instruction.
  ret void
}
; Select driven by an i1 phi that merges two constants with an inverted float
; compare across a two-level branch. The directives below pin the three float
; compares expected in the output: not-equal against 1.0, not-equal against 0,
; and equal against 0.
; GCN-LABEL: {{^}}regression:
; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 1.0
; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0
; GCN: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0
define amdgpu_kernel void @regression(float addrspace(1)* %out, float %c0, float %c1) #0 {
; Block labels match the names used by the branch targets and the phi's
; incoming edges (%entry, %if0, %if1, %endif).
entry:
  %cmp0 = fcmp oeq float %c0, 1.0
  br i1 %cmp0, label %if0, label %endif

if0:
  %cmp1 = fcmp oeq float %c1, 0.0
  br i1 %cmp1, label %if1, label %endif

if1:
  ; Only reached when %cmp1 is true; the phi consumes the inverted value.
  %cmp2 = xor i1 %cmp1, true
  br label %endif

endif:
  ; One incoming value per predecessor: %entry, %if1 and %if0.
  %tmp0 = phi i1 [ true, %entry ], [ %cmp2, %if1 ], [ false, %if0 ]
  %tmp2 = select i1 %tmp0, float 4.0, float 0.0
  store float %tmp2, float addrspace(1)* %out
  ret void
}
; Attribute group #0: referenced by every kernel definition visible in this
; file; it contains only `nounwind`.
169 attributes #0 = { nounwind }