; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
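
; Tests folding of compares whose operand is a sign- or zero-extended boolean
; (or a sign/zero-extended i8 value) against a constant.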

; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0:
; GCN: v_cmp_ne_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]

; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
define amdgpu_kernel void @sext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sext_bool_icmp_ne_0:
; GCN: v_cmp_ne_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]

; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
define amdgpu_kernel void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sext_bool_icmp_eq_neg1:
; GCN: v_cmp_eq_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]

define amdgpu_kernel void @sext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sext_bool_icmp_ne_neg1:
; GCN: v_cmp_eq_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]

define amdgpu_kernel void @sext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0:
; GCN: v_cmp_ne_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]

define amdgpu_kernel void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zext_bool_icmp_ne_0:
; GCN: v_cmp_ne_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]

define amdgpu_kernel void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zext_bool_icmp_eq_1:
; GCN: v_cmp_eq_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]

define amdgpu_kernel void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zext_bool_icmp_ne_1:
; GCN: v_cmp_eq_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zext_bool_icmp_eq_neg1:
; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[TMP]]

define amdgpu_kernel void @zext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zext_bool_icmp_ne_neg1:
; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[TMP]]

define amdgpu_kernel void @zext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}cmp_zext_k_i8max:
; SI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN: s_movk_i32 [[K255:s[0-9]+]], 0xff
; GCN-DAG: v_mov_b32_e32 [[VK255:v[0-9]+]], [[K255]]
; SI-DAG: s_and_b32 [[B:s[0-9]+]], [[VALUE]], [[K255]]
; SI: v_cmp_ne_u32_e32 vcc, [[B]], [[VK255]]

; VI-DAG: v_and_b32_e32 [[B:v[0-9]+]], [[VALUE]], [[VK255]]
; VI: v_cmp_ne_u16_e32 vcc, [[K255]], [[B]]

; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[RESULT]]

define amdgpu_kernel void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
  %b.ext = zext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, 255
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}cmp_sext_k_neg1:
; GCN: buffer_load_sbyte [[B:v[0-9]+]]
; GCN: v_cmp_ne_u32_e32 vcc, -1, [[B]]{{$}}
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[RESULT]]

define amdgpu_kernel void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind {
  %b = load i8, i8 addrspace(1)* %b.ptr
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v_cmp_sext_k_neg1_i8_sext_arg:
; GCN: v_cmp_ne_u32_e32 vcc, -1, v0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[SELECT]]
define void @v_cmp_sext_k_neg1_i8_sext_arg(i8 signext %b) nounwind {
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* undef
  ret void
}

; FIXME: This ends up doing a buffer_load_ubyte and a compare with 255. This
; seems to be caused by ordering problems when load widths are not allowed to
; be reduced. It should do a buffer_load_sbyte and compare with -1.
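
; Note that the generated code is still correct: for an i8 value %b,
; (sext i8 %b to i32) != -1 is equivalent to (%b & 0xff) != 0xff, i.e. the
; zero-extended compare against 255 that is currently emitted.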

; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg:
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN: s_movk_i32 [[K:s[0-9]+]], 0xff
; GCN-DAG: s_and_b32 [[B:s[0-9]+]], [[VAL]], [[K]]
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]]
; GCN: v_cmp_ne_u32_e32 vcc, [[B]], [[VK]]{{$}}
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[RESULT]]

define amdgpu_kernel void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}cmp_zext_k_neg1:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[RESULT]]

define amdgpu_kernel void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind {
  %b.ext = zext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zext_bool_icmp_ne_k:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[RESULT]]

define amdgpu_kernel void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 2
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zext_bool_icmp_eq_k:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[RESULT]]

define amdgpu_kernel void @zext_bool_icmp_eq_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 2
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FIXME: These cases should really be able to fold to true/false.

; This really folds away to false.
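; A sign-extended i1 is either 0 or -1, so it can never be equal to 1.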
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_1:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[K]]
define amdgpu_kernel void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sext_bool_icmp_ne_1:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[K]]
define amdgpu_kernel void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sext_bool_icmp_ne_k:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[K]]
define amdgpu_kernel void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 2
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}