1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
3 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; Compares %a and %b for equality, sign-extends the i1 to i32, then tests that
; value for equality with 0 and stores the resulting i1 to %out.
; The GCN checks expect an s_cselect_b64 condition feeding a 0/1
; v_cndmask_b32_e64 whose result is stored by the immediately following
; buffer_store_byte. The EG checks expect SETNE_INT and an AND_INT with 1.
5 ; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0:
8 ; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
9 ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
10 ; GCN-NEXT:buffer_store_byte [[RESULT]]
13 ; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
14 ; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
15 define amdgpu_kernel void @sext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
16 %icmp0 = icmp eq i32 %a, %b
17 %ext = sext i1 %icmp0 to i32
18 %icmp1 = icmp eq i32 %ext, 0
19 store i1 %icmp1, i1 addrspace(1)* %out
; Compares %a and %b for inequality, sign-extends the i1 to i32, then tests
; that value for inequality with 0 and stores the resulting i1 to %out.
; The GCN checks expect an s_cselect_b64 condition feeding a 0/1
; v_cndmask_b32_e64 whose result is stored by the immediately following
; buffer_store_byte. The EG checks expect SETNE_INT and an AND_INT with 1.
23 ; FUNC-LABEL: {{^}}sext_bool_icmp_ne_0:
26 ; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
27 ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
28 ; GCN-NEXT: buffer_store_byte [[RESULT]]
31 ; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
32 ; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
33 define amdgpu_kernel void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
34 %icmp0 = icmp ne i32 %a, %b
35 %ext = sext i1 %icmp0 to i32
36 %icmp1 = icmp ne i32 %ext, 0
37 store i1 %icmp1, i1 addrspace(1)* %out
; Compares %a and %b for equality, sign-extends the i1 to i32, then tests that
; value for equality with -1 and stores the resulting i1 to %out.
; The GCN checks expect an s_cselect_b64 condition feeding a 0/1
; v_cndmask_b32_e64 whose result is stored by the immediately following
; buffer_store_byte.
41 ; FUNC-LABEL: {{^}}sext_bool_icmp_eq_neg1:
44 ; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
45 ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
46 ; GCN-NEXT: buffer_store_byte [[RESULT]]
48 define amdgpu_kernel void @sext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
49 %icmp0 = icmp eq i32 %a, %b
50 %ext = sext i1 %icmp0 to i32
51 %icmp1 = icmp eq i32 %ext, -1
52 store i1 %icmp1, i1 addrspace(1)* %out
; Compares %a and %b for inequality, sign-extends the i1 to i32, then tests
; that value for inequality with -1 and stores the resulting i1 to %out.
; The GCN checks expect an s_cselect_b64 condition feeding a 0/1
; v_cndmask_b32_e64 whose result is stored by the immediately following
; buffer_store_byte.
56 ; FUNC-LABEL: {{^}}sext_bool_icmp_ne_neg1:
59 ; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
60 ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
61 ; GCN-NEXT: buffer_store_byte [[RESULT]]
63 define amdgpu_kernel void @sext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
64 %icmp0 = icmp ne i32 %a, %b
65 %ext = sext i1 %icmp0 to i32
66 %icmp1 = icmp ne i32 %ext, -1
67 store i1 %icmp1, i1 addrspace(1)* %out
; Compares %a and %b for equality, zero-extends the i1 to i32, then tests that
; value for equality with 0 and stores the resulting i1 to %out.
; The GCN checks expect an s_cselect_b64 condition feeding a 0/1
; v_cndmask_b32_e64 whose result is stored by the immediately following
; buffer_store_byte.
71 ; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0:
74 ; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
75 ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
76 ; GCN-NEXT: buffer_store_byte [[RESULT]]
78 define amdgpu_kernel void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
79 %icmp0 = icmp eq i32 %a, %b
80 %ext = zext i1 %icmp0 to i32
81 %icmp1 = icmp eq i32 %ext, 0
82 store i1 %icmp1, i1 addrspace(1)* %out
; Compares %a and %b for inequality, zero-extends the i1 to i32, then tests
; that value for inequality with 0 and stores the resulting i1 to %out.
; The GCN checks expect an s_cselect_b64 condition feeding a 0/1
; v_cndmask_b32_e64 whose result is stored by the immediately following
; buffer_store_byte.
86 ; FUNC-LABEL: {{^}}zext_bool_icmp_ne_0:
89 ; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
90 ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
91 ; GCN-NEXT: buffer_store_byte [[RESULT]]
93 define amdgpu_kernel void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
94 %icmp0 = icmp ne i32 %a, %b
95 %ext = zext i1 %icmp0 to i32
96 %icmp1 = icmp ne i32 %ext, 0
97 store i1 %icmp1, i1 addrspace(1)* %out
; Compares %a and %b for equality, zero-extends the i1 to i32, then tests that
; value for equality with 1 and stores the resulting i1 to %out.
; The GCN checks expect an s_cselect_b64 condition feeding a 0/1
; v_cndmask_b32_e64 whose result is stored by the immediately following
; buffer_store_byte.
101 ; FUNC-LABEL: {{^}}zext_bool_icmp_eq_1:
104 ; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
105 ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
106 ; GCN-NEXT: buffer_store_byte [[RESULT]]
108 define amdgpu_kernel void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
109 %icmp0 = icmp eq i32 %a, %b
110 %ext = zext i1 %icmp0 to i32
111 %icmp1 = icmp eq i32 %ext, 1
112 store i1 %icmp1, i1 addrspace(1)* %out
; Compares %a and %b for inequality, zero-extends the i1 to i32, then tests
; that value for inequality with 1 and stores the resulting i1 to %out.
; The GCN checks expect an s_cselect_b64 condition feeding a 0/1
; v_cndmask_b32_e64 whose result is stored by the immediately following
; buffer_store_byte.
116 ; FUNC-LABEL: {{^}}zext_bool_icmp_ne_1:
119 ; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
120 ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
121 ; GCN-NEXT: buffer_store_byte [[RESULT]]
122 define amdgpu_kernel void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
123 %icmp0 = icmp ne i32 %a, %b
124 %ext = zext i1 %icmp0 to i32
125 %icmp1 = icmp ne i32 %ext, 1
126 store i1 %icmp1, i1 addrspace(1)* %out
; Zero-extends the i1 result of (%a == %b) to i32 and tests it for equality
; with -1. A zero-extended i1 is only ever 0 or 1, so the comparison is always
; false; the GCN checks expect the constant 0 to be materialized with
; v_mov_b32_e32 and stored as a byte.
131 ; FUNC-LABEL: {{^}}zext_bool_icmp_eq_neg1:
132 ; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 0{{$}}
133 ; GCN: buffer_store_byte [[TMP]]
135 define amdgpu_kernel void @zext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
136 %icmp0 = icmp eq i32 %a, %b
137 %ext = zext i1 %icmp0 to i32
138 %icmp1 = icmp eq i32 %ext, -1
139 store i1 %icmp1, i1 addrspace(1)* %out
; Zero-extends the i1 result of (%a != %b) to i32 and tests it for inequality
; with -1. A zero-extended i1 is only ever 0 or 1, so the comparison is always
; true; the GCN checks expect the constant 1 to be materialized with
; v_mov_b32_e32 and stored as a byte.
144 ; FUNC-LABEL: {{^}}zext_bool_icmp_ne_neg1:
145 ; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 1{{$}}
146 ; GCN: buffer_store_byte [[TMP]]
148 define amdgpu_kernel void @zext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
149 %icmp0 = icmp ne i32 %a, %b
150 %ext = zext i1 %icmp0 to i32
151 %icmp1 = icmp ne i32 %ext, -1
152 store i1 %icmp1, i1 addrspace(1)* %out
; Zero-extends the i8 kernel argument %b to i32, tests it for inequality with
; 255, and stores the resulting i1 to %out.
; Expected code differs per target. The SI checks want a scalar sequence:
; s_load_dword at offset 0xb, s_and_b32 with 0xff, s_cmpk_lg_i32 against 0xff,
; then s_cselect_b64. The VI checks want s_load_dword at offset 0x2c, a
; v_and_b32_e32 with a 0xff register, and a 16-bit v_cmp_ne_u16_e32 into vcc.
; Both then select 0/1 with v_cndmask_b32_e64 and store the byte.
156 ; FUNC-LABEL: {{^}}cmp_zext_k_i8max:
157 ; SI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
158 ; VI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
159 ; SI-DAG: s_and_b32 [[B:s[0-9]+]], [[VALUE]], 0xff
160 ; SI: s_cmpk_lg_i32 [[B]], 0xff
161 ; SI: s_cselect_b64 [[CC:[^,]+]], -1, 0
163 ; VI: v_mov_b32_e32 [[VK255:v[0-9]+]], 0xff
164 ; VI: s_movk_i32 [[K255:s[0-9]+]], 0xff
165 ; VI: v_and_b32_e32 [[B:v[0-9]+]], [[VALUE]], [[VK255]]
166 ; VI: v_cmp_ne_u16_e32 vcc, [[K255]], [[B]]
168 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
169 ; VI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
170 ; GCN: buffer_store_byte [[RESULT]]
172 define amdgpu_kernel void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
173 %b.ext = zext i8 %b to i32
174 %icmp0 = icmp ne i32 %b.ext, 255
175 store i1 %icmp0, i1 addrspace(1)* %out
; Loads an i8 from %b.ptr, sign-extends it to i32, tests it for inequality
; with -1, and stores the resulting i1 to %out.
; The GCN checks expect a sign-extending byte load (buffer_load_sbyte), a
; v_cmp_ne_u32_e32 against the inline constant -1, an immediately following
; 0/1 v_cndmask_b32_e64 on vcc, and a byte store of that result.
179 ; FUNC-LABEL: {{^}}cmp_sext_k_neg1:
180 ; GCN: buffer_load_sbyte [[B:v[0-9]+]]
181 ; GCN: v_cmp_ne_u32_e32 vcc, -1, [[B]]{{$}}
182 ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
183 ; GCN: buffer_store_byte [[RESULT]]
185 define amdgpu_kernel void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind {
186 %b = load i8, i8 addrspace(1)* %b.ptr
187 %b.ext = sext i8 %b to i32
188 %icmp0 = icmp ne i32 %b.ext, -1
189 store i1 %icmp0, i1 addrspace(1)* %out
; Non-kernel function variant: %b is an i8 argument marked signext. It is
; sign-extended to i32, tested for inequality with -1, and the i1 result is
; stored through an undef pointer (only the generated compare matters here).
; The GCN checks expect the compare against -1 to read v0 directly, followed
; by a 0/1 v_cndmask_b32_e64 on vcc and a byte store.
193 ; FUNC-LABEL: {{^}}v_cmp_sext_k_neg1_i8_sext_arg:
194 ; GCN: v_cmp_ne_u32_e32 vcc, -1, v0
195 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0, 1, vcc
196 ; GCN: buffer_store_byte [[SELECT]]
197 define void @v_cmp_sext_k_neg1_i8_sext_arg(i8 signext %b) nounwind {
198 %b.ext = sext i8 %b to i32
199 %icmp0 = icmp ne i32 %b.ext, -1
200 store i1 %icmp0, i1 addrspace(1)* undef
204 ; FIXME: This ends up doing a buffer_load_ubyte, and a compare to
205 ; 255. Seems to be because of ordering problems when not allowing load widths to be reduced.
206 ; Should do a buffer_load_sbyte and compare with -1
; Sign-extends the i8 kernel argument %b to i32, tests it for inequality with
; -1, and stores the resulting i1 to %out.
; The checks currently expect the argument dword to be loaded with
; s_load_dword (offset 0xb on SI, 0x2c on VI), masked with 0xff, and compared
; against 0xff with s_cmpk_lg_i32, i.e. an unsigned-byte compare with 255
; rather than a signed compare with -1. An s_cselect_b64 / v_cndmask_b32_e64
; pair then produces the stored byte.
208 ; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg:
209 ; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
210 ; VI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
211 ; GCN-DAG: s_and_b32 [[B:s[0-9]+]], [[VAL]], 0xff
212 ; GCN: s_cmpk_lg_i32 [[B]], 0xff{{$}}
213 ; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
214 ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
215 ; GCN: buffer_store_byte [[RESULT]]
217 define amdgpu_kernel void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
218 %b.ext = sext i8 %b to i32
219 %icmp0 = icmp ne i32 %b.ext, -1
220 store i1 %icmp0, i1 addrspace(1)* %out
; Zero-extends the i8 kernel argument %b to i32 and tests it for inequality
; with -1. A zero-extended i8 lies in [0, 255] and can never equal -1, so the
; result is always true; the GCN checks expect the constant 1 to be
; materialized with v_mov_b32_e32 and stored as a byte.
224 ; FUNC-LABEL: {{^}}cmp_zext_k_neg1:
225 ; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
226 ; GCN: buffer_store_byte [[RESULT]]
228 define amdgpu_kernel void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind {
229 %b.ext = zext i8 %b to i32
230 %icmp0 = icmp ne i32 %b.ext, -1
231 store i1 %icmp0, i1 addrspace(1)* %out
; Zero-extends the i1 result of (%a != %b) to i32 and tests it for inequality
; with 2. A zero-extended i1 is only ever 0 or 1, so the comparison is always
; true; the GCN checks expect the constant 1 to be materialized with
; v_mov_b32_e32 and stored as a byte.
235 ; FUNC-LABEL: {{^}}zext_bool_icmp_ne_k:
236 ; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
237 ; GCN: buffer_store_byte [[RESULT]]
239 define amdgpu_kernel void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
240 %icmp0 = icmp ne i32 %a, %b
241 %ext = zext i1 %icmp0 to i32
242 %icmp1 = icmp ne i32 %ext, 2
243 store i1 %icmp1, i1 addrspace(1)* %out
; Zero-extends the i1 result of (%a != %b) to i32 and tests it for equality
; with 2. A zero-extended i1 is only ever 0 or 1, so the comparison is always
; false; the GCN checks expect the constant 0 to be materialized with
; v_mov_b32_e32 and stored as a byte.
247 ; FUNC-LABEL: {{^}}zext_bool_icmp_eq_k:
248 ; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
249 ; GCN: buffer_store_byte [[RESULT]]
251 define amdgpu_kernel void @zext_bool_icmp_eq_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
252 %icmp0 = icmp ne i32 %a, %b
253 %ext = zext i1 %icmp0 to i32
254 %icmp1 = icmp eq i32 %ext, 2
255 store i1 %icmp1, i1 addrspace(1)* %out
259 ; FIXME: These cases should really be able to fold to true/false in
; Sign-extends the i1 result of (%a == %b) to i32 and tests it for equality
; with 1. A sign-extended i1 is only ever 0 or -1, so the comparison can never
; be true; the GCN checks expect the constant 0 to be materialized with
; v_mov_b32_e32 and stored as a byte.
262 ; This really folds away to false
263 ; FUNC-LABEL: {{^}}sext_bool_icmp_eq_1:
264 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0{{$}}
265 ; GCN: buffer_store_byte [[K]]
266 define amdgpu_kernel void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
267 %icmp0 = icmp eq i32 %a, %b
268 %ext = sext i1 %icmp0 to i32
269 %icmp1 = icmp eq i32 %ext, 1
270 store i1 %icmp1, i1 addrspace(1)* %out
; Sign-extends the i1 result of (%a != %b) to i32 and tests it for inequality
; with 1. A sign-extended i1 is only ever 0 or -1, so the comparison is always
; true; the GCN checks expect the constant 1 to be materialized with
; v_mov_b32_e32 and stored as a byte.
274 ; FUNC-LABEL: {{^}}sext_bool_icmp_ne_1:
275 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 1{{$}}
276 ; GCN: buffer_store_byte [[K]]
277 define amdgpu_kernel void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
278 %icmp0 = icmp ne i32 %a, %b
279 %ext = sext i1 %icmp0 to i32
280 %icmp1 = icmp ne i32 %ext, 1
281 store i1 %icmp1, i1 addrspace(1)* %out
; Sign-extends the i1 result of (%a != %b) to i32 and tests it for inequality
; with 2. A sign-extended i1 is only ever 0 or -1, so the comparison is always
; true; the GCN checks expect the constant 1 to be materialized with
; v_mov_b32_e32 and stored as a byte.
285 ; FUNC-LABEL: {{^}}sext_bool_icmp_ne_k:
286 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 1{{$}}
287 ; GCN: buffer_store_byte [[K]]
288 define amdgpu_kernel void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
289 %icmp0 = icmp ne i32 %a, %b
290 %ext = sext i1 %icmp0 to i32
291 %icmp1 = icmp ne i32 %ext, 2
292 store i1 %icmp1, i1 addrspace(1)* %out