1 ; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX8_9_10 -check-prefix=FUNC %s
3 ; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9_10 -check-prefix=GFX8_9_10 -check-prefix=FUNC %s
4 ; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX9_10 -check-prefix=GFX8_9_10 -check-prefix=FUNC %s
5 ; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
7 ; FUNC-LABEL: {{^}}v_test_imin_sle_i32:
; Per-workitem signed min: out[tid] = (a[tid] <= b[tid]) ? a[tid] : b[tid].
; The icmp sle + select pair should match to a single VALU min.
11 define amdgpu_kernel void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
12 %tid = call i32 @llvm.r600.read.tidig.x() ; workitem id indexes all three buffers
13 %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
14 %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
15 %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
16 %a = load i32, i32 addrspace(1)* %a.gep, align 4
17 %b = load i32, i32 addrspace(1)* %b.gep, align 4
18 %cmp = icmp sle i32 %a, %b
19 %val = select i1 %cmp, i32 %a, i32 %b ; canonical min(a, b) pattern
20 store i32 %val, i32 addrspace(1)* %out.gep, align 4
24 ; FUNC-LABEL: {{^}}s_test_imin_sle_i32:
; Scalar (SALU) signed min on uniform kernel arguments.
28 define amdgpu_kernel void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
29 %cmp = icmp sle i32 %a, %b
30 %val = select i1 %cmp, i32 %a, i32 %b
31 store i32 %val, i32 addrspace(1)* %out, align 4
35 ; FUNC-LABEL: {{^}}s_test_imin_sle_v1i32:
; <1 x i32> signed min; a single-element vector should lower like scalar i32.
39 define amdgpu_kernel void @s_test_imin_sle_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) #0 {
40 %cmp = icmp sle <1 x i32> %a, %b
41 %val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b
42 store <1 x i32> %val, <1 x i32> addrspace(1)* %out
46 ; FUNC-LABEL: {{^}}s_test_imin_sle_v4i32:
; <4 x i32> signed min on uniform arguments; scalarizes to per-lane min ops.
56 define amdgpu_kernel void @s_test_imin_sle_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) #0 {
57 %cmp = icmp sle <4 x i32> %a, %b
58 %val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
59 store <4 x i32> %val, <4 x i32> addrspace(1)* %out
63 ; FUNC-LABEL: {{^}}s_test_imin_sle_i8:
; i8 signed min; the [8 x i32] filler args force each i8 into its own
; kernarg dword so the loads/extends of the narrow values are exercised.
69 define amdgpu_kernel void @s_test_imin_sle_i8(i8 addrspace(1)* %out, [8 x i32], i8 %a, [8 x i32], i8 %b) #0 {
70 %cmp = icmp sle i8 %a, %b
71 %val = select i1 %cmp, i8 %a, i8 %b
72 store i8 %val, i8 addrspace(1)* %out
76 ; FIXME: Why vector and sdwa for last element?
77 ; FUNC-LABEL: {{^}}s_test_imin_sle_v4i8:
78 ; GCN-DAG: s_load_dwordx2
79 ; GCN-DAG: s_load_dword s
80 ; GCN-DAG: s_load_dword s
; <4 x i8> signed min; the four sub-dword elements are packed in one
; kernarg dword (see the s_load_dword checks above).
102 define amdgpu_kernel void @s_test_imin_sle_v4i8(<4 x i8> addrspace(1)* %out, [8 x i32], <4 x i8> %a, [8 x i32], <4 x i8> %b) #0 {
103 %cmp = icmp sle <4 x i8> %a, %b
104 %val = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b
105 store <4 x i8> %val, <4 x i8> addrspace(1)* %out
109 ; FUNC-LABEL: {{^}}s_test_imin_sle_v2i16:
110 ; GCN: s_load_dword s
111 ; GCN: s_load_dword s
125 ; GFX9_10: v_pk_min_i16
; <2 x i16> signed min on uniform args; GFX9+ can use packed v_pk_min_i16.
129 define amdgpu_kernel void @s_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 {
130 %cmp = icmp sle <2 x i16> %a, %b
131 %val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
132 store <2 x i16> %val, <2 x i16> addrspace(1)* %out
136 ; FUNC-LABEL: {{^}}s_test_imin_sle_v4i16:
137 ; SI-NOT: buffer_load
148 ; GFX9_10: v_pk_min_i16
149 ; GFX9_10: v_pk_min_i16
; <4 x i16> signed min; GFX9+ expects two packed v_pk_min_i16 ops.
155 define amdgpu_kernel void @s_test_imin_sle_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) #0 {
156 %cmp = icmp sle <4 x i16> %a, %b
157 %val = select <4 x i1> %cmp, <4 x i16> %a, <4 x i16> %b
158 store <4 x i16> %val, <4 x i16> addrspace(1)* %out
162 ; FUNC-LABEL: @v_test_imin_slt_i32
; Same as v_test_imin_sle_i32 but with a strict (slt) compare; both
; orderings of the min idiom should match.
166 define amdgpu_kernel void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #0 {
167 %tid = call i32 @llvm.r600.read.tidig.x()
168 %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %aptr, i32 %tid
169 %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
170 %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
171 %a = load i32, i32 addrspace(1)* %a.gep, align 4
172 %b = load i32, i32 addrspace(1)* %b.gep, align 4
173 %cmp = icmp slt i32 %a, %b
174 %val = select i1 %cmp, i32 %a, i32 %b
175 store i32 %val, i32 addrspace(1)* %out.gep, align 4
179 ; FUNC-LABEL: @v_test_imin_slt_i16
182 ; GFX8_9: v_min_i16_e32
183 ; GFX10: v_min_i16_e64
; i16 signed min with divergent operands; GFX8+ has a native v_min_i16.
186 define amdgpu_kernel void @v_test_imin_slt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 {
187 %tid = call i32 @llvm.r600.read.tidig.x()
188 %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %aptr, i32 %tid
189 %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %bptr, i32 %tid
190 %out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
192 %a = load i16, i16 addrspace(1)* %a.gep
193 %b = load i16, i16 addrspace(1)* %b.gep
194 %cmp = icmp slt i16 %a, %b
195 %val = select i1 %cmp, i16 %a, i16 %b
196 store i16 %val, i16 addrspace(1)* %out.gep
200 ; FUNC-LABEL: @s_test_imin_slt_i32
; Scalar signed min, strict-compare variant.
204 define amdgpu_kernel void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
205 %cmp = icmp slt i32 %a, %b
206 %val = select i1 %cmp, i32 %a, i32 %b
207 store i32 %val, i32 addrspace(1)* %out, align 4
211 ; FUNC-LABEL: {{^}}s_test_imin_slt_v2i32:
; <2 x i32> signed min on uniform args.
217 define amdgpu_kernel void @s_test_imin_slt_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
218 %cmp = icmp slt <2 x i32> %a, %b
219 %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b
220 store <2 x i32> %val, <2 x i32> addrspace(1)* %out
224 ; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i32:
225 ; GCN: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
227 ; EG: MIN_INT {{.*}}literal.{{[xyzw]}}
; min(a, 8) with an immediate operand; should fold the constant into
; s_min_i32 (see GCN check above) rather than materializing it.
229 define amdgpu_kernel void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) #0 {
230 %cmp = icmp slt i32 %a, 8
231 %val = select i1 %cmp, i32 %a, i32 8
232 store i32 %val, i32 addrspace(1)* %out, align 4
235 ; FUNC-LABEL: {{^}}s_test_imin_sle_imm_i32:
236 ; GCN: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
238 ; EG: MIN_INT {{.*}}literal.{{[xyzw]}}
; Same as the slt immediate test but with sle; still min(a, 8).
239 define amdgpu_kernel void @s_test_imin_sle_imm_i32(i32 addrspace(1)* %out, i32 %a) #0 {
240 %cmp = icmp sle i32 %a, 8
241 %val = select i1 %cmp, i32 %a, i32 8
242 store i32 %val, i32 addrspace(1)* %out, align 4
246 ; FUNC-LABEL: @v_test_umin_ule_i32
; Per-workitem unsigned min: ule compare + select should become v_min_u32.
250 define amdgpu_kernel void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
251 %tid = call i32 @llvm.r600.read.tidig.x()
252 %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
253 %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
254 %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
255 %a = load i32, i32 addrspace(1)* %a.gep, align 4
256 %b = load i32, i32 addrspace(1)* %b.gep, align 4
257 %cmp = icmp ule i32 %a, %b
258 %val = select i1 %cmp, i32 %a, i32 %b
259 store i32 %val, i32 addrspace(1)* %out.gep, align 4
263 ; FUNC-LABEL: @v_test_umin_ule_v3i32
267 ; GCN-NOT: v_min_u32_e32
; <3 x i32> unsigned min with divergent operands; checks the odd-sized
; vector is handled without an extra (fourth) min — see GCN-NOT above.
273 define amdgpu_kernel void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %a.ptr, <3 x i32> addrspace(1)* %b.ptr) #0 {
274 %tid = call i32 @llvm.r600.read.tidig.x()
275 %a.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %a.ptr, i32 %tid
276 %b.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %b.ptr, i32 %tid
277 %out.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid
279 %a = load <3 x i32>, <3 x i32> addrspace(1)* %a.gep
280 %b = load <3 x i32>, <3 x i32> addrspace(1)* %b.gep
281 %cmp = icmp ule <3 x i32> %a, %b
282 %val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b
283 store <3 x i32> %val, <3 x i32> addrspace(1)* %out.gep
287 ; FIXME: Reduce unused packed component to scalar
288 ; FUNC-LABEL: @v_test_umin_ule_v3i16{{$}}
292 ; SI-NOT: v_min_u32_e32
295 ; VI: v_min_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
299 ; GFX9_10: v_pk_min_u16
300 ; GFX9_10: v_pk_min_u16
; <3 x i16> unsigned min with divergent operands; on GFX9+ this is two
; packed v_pk_min_u16 ops (one lane of the second pair is unused —
; see the FIXME above about reducing it to scalar).
307 define amdgpu_kernel void @v_test_umin_ule_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %a.ptr, <3 x i16> addrspace(1)* %b.ptr) #0 {
308 %tid = call i32 @llvm.r600.read.tidig.x()
309 %a.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %a.ptr, i32 %tid
310 %b.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %b.ptr, i32 %tid
311 %out.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %out, i32 %tid
313 %a = load <3 x i16>, <3 x i16> addrspace(1)* %a.gep
314 %b = load <3 x i16>, <3 x i16> addrspace(1)* %b.gep
315 %cmp = icmp ule <3 x i16> %a, %b
316 %val = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
317 store <3 x i16> %val, <3 x i16> addrspace(1)* %out.gep
321 ; FUNC-LABEL: @s_test_umin_ule_i32
; Scalar unsigned min on uniform kernel arguments.
325 define amdgpu_kernel void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
326 %cmp = icmp ule i32 %a, %b
327 %val = select i1 %cmp, i32 %a, i32 %b
328 store i32 %val, i32 addrspace(1)* %out, align 4
332 ; FUNC-LABEL: @v_test_umin_ult_i32
; Per-workitem unsigned min, strict (ult) compare variant.
336 define amdgpu_kernel void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
337 %tid = call i32 @llvm.r600.read.tidig.x()
338 %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
339 %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
340 %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
341 %a = load i32, i32 addrspace(1)* %a.gep, align 4
342 %b = load i32, i32 addrspace(1)* %b.gep, align 4
343 %cmp = icmp ult i32 %a, %b
344 %val = select i1 %cmp, i32 %a, i32 %b
345 store i32 %val, i32 addrspace(1)* %out.gep, align 4
349 ; FUNC-LABEL: {{^}}v_test_umin_ult_i8:
350 ; SI: {{buffer|flat|global}}_load_ubyte
351 ; SI: {{buffer|flat|global}}_load_ubyte
354 ; GFX8_9_10: {{flat|global}}_load_ubyte
355 ; GFX8_9_10: {{flat|global}}_load_ubyte
356 ; GFX8_9: v_min_u16_e32
357 ; GFX10: v_min_u16_e64
; i8 unsigned min with divergent operands; loaded via *_load_ubyte and
; min'ed in 16-bit (GFX8+) per the checks above.
360 define amdgpu_kernel void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %a.ptr, i8 addrspace(1)* %b.ptr) #0 {
361 %tid = call i32 @llvm.r600.read.tidig.x()
362 %a.gep = getelementptr inbounds i8, i8 addrspace(1)* %a.ptr, i32 %tid
363 %b.gep = getelementptr inbounds i8, i8 addrspace(1)* %b.ptr, i32 %tid
364 %out.gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i32 %tid
366 %a = load i8, i8 addrspace(1)* %a.gep, align 1
367 %b = load i8, i8 addrspace(1)* %b.gep, align 1
368 %cmp = icmp ult i8 %a, %b
369 %val = select i1 %cmp, i8 %a, i8 %b
370 store i8 %val, i8 addrspace(1)* %out.gep, align 1
374 ; FUNC-LABEL: @s_test_umin_ult_i32
; Scalar unsigned min, strict-compare variant.
378 define amdgpu_kernel void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
379 %cmp = icmp ult i32 %a, %b
380 %val = select i1 %cmp, i32 %a, i32 %b
381 store i32 %val, i32 addrspace(1)* %out, align 4
385 ; FUNC-LABEL: @v_test_umin_ult_i32_multi_use
; The compare result is stored separately, so the compare must survive;
; the select cannot simply be replaced by a lone min instruction.
394 define amdgpu_kernel void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace(1)* %out1, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #0 {
395 %a = load i32, i32 addrspace(1)* %aptr, align 4
396 %b = load i32, i32 addrspace(1)* %bptr, align 4
397 %cmp = icmp ult i32 %a, %b
398 %val = select i1 %cmp, i32 %a, i32 %b
399 store i32 %val, i32 addrspace(1)* %out0, align 4
400 store i1 %cmp, i1 addrspace(1)* %out1 ; second use of %cmp keeps it live
404 ; FUNC-LABEL: @v_test_umin_ult_i16_multi_use
407 ; GCN-NEXT: v_cndmask_b32
; i16 variant of the multi-use test: %cmp feeds both the select and a
; separate i1 store, so compare + cndmask are expected (see checks above).
412 define amdgpu_kernel void @v_test_umin_ult_i16_multi_use(i16 addrspace(1)* %out0, i1 addrspace(1)* %out1, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 {
413 %a = load i16, i16 addrspace(1)* %aptr, align 2
414 %b = load i16, i16 addrspace(1)* %bptr, align 2
415 %cmp = icmp ult i16 %a, %b
416 %val = select i1 %cmp, i16 %a, i16 %b
417 store i16 %val, i16 addrspace(1)* %out0, align 2
418 store i1 %cmp, i1 addrspace(1)* %out1 ; second use of %cmp keeps it live
423 ; FUNC-LABEL: @s_test_umin_ult_v1i32
; <1 x i32> unsigned min; should lower like scalar i32.
427 define amdgpu_kernel void @s_test_umin_ult_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) #0 {
428 %cmp = icmp ult <1 x i32> %a, %b
429 %val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b
430 store <1 x i32> %val, <1 x i32> addrspace(1)* %out
434 ; FUNC-LABEL: {{^}}s_test_umin_ult_v8i32:
; Wide <8 x i32> unsigned min on uniform args; eight independent mins.
452 define amdgpu_kernel void @s_test_umin_ult_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) #0 {
453 %cmp = icmp ult <8 x i32> %a, %b
454 %val = select <8 x i1> %cmp, <8 x i32> %a, <8 x i32> %b
455 store <8 x i32> %val, <8 x i32> addrspace(1)* %out
459 ; FUNC-LABEL: {{^}}s_test_umin_ult_v8i16:
460 ; GCN-NOT: {{buffer|flat|global}}_load
; <8 x i16> unsigned min on uniform args; everything comes from the
; kernarg segment, hence the GCN-NOT buffer/flat/global load check.
487 define amdgpu_kernel void @s_test_umin_ult_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) #0 {
488 %cmp = icmp ult <8 x i16> %a, %b
489 %val = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
490 store <8 x i16> %val, <8 x i16> addrspace(1)* %out
494 ; Make sure the redundant 'and' instruction is removed.
495 ; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umin_ult_i16:
496 ; GCN-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xa|0x28}}
497 ; GCN-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
498 ; GCN: s_min_u32 [[MIN:s[0-9]+]], [[A]], [[B]]
499 ; GCN: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
500 ; GCN: buffer_store_dword [[VMIN]]
; min of two zero-extended i16 values already fits in 16 bits, so the
; trailing 'and %val, 65535' is redundant and demanded-bits analysis
; should delete it (checks above expect a bare s_min_u32 + store).
503 define amdgpu_kernel void @simplify_demanded_bits_test_umin_ult_i16(i32 addrspace(1)* %out, [8 x i32], i16 zeroext %a, [8 x i32], i16 zeroext %b) #0 {
504 %a.ext = zext i16 %a to i32
505 %b.ext = zext i16 %b to i32
506 %cmp = icmp ult i32 %a.ext, %b.ext
507 %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
508 %mask = and i32 %val, 65535 ; redundant: %val's high 16 bits are already zero
509 store i32 %mask, i32 addrspace(1)* %out
513 ; Make sure the redundant sign_extend_inreg is removed.
515 ; FUNC-LABEL: {{^}}simplify_demanded_bits_test_min_slt_i16:
516 ; GCN-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xa|0x28}}
517 ; GCN-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
518 ; GCN-DAG: s_sext_i32_i16 [[EXT_A:s[0-9]+]], [[A]]
519 ; GCN-DAG: s_sext_i32_i16 [[EXT_B:s[0-9]+]], [[B]]
521 ; GCN: s_min_i32 [[MIN:s[0-9]+]], [[EXT_A]], [[EXT_B]]
522 ; GCN: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
523 ; GCN: buffer_store_dword [[VMIN]]
; min of two sign-extended i16 values is already sign-extended, so the
; shl/ashr pair (a sign_extend_inreg idiom) is redundant and should be
; removed (checks above expect sext + s_min_i32 + store only).
526 define amdgpu_kernel void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, [8 x i32], i16 signext %a, [8 x i32], i16 signext %b) #0 {
527 %a.ext = sext i16 %a to i32
528 %b.ext = sext i16 %b to i32
529 %cmp = icmp slt i32 %a.ext, %b.ext
530 %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
531 %shl = shl i32 %val, 16
532 %sextinreg = ashr i32 %shl, 16 ; shl+ashr = sign_extend_inreg, a no-op here
533 store i32 %sextinreg, i32 addrspace(1)* %out
537 ; FUNC-LABEL: {{^}}s_test_imin_sle_i16:
; i16 signed min on uniform kernel arguments.
541 define amdgpu_kernel void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) #0 {
542 %cmp = icmp sle i16 %a, %b
543 %val = select i1 %cmp, i16 %a, i16 %b
544 store i16 %val, i16 addrspace(1)* %out
549 ; FUNC-LABEL: {{^}}test_umin_ult_i64
; 64-bit unsigned min; there is no single 64-bit min instruction, so the
; compare + select pair must be expanded (see checks above for details).
554 define amdgpu_kernel void @test_umin_ult_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
555 %tmp = icmp ult i64 %a, %b
556 %val = select i1 %tmp, i64 %a, i64 %b
557 store i64 %val, i64 addrspace(1)* %out, align 8
561 ; FUNC-LABEL: {{^}}test_umin_ule_i64
; 64-bit unsigned min, non-strict (ule) compare variant.
566 define amdgpu_kernel void @test_umin_ule_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
567 %tmp = icmp ule i64 %a, %b
568 %val = select i1 %tmp, i64 %a, i64 %b
569 store i64 %val, i64 addrspace(1)* %out, align 8
573 ; FUNC-LABEL: {{^}}test_imin_slt_i64
; 64-bit signed min, strict (slt) compare variant.
578 define amdgpu_kernel void @test_imin_slt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
579 %tmp = icmp slt i64 %a, %b
580 %val = select i1 %tmp, i64 %a, i64 %b
581 store i64 %val, i64 addrspace(1)* %out, align 8
585 ; FUNC-LABEL: {{^}}test_imin_sle_i64
; 64-bit signed min, non-strict (sle) compare variant.
590 define amdgpu_kernel void @test_imin_sle_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
591 %tmp = icmp sle i64 %a, %b
592 %val = select i1 %tmp, i64 %a, i64 %b
593 store i64 %val, i64 addrspace(1)* %out, align 8
597 ; FUNC-LABEL: {{^}}v_test_imin_sle_v2i16:
604 ; GFX9_10: v_pk_min_i16
; <2 x i16> signed min with divergent (per-workitem) operands;
; GFX9+ expects a single packed v_pk_min_i16.
608 define amdgpu_kernel void @v_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
609 %tid = call i32 @llvm.r600.read.tidig.x()
610 %a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid
611 %b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid
612 %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
613 %a = load <2 x i16>, <2 x i16> addrspace(1)* %a.gep
614 %b = load <2 x i16>, <2 x i16> addrspace(1)* %b.gep
615 %cmp = icmp sle <2 x i16> %a, %b
616 %val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
617 store <2 x i16> %val, <2 x i16> addrspace(1)* %out.gep
622 ; FUNC-LABEL: {{^}}v_test_imin_ule_v2i16:
629 ; GFX9_10: v_pk_min_u16
; <2 x i16> unsigned min with divergent operands;
; GFX9+ expects a single packed v_pk_min_u16.
633 define amdgpu_kernel void @v_test_imin_ule_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
634 %tid = call i32 @llvm.r600.read.tidig.x()
635 %a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid
636 %b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid
637 %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
638 %a = load <2 x i16>, <2 x i16> addrspace(1)* %a.gep
639 %b = load <2 x i16>, <2 x i16> addrspace(1)* %b.gep
640 %cmp = icmp ule <2 x i16> %a, %b
641 %val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
642 store <2 x i16> %val, <2 x i16> addrspace(1)* %out.gep
; Workitem-id intrinsic used by the v_* (divergent-operand) tests above.
646 declare i32 @llvm.r600.read.tidig.x() #1

; #0: kernels; #1: the readnone intrinsic declaration.
648 attributes #0 = { nounwind }
649 attributes #1 = { nounwind readnone }