1 ; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 -check-prefix=FUNC %s
3 ; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s
4 ; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
6 ; FUNC-LABEL: {{^}}v_test_imin_sle_i32:
; Per-workitem i32 signed-min pattern: icmp sle + select on values loaded at
; a tid-indexed global address; result stored back per-workitem.
10 define amdgpu_kernel void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
11 %tid = call i32 @llvm.r600.read.tidig.x()
12 %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
13 %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
14 %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
15 %a = load i32, i32 addrspace(1)* %a.gep, align 4
16 %b = load i32, i32 addrspace(1)* %b.gep, align 4
; sle+select is the canonical smin pattern the backend should match.
17 %cmp = icmp sle i32 %a, %b
18 %val = select i1 %cmp, i32 %a, i32 %b
19 store i32 %val, i32 addrspace(1)* %out.gep, align 4
23 ; FUNC-LABEL: {{^}}s_test_imin_sle_i32:
; Scalar (kernel-argument) i32 signed-min via icmp sle + select.
27 define amdgpu_kernel void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
28 %cmp = icmp sle i32 %a, %b
29 %val = select i1 %cmp, i32 %a, i32 %b
30 store i32 %val, i32 addrspace(1)* %out, align 4
34 ; FUNC-LABEL: {{^}}s_test_imin_sle_v1i32:
; <1 x i32> variant: single-element vector should lower the same as scalar i32.
38 define amdgpu_kernel void @s_test_imin_sle_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) #0 {
39 %cmp = icmp sle <1 x i32> %a, %b
40 %val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b
41 store <1 x i32> %val, <1 x i32> addrspace(1)* %out
45 ; FUNC-LABEL: {{^}}s_test_imin_sle_v4i32:
; Vector <4 x i32> signed-min: elementwise sle + vector select.
55 define amdgpu_kernel void @s_test_imin_sle_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) #0 {
56 %cmp = icmp sle <4 x i32> %a, %b
57 %val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
58 store <4 x i32> %val, <4 x i32> addrspace(1)* %out
62 ; FUNC-LABEL: {{^}}s_test_imin_sle_i8:
; i8 signed-min on kernel arguments. The [8 x i32] dummy args pad the
; kernarg segment, presumably to place %a/%b at specific load offsets —
; TODO confirm against the stripped CHECK lines.
68 define amdgpu_kernel void @s_test_imin_sle_i8(i8 addrspace(1)* %out, [8 x i32], i8 %a, [8 x i32], i8 %b) #0 {
69 %cmp = icmp sle i8 %a, %b
70 %val = select i1 %cmp, i8 %a, i8 %b
71 store i8 %val, i8 addrspace(1)* %out
75 ; FIXME: Why vector and sdwa for last element?
76 ; FUNC-LABEL: {{^}}s_test_imin_sle_v4i8:
; <4 x i8> signed-min on padded kernel arguments (see FIXME above about the
; codegen quality for the last element).
100 define amdgpu_kernel void @s_test_imin_sle_v4i8(<4 x i8> addrspace(1)* %out, [8 x i32], <4 x i8> %a, [8 x i32], <4 x i8> %b) #0 {
101 %cmp = icmp sle <4 x i8> %a, %b
102 %val = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b
103 store <4 x i8> %val, <4 x i8> addrspace(1)* %out
107 ; FUNC-LABEL: {{^}}s_test_imin_sle_v2i16:
108 ; GCN: s_load_dword s
109 ; GCN: s_load_dword s
; <2 x i16> signed-min on scalar args; each operand fits one dword
; (matches the two s_load_dword checks above).
127 define amdgpu_kernel void @s_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 {
128 %cmp = icmp sle <2 x i16> %a, %b
129 %val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
130 store <2 x i16> %val, <2 x i16> addrspace(1)* %out
134 ; FUNC-LABEL: {{^}}s_test_imin_sle_v4i16:
135 ; SI-NOT: buffer_load
; <4 x i16> signed-min on scalar args; SI must not fall back to buffer loads
; (SI-NOT check above).
153 define amdgpu_kernel void @s_test_imin_sle_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) #0 {
154 %cmp = icmp sle <4 x i16> %a, %b
155 %val = select <4 x i1> %cmp, <4 x i16> %a, <4 x i16> %b
156 store <4 x i16> %val, <4 x i16> addrspace(1)* %out
160 ; FUNC-LABEL: @v_test_imin_slt_i32
; Same as v_test_imin_sle_i32 but with the strict slt predicate; both
; predicates should match the signed-min pattern.
164 define amdgpu_kernel void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #0 {
165 %tid = call i32 @llvm.r600.read.tidig.x()
166 %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %aptr, i32 %tid
167 %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
168 %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
169 %a = load i32, i32 addrspace(1)* %a.gep, align 4
170 %b = load i32, i32 addrspace(1)* %b.gep, align 4
171 %cmp = icmp slt i32 %a, %b
172 %val = select i1 %cmp, i32 %a, i32 %b
173 store i32 %val, i32 addrspace(1)* %out.gep, align 4
177 ; FUNC-LABEL: @v_test_imin_slt_i16
180 ; GFX89: v_min_i16_e32
; Per-workitem i16 signed-min; GFX8/9 should select v_min_i16_e32
; (check above).
183 define amdgpu_kernel void @v_test_imin_slt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 {
184 %tid = call i32 @llvm.r600.read.tidig.x()
185 %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %aptr, i32 %tid
186 %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %bptr, i32 %tid
187 %out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
189 %a = load i16, i16 addrspace(1)* %a.gep
190 %b = load i16, i16 addrspace(1)* %b.gep
191 %cmp = icmp slt i16 %a, %b
192 %val = select i1 %cmp, i16 %a, i16 %b
193 store i16 %val, i16 addrspace(1)* %out.gep
197 ; FUNC-LABEL: @s_test_imin_slt_i32
; Scalar i32 signed-min with the strict slt predicate.
201 define amdgpu_kernel void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
202 %cmp = icmp slt i32 %a, %b
203 %val = select i1 %cmp, i32 %a, i32 %b
204 store i32 %val, i32 addrspace(1)* %out, align 4
208 ; FUNC-LABEL: {{^}}s_test_imin_slt_v2i32:
; <2 x i32> signed-min (slt) on scalar args.
214 define amdgpu_kernel void @s_test_imin_slt_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
215 %cmp = icmp slt <2 x i32> %a, %b
216 %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b
217 store <2 x i32> %val, <2 x i32> addrspace(1)* %out
221 ; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i32:
222 ; GCN: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
224 ; EG: MIN_INT {{.*}}literal.{{[xyzw]}}
; Signed-min against the immediate 8; expects the constant folded into
; s_min_i32 (GCN) / a literal operand (EG) per the checks above.
225 define amdgpu_kernel void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) #0 {
226 %cmp = icmp slt i32 %a, 8
227 %val = select i1 %cmp, i32 %a, i32 8
228 store i32 %val, i32 addrspace(1)* %out, align 4
232 ; FUNC-LABEL: {{^}}s_test_imin_sle_imm_i32:
233 ; GCN: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
235 ; EG: MIN_INT {{.*}}literal.{{[xyzw]}}
; Same immediate-min test with sle; both predicates yield min(%a, 8) here
; since the select arms are %a and 8.
236 define amdgpu_kernel void @s_test_imin_sle_imm_i32(i32 addrspace(1)* %out, i32 %a) #0 {
237 %cmp = icmp sle i32 %a, 8
238 %val = select i1 %cmp, i32 %a, i32 8
239 store i32 %val, i32 addrspace(1)* %out, align 4
243 ; FUNC-LABEL: @v_test_umin_ule_i32
; Per-workitem i32 unsigned-min: icmp ule + select on tid-indexed loads.
247 define amdgpu_kernel void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
248 %tid = call i32 @llvm.r600.read.tidig.x()
249 %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
250 %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
251 %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
252 %a = load i32, i32 addrspace(1)* %a.gep, align 4
253 %b = load i32, i32 addrspace(1)* %b.gep, align 4
254 %cmp = icmp ule i32 %a, %b
255 %val = select i1 %cmp, i32 %a, i32 %b
256 store i32 %val, i32 addrspace(1)* %out.gep, align 4
260 ; FUNC-LABEL: @v_test_umin_ule_v3i32
264 ; GCN-NOT: v_min_u32_e32
270 define amdgpu_kernel void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %a.ptr, <3 x i32> addrspace(1)* %b.ptr) #0 {
271 %tid = call i32 @llvm.r600.read.tidig.x()
272 %a.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %a.ptr, i32 %tid
273 %b.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %b.ptr, i32 %tid
274 %out.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid
276 %a = load <3 x i32>, <3 x i32> addrspace(1)* %a.gep
277 %b = load <3 x i32>, <3 x i32> addrspace(1)* %b.gep
278 %cmp = icmp ule <3 x i32> %a, %b
279 %val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b
280 store <3 x i32> %val, <3 x i32> addrspace(1)* %out.gep
284 ; FIXME: Reduce unused packed component to scalar
285 ; FUNC-LABEL: @v_test_umin_ule_v3i16{{$}}
289 ; SI-NOT: v_min_u32_e32
292 ; VI: v_min_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; <3 x i16> unsigned-min per workitem; VI uses an sdwa min for the high
; half (check above). See the FIXME about reducing the unused packed lane.
304 define amdgpu_kernel void @v_test_umin_ule_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %a.ptr, <3 x i16> addrspace(1)* %b.ptr) #0 {
305 %tid = call i32 @llvm.r600.read.tidig.x()
306 %a.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %a.ptr, i32 %tid
307 %b.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %b.ptr, i32 %tid
308 %out.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %out, i32 %tid
310 %a = load <3 x i16>, <3 x i16> addrspace(1)* %a.gep
311 %b = load <3 x i16>, <3 x i16> addrspace(1)* %b.gep
312 %cmp = icmp ule <3 x i16> %a, %b
313 %val = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
314 store <3 x i16> %val, <3 x i16> addrspace(1)* %out.gep
318 ; FUNC-LABEL: @s_test_umin_ule_i32
; Scalar i32 unsigned-min via icmp ule + select.
322 define amdgpu_kernel void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
323 %cmp = icmp ule i32 %a, %b
324 %val = select i1 %cmp, i32 %a, i32 %b
325 store i32 %val, i32 addrspace(1)* %out, align 4
329 ; FUNC-LABEL: @v_test_umin_ult_i32
; Per-workitem i32 unsigned-min with the strict ult predicate.
333 define amdgpu_kernel void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
334 %tid = call i32 @llvm.r600.read.tidig.x()
335 %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
336 %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
337 %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
338 %a = load i32, i32 addrspace(1)* %a.gep, align 4
339 %b = load i32, i32 addrspace(1)* %b.gep, align 4
340 %cmp = icmp ult i32 %a, %b
341 %val = select i1 %cmp, i32 %a, i32 %b
342 store i32 %val, i32 addrspace(1)* %out.gep, align 4
346 ; FUNC-LABEL: {{^}}v_test_umin_ult_i8:
347 ; SI: {{buffer|flat|global}}_load_ubyte
348 ; SI: {{buffer|flat|global}}_load_ubyte
351 ; GFX89: {{flat|global}}_load_ubyte
352 ; GFX89: {{flat|global}}_load_ubyte
353 ; GFX89: v_min_u16_e32
; Per-workitem i8 unsigned-min; loaded with zero-extending ubyte loads and,
; on GFX8/9, minimized with v_min_u16_e32 (checks above).
356 define amdgpu_kernel void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %a.ptr, i8 addrspace(1)* %b.ptr) #0 {
357 %tid = call i32 @llvm.r600.read.tidig.x()
358 %a.gep = getelementptr inbounds i8, i8 addrspace(1)* %a.ptr, i32 %tid
359 %b.gep = getelementptr inbounds i8, i8 addrspace(1)* %b.ptr, i32 %tid
360 %out.gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i32 %tid
362 %a = load i8, i8 addrspace(1)* %a.gep, align 1
363 %b = load i8, i8 addrspace(1)* %b.gep, align 1
364 %cmp = icmp ult i8 %a, %b
365 %val = select i1 %cmp, i8 %a, i8 %b
366 store i8 %val, i8 addrspace(1)* %out.gep, align 1
370 ; FUNC-LABEL: @s_test_umin_ult_i32
; Scalar i32 unsigned-min with the strict ult predicate.
374 define amdgpu_kernel void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
375 %cmp = icmp ult i32 %a, %b
376 %val = select i1 %cmp, i32 %a, i32 %b
377 store i32 %val, i32 addrspace(1)* %out, align 4
381 ; FUNC-LABEL: @v_test_umin_ult_i32_multi_use
384 ; SI-NEXT: v_cndmask_b32
; The compare result is stored as well as used by the select, so the
; icmp cannot be folded away into a lone min instruction — a cndmask
; must remain (SI-NEXT check above).
389 define amdgpu_kernel void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace(1)* %out1, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #0 {
390 %a = load i32, i32 addrspace(1)* %aptr, align 4
391 %b = load i32, i32 addrspace(1)* %bptr, align 4
392 %cmp = icmp ult i32 %a, %b
393 %val = select i1 %cmp, i32 %a, i32 %b
394 store i32 %val, i32 addrspace(1)* %out0, align 4
395 store i1 %cmp, i1 addrspace(1)* %out1
399 ; FUNC-LABEL: @v_test_umin_ult_i16_multi_use
402 ; GCN-NEXT: v_cndmask_b32
; i16 variant of the multi-use test: both the min value and the compare
; bit are stored, so the compare must survive (GCN-NEXT check above).
407 define amdgpu_kernel void @v_test_umin_ult_i16_multi_use(i16 addrspace(1)* %out0, i1 addrspace(1)* %out1, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 {
408 %a = load i16, i16 addrspace(1)* %aptr, align 2
409 %b = load i16, i16 addrspace(1)* %bptr, align 2
410 %cmp = icmp ult i16 %a, %b
411 %val = select i1 %cmp, i16 %a, i16 %b
412 store i16 %val, i16 addrspace(1)* %out0, align 2
413 store i1 %cmp, i1 addrspace(1)* %out1
418 ; FUNC-LABEL: @s_test_umin_ult_v1i32
; <1 x i32> unsigned-min; should lower identically to the scalar case.
422 define amdgpu_kernel void @s_test_umin_ult_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) #0 {
423 %cmp = icmp ult <1 x i32> %a, %b
424 %val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b
425 store <1 x i32> %val, <1 x i32> addrspace(1)* %out
429 ; FUNC-LABEL: {{^}}s_test_umin_ult_v8i32:
; Wide <8 x i32> unsigned-min on scalar args.
447 define amdgpu_kernel void @s_test_umin_ult_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) #0 {
448 %cmp = icmp ult <8 x i32> %a, %b
449 %val = select <8 x i1> %cmp, <8 x i32> %a, <8 x i32> %b
450 store <8 x i32> %val, <8 x i32> addrspace(1)* %out
454 ; FUNC-LABEL: {{^}}s_test_umin_ult_v8i16:
455 ; GCN-NOT: {{buffer|flat|global}}_load
; <8 x i16> unsigned-min on scalar args; the arguments must come in via
; scalar kernarg loads, not buffer/flat/global loads (GCN-NOT check above).
482 define amdgpu_kernel void @s_test_umin_ult_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) #0 {
483 %cmp = icmp ult <8 x i16> %a, %b
484 %val = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
485 store <8 x i16> %val, <8 x i16> addrspace(1)* %out
489 ; Make sure the redundant 'and' (mask with 65535) is removed.
490 ; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umin_ult_i16:
491 ; GCN-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xa|0x28}}
492 ; GCN-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
493 ; GCN: s_min_u32 [[MIN:s[0-9]+]], [[A]], [[B]]
494 ; GCN: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
495 ; GCN: buffer_store_dword [[VMIN]]
; Demanded-bits test: %a/%b are zext'd i16s, so min(%a.ext, %b.ext) already
; fits in 16 bits and the trailing 'and ..., 65535' mask is redundant —
; codegen should be a bare s_min_u32 with no masking (checks above).
498 define amdgpu_kernel void @simplify_demanded_bits_test_umin_ult_i16(i32 addrspace(1)* %out, [8 x i32], i16 zeroext %a, [8 x i32], i16 zeroext %b) #0 {
499 %a.ext = zext i16 %a to i32
500 %b.ext = zext i16 %b to i32
501 %cmp = icmp ult i32 %a.ext, %b.ext
502 %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
503 %mask = and i32 %val, 65535
504 store i32 %mask, i32 addrspace(1)* %out
508 ; Make sure redundant sign_extend_inreg removed.
510 ; FUNC-LABEL: {{^}}simplify_demanded_bits_test_min_slt_i16:
511 ; GCN-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xa|0x28}}
512 ; GCN-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
513 ; GCN-DAG: s_sext_i32_i16 [[EXT_A:s[0-9]+]], [[A]]
514 ; GCN-DAG: s_sext_i32_i16 [[EXT_B:s[0-9]+]], [[B]]
516 ; GCN: s_min_i32 [[MIN:s[0-9]+]], [[EXT_A]], [[EXT_B]]
517 ; GCN: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
518 ; GCN: buffer_store_dword [[VMIN]]
; Demanded-bits test (signed): the shl/ashr pair is a sign_extend_inreg of
; the min result, which is redundant because both inputs were sext'd from
; i16 — expect sext + s_min_i32 with no extra shifts (checks above).
521 define amdgpu_kernel void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, [8 x i32], i16 signext %a, [8 x i32], i16 signext %b) #0 {
522 %a.ext = sext i16 %a to i32
523 %b.ext = sext i16 %b to i32
524 %cmp = icmp slt i32 %a.ext, %b.ext
525 %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
526 %shl = shl i32 %val, 16
527 %sextinreg = ashr i32 %shl, 16
528 store i32 %sextinreg, i32 addrspace(1)* %out
532 ; FUNC-LABEL: {{^}}s_test_imin_sle_i16:
; Scalar i16 signed-min via icmp sle + select.
536 define amdgpu_kernel void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) #0 {
537 %cmp = icmp sle i16 %a, %b
538 %val = select i1 %cmp, i16 %a, i16 %b
539 store i16 %val, i16 addrspace(1)* %out
544 ; FUNC-LABEL: {{^}}test_umin_ult_i64
; 64-bit unsigned-min (ult): no native 64-bit min instruction exists, so
; this lowers to a compare + conditional-select sequence.
549 define amdgpu_kernel void @test_umin_ult_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
550 %tmp = icmp ult i64 %a, %b
551 %val = select i1 %tmp, i64 %a, i64 %b
552 store i64 %val, i64 addrspace(1)* %out, align 8
556 ; FUNC-LABEL: {{^}}test_umin_ule_i64
; 64-bit unsigned-min with the inclusive ule predicate.
561 define amdgpu_kernel void @test_umin_ule_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
562 %tmp = icmp ule i64 %a, %b
563 %val = select i1 %tmp, i64 %a, i64 %b
564 store i64 %val, i64 addrspace(1)* %out, align 8
568 ; FUNC-LABEL: {{^}}test_imin_slt_i64
; 64-bit signed-min with the strict slt predicate.
573 define amdgpu_kernel void @test_imin_slt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
574 %tmp = icmp slt i64 %a, %b
575 %val = select i1 %tmp, i64 %a, i64 %b
576 store i64 %val, i64 addrspace(1)* %out, align 8
580 ; FUNC-LABEL: {{^}}test_imin_sle_i64
; 64-bit signed-min with the inclusive sle predicate.
585 define amdgpu_kernel void @test_imin_sle_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
586 %tmp = icmp sle i64 %a, %b
587 %val = select i1 %tmp, i64 %a, i64 %b
588 store i64 %val, i64 addrspace(1)* %out, align 8
592 ; FUNC-LABEL: {{^}}v_test_imin_sle_v2i16:
; Per-workitem <2 x i16> signed-min; on GFX9 this is the packed
; v_pk_min_i16 case — TODO confirm against the stripped CHECK lines.
603 define amdgpu_kernel void @v_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
604 %tid = call i32 @llvm.r600.read.tidig.x()
605 %a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid
606 %b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid
607 %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
608 %a = load <2 x i16>, <2 x i16> addrspace(1)* %a.gep
609 %b = load <2 x i16>, <2 x i16> addrspace(1)* %b.gep
610 %cmp = icmp sle <2 x i16> %a, %b
611 %val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
612 store <2 x i16> %val, <2 x i16> addrspace(1)* %out.gep
617 ; FUNC-LABEL: {{^}}v_test_imin_ule_v2i16:
; Per-workitem <2 x i16> unsigned-min (ule). Note: the function name says
; "imin" but the predicate is unsigned; kept as-is since the (stripped)
; FUNC-LABEL check matches this exact name.
628 define amdgpu_kernel void @v_test_imin_ule_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
629 %tid = call i32 @llvm.r600.read.tidig.x()
630 %a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid
631 %b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid
632 %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
633 %a = load <2 x i16>, <2 x i16> addrspace(1)* %a.gep
634 %b = load <2 x i16>, <2 x i16> addrspace(1)* %b.gep
635 %cmp = icmp ule <2 x i16> %a, %b
636 %val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
637 store <2 x i16> %val, <2 x i16> addrspace(1)* %out.gep
641 declare i32 @llvm.r600.read.tidig.x() #1
643 attributes #0 = { nounwind }
644 attributes #1 = { nounwind readnone }