; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
6 ; FUNC-LABEL: {{^}}v_test_imin_sle_i32:
; Per-thread signed min on i32: loads a/b from global buffers indexed by
; tidig.x and stores (a <= b ? a : b).
define amdgpu_kernel void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %a.gep, align 4
  %b = load i32, i32 addrspace(1)* %b.gep, align 4
  %cmp = icmp sle i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out.gep, align 4
  ret void
}
23 ; FUNC-LABEL: {{^}}s_test_imin_sle_i32:
; Scalar signed min (sle) on i32 kernel arguments.
define amdgpu_kernel void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %cmp = icmp sle i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}
34 ; FUNC-LABEL: {{^}}s_test_imin_sle_v1i32:
; Scalar signed min (sle) on <1 x i32> — checks the single-element vector case.
define amdgpu_kernel void @s_test_imin_sle_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) #0 {
  %cmp = icmp sle <1 x i32> %a, %b
  %val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b
  store <1 x i32> %val, <1 x i32> addrspace(1)* %out
  ret void
}
45 ; FUNC-LABEL: {{^}}s_test_imin_sle_v4i32:
; Scalar signed min (sle) on <4 x i32> kernel arguments.
define amdgpu_kernel void @s_test_imin_sle_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) #0 {
  %cmp = icmp sle <4 x i32> %a, %b
  %val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %val, <4 x i32> addrspace(1)* %out
  ret void
}
62 ; FUNC-LABEL: {{^}}s_test_imin_sle_i8:
; Scalar signed min (sle) on sub-dword i8 arguments.
define amdgpu_kernel void @s_test_imin_sle_i8(i8 addrspace(1)* %out, i8 %a, i8 %b) #0 {
  %cmp = icmp sle i8 %a, %b
  %val = select i1 %cmp, i8 %a, i8 %b
  store i8 %val, i8 addrspace(1)* %out
  ret void
}
; XXX - should be able to use s_min if we stop unnecessarily doing
; extloads with mubuf instructions.

; FUNC-LABEL: {{^}}s_test_imin_sle_v4i8:
; GCN: buffer_load_sbyte
; GCN: buffer_load_sbyte
; GCN: buffer_load_sbyte
; GCN: buffer_load_sbyte
; GCN: buffer_load_sbyte
; GCN: buffer_load_sbyte
; GCN: buffer_load_sbyte
; GCN: buffer_load_sbyte
; Scalar signed min (sle) on <4 x i8> — currently lowered via per-byte extloads
; (see the XXX note above the CHECK lines).
define amdgpu_kernel void @s_test_imin_sle_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b) #0 {
  %cmp = icmp sle <4 x i8> %a, %b
  %val = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b
  store <4 x i8> %val, <4 x i8> addrspace(1)* %out
  ret void
}
116 ; FUNC-LABEL: {{^}}s_test_imin_sle_v2i16:
; Scalar signed min (sle) on <2 x i16> kernel arguments.
define amdgpu_kernel void @s_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 {
  %cmp = icmp sle <2 x i16> %a, %b
  %val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
  store <2 x i16> %val, <2 x i16> addrspace(1)* %out
  ret void
}
134 ; FIXME: VI use s_min_i32
135 ; FUNC-LABEL: {{^}}s_test_imin_sle_v4i16:
; Scalar signed min (sle) on <4 x i16> kernel arguments.
define amdgpu_kernel void @s_test_imin_sle_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) #0 {
  %cmp = icmp sle <4 x i16> %a, %b
  %val = select <4 x i1> %cmp, <4 x i16> %a, <4 x i16> %b
  store <4 x i16> %val, <4 x i16> addrspace(1)* %out
  ret void
}
160 ; FUNC-LABEL: @v_test_imin_slt_i32
; Per-thread signed min (strict slt) on i32 loaded from global buffers.
define amdgpu_kernel void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %a.gep, align 4
  %b = load i32, i32 addrspace(1)* %b.gep, align 4
  %cmp = icmp slt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out.gep, align 4
  ret void
}
177 ; FUNC-LABEL: @v_test_imin_slt_i16
180 ; GFX89: v_min_i16_e32
; Per-thread signed min (slt) on i16 loaded from global buffers.
define amdgpu_kernel void @v_test_imin_slt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid

  %a = load i16, i16 addrspace(1)* %a.gep
  %b = load i16, i16 addrspace(1)* %b.gep
  %cmp = icmp slt i16 %a, %b
  %val = select i1 %cmp, i16 %a, i16 %b
  store i16 %val, i16 addrspace(1)* %out.gep
  ret void
}
197 ; FUNC-LABEL: @s_test_imin_slt_i32
; Scalar signed min (slt) on i32 kernel arguments.
define amdgpu_kernel void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %cmp = icmp slt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}
208 ; FUNC-LABEL: {{^}}s_test_imin_slt_v2i32:
; Scalar signed min (slt) on <2 x i32> kernel arguments.
define amdgpu_kernel void @s_test_imin_slt_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
  %cmp = icmp slt <2 x i32> %a, %b
  %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b
  store <2 x i32> %val, <2 x i32> addrspace(1)* %out
  ret void
}
221 ; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i32:
222 ; GCN: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
224 ; EG: MIN_INT {{.*}}literal.{{[xyzw]}}
; Signed min (slt) against the immediate 8.
define amdgpu_kernel void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) #0 {
  %cmp = icmp slt i32 %a, 8
  %val = select i1 %cmp, i32 %a, i32 8
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}
232 ; FUNC-LABEL: {{^}}s_test_imin_sle_imm_i32:
233 ; GCN: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
235 ; EG: MIN_INT {{.*}}literal.{{[xyzw]}}
; Signed min (sle) against the immediate 8.
define amdgpu_kernel void @s_test_imin_sle_imm_i32(i32 addrspace(1)* %out, i32 %a) #0 {
  %cmp = icmp sle i32 %a, 8
  %val = select i1 %cmp, i32 %a, i32 8
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}
243 ; FUNC-LABEL: @v_test_umin_ule_i32
; Per-thread unsigned min (ule) on i32 loaded from global buffers.
define amdgpu_kernel void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %a.gep, align 4
  %b = load i32, i32 addrspace(1)* %b.gep, align 4
  %cmp = icmp ule i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out.gep, align 4
  ret void
}
260 ; FUNC-LABEL: @v_test_umin_ule_v3i32
264 ; GCN-NOT: v_min_u32_e32
; Per-thread unsigned min (ule) on the odd-sized <3 x i32> vector.
define amdgpu_kernel void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %a.ptr, <3 x i32> addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid

  %a = load <3 x i32>, <3 x i32> addrspace(1)* %a.gep
  %b = load <3 x i32>, <3 x i32> addrspace(1)* %b.gep
  %cmp = icmp ule <3 x i32> %a, %b
  %val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b
  store <3 x i32> %val, <3 x i32> addrspace(1)* %out.gep
  ret void
}
284 ; FIXME: Reduce unused packed component to scalar
285 ; FUNC-LABEL: @v_test_umin_ule_v3i16{{$}}
289 ; SI-NOT: v_min_u32_e32
294 ; VI-NOT: v_min_u16_e32
; Per-thread unsigned min (ule) on <3 x i16> (odd-sized packed vector; see
; the FIXME above about reducing the unused packed component).
define amdgpu_kernel void @v_test_umin_ule_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %a.ptr, <3 x i16> addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %out, i32 %tid

  %a = load <3 x i16>, <3 x i16> addrspace(1)* %a.gep
  %b = load <3 x i16>, <3 x i16> addrspace(1)* %b.gep
  %cmp = icmp ule <3 x i16> %a, %b
  %val = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store <3 x i16> %val, <3 x i16> addrspace(1)* %out.gep
  ret void
}
318 ; FUNC-LABEL: @s_test_umin_ule_i32
; Scalar unsigned min (ule) on i32 kernel arguments.
define amdgpu_kernel void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %cmp = icmp ule i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}
329 ; FUNC-LABEL: @v_test_umin_ult_i32
; Per-thread unsigned min (strict ult) on i32 loaded from global buffers.
define amdgpu_kernel void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %a.gep, align 4
  %b = load i32, i32 addrspace(1)* %b.gep, align 4
  %cmp = icmp ult i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out.gep, align 4
  ret void
}
; FUNC-LABEL: {{^}}v_test_umin_ult_i8:
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte

; GFX89: {{flat|global}}_load_ubyte
; GFX89: {{flat|global}}_load_ubyte
; GFX89: v_min_u16_e32
; Per-thread unsigned min (ult) on i8 loaded with align 1 from global buffers.
define amdgpu_kernel void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %a.ptr, i8 addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr inbounds i8, i8 addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds i8, i8 addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i32 %tid

  %a = load i8, i8 addrspace(1)* %a.gep, align 1
  %b = load i8, i8 addrspace(1)* %b.gep, align 1
  %cmp = icmp ult i8 %a, %b
  %val = select i1 %cmp, i8 %a, i8 %b
  store i8 %val, i8 addrspace(1)* %out.gep, align 1
  ret void
}
370 ; FUNC-LABEL: @s_test_umin_ult_i32
; Scalar unsigned min (ult) on i32 kernel arguments.
define amdgpu_kernel void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %cmp = icmp ult i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}
381 ; FUNC-LABEL: @v_test_umin_ult_i32_multi_use
384 ; SI-NEXT: v_cndmask_b32
; Unsigned min (ult) where the compare result is also stored — the i1 has a
; second use, so the min cannot fold away the compare.
define amdgpu_kernel void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace(1)* %out1, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #0 {
  %a = load i32, i32 addrspace(1)* %aptr, align 4
  %b = load i32, i32 addrspace(1)* %bptr, align 4
  %cmp = icmp ult i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out0, align 4
  store i1 %cmp, i1 addrspace(1)* %out1
  ret void
}
399 ; FUNC-LABEL: @v_test_umin_ult_i16_multi_use
402 ; GCN-NEXT: v_cndmask_b32
; i16 variant of the multi-use case: both the min value and the i1 compare
; result are stored.
define amdgpu_kernel void @v_test_umin_ult_i16_multi_use(i16 addrspace(1)* %out0, i1 addrspace(1)* %out1, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 {
  %a = load i16, i16 addrspace(1)* %aptr, align 2
  %b = load i16, i16 addrspace(1)* %bptr, align 2
  %cmp = icmp ult i16 %a, %b
  %val = select i1 %cmp, i16 %a, i16 %b
  store i16 %val, i16 addrspace(1)* %out0, align 2
  store i1 %cmp, i1 addrspace(1)* %out1
  ret void
}
418 ; FUNC-LABEL: @s_test_umin_ult_v1i32
; Scalar unsigned min (ult) on <1 x i32>.
define amdgpu_kernel void @s_test_umin_ult_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) #0 {
  %cmp = icmp ult <1 x i32> %a, %b
  %val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b
  store <1 x i32> %val, <1 x i32> addrspace(1)* %out
  ret void
}
429 ; FUNC-LABEL: {{^}}s_test_umin_ult_v8i32:
; Scalar unsigned min (ult) on <8 x i32> kernel arguments.
define amdgpu_kernel void @s_test_umin_ult_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) #0 {
  %cmp = icmp ult <8 x i32> %a, %b
  %val = select <8 x i1> %cmp, <8 x i32> %a, <8 x i32> %b
  store <8 x i32> %val, <8 x i32> addrspace(1)* %out
  ret void
}
454 ; FUNC-LABEL: {{^}}s_test_umin_ult_v8i16:
; Scalar unsigned min (ult) on <8 x i16> kernel arguments.
define amdgpu_kernel void @s_test_umin_ult_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) #0 {
  %cmp = icmp ult <8 x i16> %a, %b
  %val = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
  store <8 x i16> %val, <8 x i16> addrspace(1)* %out
  ret void
}
; Make sure the redundant 'and' is removed.
; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umin_ult_i16:
; GCN-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
; GCN: s_min_u32 [[MIN:s[0-9]+]], [[A]], [[B]]
; GCN: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
; GCN: buffer_store_dword [[VMIN]]
; umin of two zext'd i16 values followed by a mask to 16 bits — the final
; 'and' is redundant (the min of two 16-bit values fits in 16 bits) and
; should be simplified away.
define amdgpu_kernel void @simplify_demanded_bits_test_umin_ult_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) #0 {
  %a.ext = zext i16 %a to i32
  %b.ext = zext i16 %b to i32
  %cmp = icmp ult i32 %a.ext, %b.ext
  %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
  %mask = and i32 %val, 65535
  store i32 %mask, i32 addrspace(1)* %out
  ret void
}
; Make sure the redundant sign_extend_inreg is removed.

; FUNC-LABEL: {{^}}simplify_demanded_bits_test_min_slt_i16:
; GCN-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
; GCN: s_min_i32 [[MIN:s[0-9]+]], [[A]], [[B]]
; GCN: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
; GCN: buffer_store_dword [[VMIN]]
; smin of two sext'd i16 values followed by shl/ashr (sign_extend_inreg) —
; the re-extension is redundant and should be simplified away.
define amdgpu_kernel void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) #0 {
  %a.ext = sext i16 %a to i32
  %b.ext = sext i16 %b to i32
  %cmp = icmp slt i32 %a.ext, %b.ext
  %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
  %shl = shl i32 %val, 16
  %sextinreg = ashr i32 %shl, 16
  store i32 %sextinreg, i32 addrspace(1)* %out
  ret void
}
528 ; FUNC-LABEL: {{^}}s_test_imin_sle_i16:
; Scalar signed min (sle) on i16 kernel arguments.
define amdgpu_kernel void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) #0 {
  %cmp = icmp sle i16 %a, %b
  %val = select i1 %cmp, i16 %a, i16 %b
  store i16 %val, i16 addrspace(1)* %out
  ret void
}
540 ; FUNC-LABEL: {{^}}test_umin_ult_i64
; 64-bit unsigned min (ult) — exercises the wide compare/select lowering.
define amdgpu_kernel void @test_umin_ult_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
  %tmp = icmp ult i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}
552 ; FUNC-LABEL: {{^}}test_umin_ule_i64
; 64-bit unsigned min (ule).
define amdgpu_kernel void @test_umin_ule_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
  %tmp = icmp ule i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}
564 ; FUNC-LABEL: {{^}}test_imin_slt_i64
; 64-bit signed min (slt).
define amdgpu_kernel void @test_imin_slt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
  %tmp = icmp slt i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}
576 ; FUNC-LABEL: {{^}}test_imin_sle_i64
; 64-bit signed min (sle).
define amdgpu_kernel void @test_imin_sle_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
  %tmp = icmp sle i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}
588 ; FUNC-LABEL: {{^}}v_test_imin_sle_v2i16:
; Per-thread signed min (sle) on packed <2 x i16> loaded from global buffers.
define amdgpu_kernel void @v_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
  %a = load <2 x i16>, <2 x i16> addrspace(1)* %a.gep
  %b = load <2 x i16>, <2 x i16> addrspace(1)* %b.gep
  %cmp = icmp sle <2 x i16> %a, %b
  %val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
  store <2 x i16> %val, <2 x i16> addrspace(1)* %out.gep
  ret void
}
613 ; FUNC-LABEL: {{^}}v_test_imin_ule_v2i16:
; Per-thread unsigned min (ule) on packed <2 x i16> loaded from global buffers.
define amdgpu_kernel void @v_test_imin_ule_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
  %a = load <2 x i16>, <2 x i16> addrspace(1)* %a.gep
  %b = load <2 x i16>, <2 x i16> addrspace(1)* %b.gep
  %cmp = icmp ule <2 x i16> %a, %b
  %val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
  store <2 x i16> %val, <2 x i16> addrspace(1)* %out.gep
  ret void
}
; Workitem-id intrinsic used by the v_* tests above.
declare i32 @llvm.r600.read.tidig.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }