1 ; RUN: llc -global-isel -mcpu=tahiti -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX678,GFX6789 %s
2 ; RUN: llc -global-isel -mcpu=gfx900 -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9,GFX6789 %s
3 ; RUN: llc -global-isel -mcpu=gfx1010 -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
; Declarations of the 64-bit signed max/min intrinsics that the clamp test
; functions in this file call.
5 declare i64 @llvm.smax.i64(i64, i64)
6 declare i64 @llvm.smin.i64(i64, i64)
; Clamp an i64 to the full i16 range: smax with -32768, then smin with 32767,
; then trunc to i16. The tahiti/gfx900 checks look for v_cvt_pk_i16_i32
; followed by a v_med3_i32 whose bounds 0xffff8000 and 0x7fff are both
; loaded with v_mov_b32; the gfx1010 checks look for the same sequence with
; 0xffff8000 written directly as a v_med3_i32 operand.
; NOTE(review): only the gfx1010 prefix has a -LABEL anchor here, so the
; tahiti/gfx900 checks are not tied to this function's output - confirm
; whether that is intended.
8 ; GFX10-LABEL: {{^}}v_clamp_i64_i16
9 ; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
10 ; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
11 ; GFX6789: v_mov_b32_e32 [[B]], 0xffff8000
12 ; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x7fff
13 ; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
14 ; GFX10: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
15 ; GFX10: v_mov_b32_e32 [[B]], 0x7fff
16 ; GFX10: v_med3_i32 [[A]], 0xffff8000, [[A]], [[B]]
17 define i16 @v_clamp_i64_i16(i64 %in) #0 {
19 %max = call i64 @llvm.smax.i64(i64 %in, i64 -32768)
20 %min = call i64 @llvm.smin.i64(i64 %max, i64 32767)
21 %result = trunc i64 %min to i16
; Same i16-range clamp as v_clamp_i64_i16 but with the operations in the
; opposite order: smin with 32767 first, then smax with -32768, then trunc.
; The check lines are identical to the non-reversed variant: v_cvt_pk_i16_i32
; followed by v_med3_i32 with bounds 0xffff8000 and 0x7fff.
25 ; GFX10-LABEL: {{^}}v_clamp_i64_i16_reverse
26 ; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
27 ; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
28 ; GFX6789: v_mov_b32_e32 [[B]], 0xffff8000
29 ; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x7fff
30 ; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
31 ; GFX10: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
32 ; GFX10: v_mov_b32_e32 [[B]], 0x7fff
33 ; GFX10: v_med3_i32 [[A]], 0xffff8000, [[A]], [[B]]
34 define i16 @v_clamp_i64_i16_reverse(i64 %in) #0 {
36 %min = call i64 @llvm.smin.i64(i64 %in, i64 32767)
37 %max = call i64 @llvm.smax.i64(i64 %min, i64 -32768)
38 %result = trunc i64 %max to i16
; Clamp whose smin bound, 32769 (0x8001), lies outside the i16 range while the
; smax bound stays at -32768. The checks look for v_cndmask_b32 selects using
; 0x8001 and 0; the tahiti/gfx900 checks expect 0x8001 in a register loaded by
; v_mov_b32 and use vcc, the gfx1010 checks expect it as a direct operand and
; use vcc_lo.
; NOTE(review): the name says "lower" but the out-of-range constant is the
; upper bound - confirm the intended naming.
42 ; GFX10-LABEL: {{^}}v_clamp_i64_i16_invalid_lower
43 ; GFX6789: v_mov_b32_e32 [[B:v[0-9]+]], 0x8001
44 ; GFX6789: v_cndmask_b32_e32 [[A:v[0-9]+]], [[B]], [[A]], vcc
45 ; GFX6789: v_cndmask_b32_e32 [[C:v[0-9]+]], 0, [[C]], vcc
47 ; GFX10: v_cndmask_b32_e32 [[A:v[0-9]+]], 0x8001, [[A]], vcc_lo
48 ; GFX10: v_cndmask_b32_e32 [[B:v[0-9]+]], 0, [[B]], vcc_lo
49 define i16 @v_clamp_i64_i16_invalid_lower(i64 %in) #0 {
51 %min = call i64 @llvm.smin.i64(i64 %in, i64 32769)
52 %max = call i64 @llvm.smax.i64(i64 %min, i64 -32768)
53 %result = trunc i64 %max to i16
; Clamp where both bounds fall just outside the i16 range: smax with -32769,
; then smin with 32768 (0x8000), then trunc to i16. The checks look for a
; v_cndmask_b32 select using 0x8000; the tahiti/gfx900 checks expect the
; constant in a register loaded by v_mov_b32, the gfx1010 checks expect it as
; a direct operand.
57 ; GFX10-LABEL: {{^}}v_clamp_i64_i16_invalid_lower_and_higher
58 ; GFX6789: v_mov_b32_e32 [[B:v[0-9]+]], 0x8000
59 ; GFX6789: v_cndmask_b32_e32 [[A:v[0-9]+]], [[B]], [[A]], vcc
60 ; GFX10: v_cndmask_b32_e32 [[A:v[0-9]+]], 0x8000, [[A]], vcc_lo
61 define i16 @v_clamp_i64_i16_invalid_lower_and_higher(i64 %in) #0 {
63 %max = call i64 @llvm.smax.i64(i64 %in, i64 -32769)
64 %min = call i64 @llvm.smin.i64(i64 %max, i64 32768)
65 %result = trunc i64 %min to i16
; Clamp to a range narrower than i16: smin with 256 (0x100), then smax with
; -255 (0xffffff01 as a 32-bit value), then trunc to i16. The checks look for
; v_cvt_pk_i16_i32 followed by v_med3_i32 with those two bounds; the gfx1010
; checks expect 0xffffff01 as a direct v_med3_i32 operand.
69 ; GFX10-LABEL: {{^}}v_clamp_i64_i16_lower_than_short
70 ; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
71 ; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
72 ; GFX6789: v_mov_b32_e32 [[B]], 0xffffff01
73 ; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x100
74 ; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
75 ; GFX10: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
76 ; GFX10: v_mov_b32_e32 [[B]], 0x100
77 ; GFX10: v_med3_i32 [[A]], 0xffffff01, [[A]], [[B]]
78 define i16 @v_clamp_i64_i16_lower_than_short(i64 %in) #0 {
80 %min = call i64 @llvm.smin.i64(i64 %in, i64 256)
81 %max = call i64 @llvm.smax.i64(i64 %min, i64 -255)
82 %result = trunc i64 %max to i16
; Same narrower-than-i16 clamp as v_clamp_i64_i16_lower_than_short but with
; the operations in the opposite order: smax with -255 first, then smin with
; 256, then trunc. The check lines are identical to the non-reversed variant:
; v_cvt_pk_i16_i32 followed by v_med3_i32 with bounds 0xffffff01 and 0x100.
86 ; GFX10-LABEL: {{^}}v_clamp_i64_i16_lower_than_short_reverse
87 ; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
88 ; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
89 ; GFX6789: v_mov_b32_e32 [[B]], 0xffffff01
90 ; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x100
91 ; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
92 ; GFX10: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
93 ; GFX10: v_mov_b32_e32 [[B]], 0x100
94 ; GFX10: v_med3_i32 [[A]], 0xffffff01, [[A]], [[B]]
95 define i16 @v_clamp_i64_i16_lower_than_short_reverse(i64 %in) #0 {
97 %max = call i64 @llvm.smax.i64(i64 %in, i64 -255)
98 %min = call i64 @llvm.smin.i64(i64 %max, i64 256)
99 %result = trunc i64 %min to i16
; Degenerate clamp where both bounds are 0: smax with 0, then smin with 0,
; then trunc to i16. All three runs check for a plain v_mov_b32 of 0 into v0.
103 ; GFX10-LABEL: {{^}}v_clamp_i64_i16_zero
104 ; GFX6789: v_mov_b32_e32 v0, 0
105 ; GFX10: v_mov_b32_e32 v0, 0
106 define i16 @v_clamp_i64_i16_zero(i64 %in) #0 {
108 %max = call i64 @llvm.smax.i64(i64 %in, i64 0)
109 %min = call i64 @llvm.smin.i64(i64 %max, i64 0)
110 %result = trunc i64 %min to i16