1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=SDAG-VI %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=SDAG-GFX9 %s
4 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1101 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,SDAG-GFX11 %s
6 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GISEL-VI %s
7 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GISEL-GFX9 %s
8 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1101 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GFX11,GISEL-GFX11 %s
10 ; Targets before GFX9 have no V_SAT_PK; GFX9 and later have V_SAT_PK; GFX11 has V_SAT_PK with t16.
; Signed integer min/max intrinsics (scalar i16 and <2 x i16> forms) called by
; the clamp patterns in the test functions of this file.
12 declare i16 @llvm.smin.i16(i16, i16)
13 declare i16 @llvm.smax.i16(i16, i16)
15 declare <2 x i16> @llvm.smin.v2i16(<2 x i16>, <2 x i16>)
16 declare <2 x i16> @llvm.smax.v2i16(<2 x i16>, <2 x i16>)
; basic_smax_smin: clamp each scalar i16 argument with smax(x, 0) followed by
; smin(.., 255), then insert the two clamped values into elements 0 and 1 of a
; <2 x i16>.
18 define <2 x i16> @basic_smax_smin(i16 %src0, i16 %src1) {
19 ; SDAG-VI-LABEL: basic_smax_smin:
21 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22 ; SDAG-VI-NEXT: v_max_i16_e32 v0, 0, v0
23 ; SDAG-VI-NEXT: v_max_i16_e32 v1, 0, v1
24 ; SDAG-VI-NEXT: v_mov_b32_e32 v2, 0xff
25 ; SDAG-VI-NEXT: v_min_i16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
26 ; SDAG-VI-NEXT: v_min_i16_e32 v0, 0xff, v0
27 ; SDAG-VI-NEXT: v_or_b32_e32 v0, v0, v1
28 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
30 ; SDAG-GFX9-LABEL: basic_smax_smin:
32 ; SDAG-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33 ; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, 0xff
34 ; SDAG-GFX9-NEXT: v_med3_i16 v0, v0, 0, v2
35 ; SDAG-GFX9-NEXT: v_med3_i16 v1, v1, 0, v2
36 ; SDAG-GFX9-NEXT: s_mov_b32 s4, 0x5040100
37 ; SDAG-GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
38 ; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31]
40 ; SDAG-GFX11-LABEL: basic_smax_smin:
41 ; SDAG-GFX11: ; %bb.0:
42 ; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43 ; SDAG-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
44 ; SDAG-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
45 ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
46 ; SDAG-GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
47 ; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
49 ; GISEL-VI-LABEL: basic_smax_smin:
51 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52 ; GISEL-VI-NEXT: v_max_i16_e32 v0, 0, v0
53 ; GISEL-VI-NEXT: v_mov_b32_e32 v2, 0xff
54 ; GISEL-VI-NEXT: v_max_i16_e32 v1, 0, v1
55 ; GISEL-VI-NEXT: v_min_i16_e32 v0, 0xff, v0
56 ; GISEL-VI-NEXT: v_min_i16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
57 ; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v1
58 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
60 ; GISEL-GFX9-LABEL: basic_smax_smin:
61 ; GISEL-GFX9: ; %bb.0:
62 ; GISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
63 ; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0xff
64 ; GISEL-GFX9-NEXT: v_med3_i16 v0, v0, 0, v2
65 ; GISEL-GFX9-NEXT: v_med3_i16 v1, v1, 0, v2
66 ; GISEL-GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
67 ; GISEL-GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
68 ; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31]
70 ; GISEL-GFX11-LABEL: basic_smax_smin:
71 ; GISEL-GFX11: ; %bb.0:
72 ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73 ; GISEL-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
74 ; GISEL-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
75 ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
76 ; GISEL-GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
77 ; GISEL-GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
78 ; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
79 %src0.max = call i16 @llvm.smax.i16(i16 %src0, i16 0)
80 %src0.clamp = call i16 @llvm.smin.i16(i16 %src0.max, i16 255)
81 %src1.max = call i16 @llvm.smax.i16(i16 %src1, i16 0)
82 %src1.clamp = call i16 @llvm.smin.i16(i16 %src1.max, i16 255)
83 %insert.0 = insertelement <2 x i16> undef, i16 %src0.clamp, i32 0
84 %vec = insertelement <2 x i16> %insert.0, i16 %src1.clamp, i32 1
88 ; Check that we don't emit a VALU instruction for SGPR inputs.
; basic_smax_smin_sgpr: amdgpu_kernel variant of the smax(x, 0) -> smin(.., 255)
; clamp. The two i32 inreg arguments are truncated to i16, clamped, inserted
; into a <2 x i16>, and the vector is stored to %out.
; NOTE(review): the SDAG check lines below do contain VALU clamp instructions
; (v_max_i16/v_min_i16 on VI, v_med3_i16 on GFX9/GFX11); only the GISEL check
; lines keep the clamp on SALU (s_max_i32/s_min_i32). Confirm which behaviour
; the comment above is meant to describe.
89 define amdgpu_kernel void @basic_smax_smin_sgpr(ptr addrspace(1) %out, i32 inreg %src0ext, i32 inreg %src1ext) {
90 ; SDAG-VI-LABEL: basic_smax_smin_sgpr:
92 ; SDAG-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
93 ; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0xff
94 ; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
95 ; SDAG-VI-NEXT: v_max_i16_e64 v1, s2, 0
96 ; SDAG-VI-NEXT: v_max_i16_e64 v2, s3, 0
97 ; SDAG-VI-NEXT: v_min_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
98 ; SDAG-VI-NEXT: v_min_i16_e32 v1, 0xff, v1
99 ; SDAG-VI-NEXT: v_or_b32_e32 v2, v1, v0
100 ; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
101 ; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
102 ; SDAG-VI-NEXT: flat_store_dword v[0:1], v2
103 ; SDAG-VI-NEXT: s_endpgm
105 ; SDAG-GFX9-LABEL: basic_smax_smin_sgpr:
106 ; SDAG-GFX9: ; %bb.0:
107 ; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
108 ; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0xff
109 ; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0
110 ; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0)
111 ; SDAG-GFX9-NEXT: v_med3_i16 v2, s2, 0, v1
112 ; SDAG-GFX9-NEXT: v_med3_i16 v1, s3, 0, v1
113 ; SDAG-GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
114 ; SDAG-GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v2
115 ; SDAG-GFX9-NEXT: global_store_dword v0, v1, s[0:1]
116 ; SDAG-GFX9-NEXT: s_endpgm
118 ; SDAG-GFX11-LABEL: basic_smax_smin_sgpr:
119 ; SDAG-GFX11: ; %bb.0:
120 ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
121 ; SDAG-GFX11-NEXT: v_mov_b32_e32 v2, 0
122 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
123 ; SDAG-GFX11-NEXT: v_med3_i16 v0, s2, 0, 0xff
124 ; SDAG-GFX11-NEXT: v_med3_i16 v1, s3, 0, 0xff
125 ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
126 ; SDAG-GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
127 ; SDAG-GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
128 ; SDAG-GFX11-NEXT: global_store_b32 v2, v0, s[0:1]
129 ; SDAG-GFX11-NEXT: s_endpgm
131 ; GISEL-VI-LABEL: basic_smax_smin_sgpr:
133 ; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
134 ; GISEL-VI-NEXT: s_sext_i32_i16 s4, 0
135 ; GISEL-VI-NEXT: s_sext_i32_i16 s5, 0xff
136 ; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
137 ; GISEL-VI-NEXT: s_sext_i32_i16 s3, s3
138 ; GISEL-VI-NEXT: s_sext_i32_i16 s2, s2
139 ; GISEL-VI-NEXT: s_max_i32 s3, s3, s4
140 ; GISEL-VI-NEXT: s_max_i32 s2, s2, s4
141 ; GISEL-VI-NEXT: s_sext_i32_i16 s3, s3
142 ; GISEL-VI-NEXT: s_sext_i32_i16 s2, s2
143 ; GISEL-VI-NEXT: s_min_i32 s3, s3, s5
144 ; GISEL-VI-NEXT: s_min_i32 s2, s2, s5
145 ; GISEL-VI-NEXT: s_and_b32 s3, 0xffff, s3
146 ; GISEL-VI-NEXT: s_and_b32 s2, 0xffff, s2
147 ; GISEL-VI-NEXT: s_lshl_b32 s3, s3, 16
148 ; GISEL-VI-NEXT: s_or_b32 s2, s2, s3
149 ; GISEL-VI-NEXT: v_mov_b32_e32 v0, s0
150 ; GISEL-VI-NEXT: v_mov_b32_e32 v2, s2
151 ; GISEL-VI-NEXT: v_mov_b32_e32 v1, s1
152 ; GISEL-VI-NEXT: flat_store_dword v[0:1], v2
153 ; GISEL-VI-NEXT: s_endpgm
155 ; GISEL-GFX9-LABEL: basic_smax_smin_sgpr:
156 ; GISEL-GFX9: ; %bb.0:
157 ; GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
158 ; GISEL-GFX9-NEXT: s_sext_i32_i16 s4, 0
159 ; GISEL-GFX9-NEXT: s_sext_i32_i16 s5, 0xff
160 ; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, 0
161 ; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0)
162 ; GISEL-GFX9-NEXT: s_sext_i32_i16 s2, s2
163 ; GISEL-GFX9-NEXT: s_sext_i32_i16 s3, s3
164 ; GISEL-GFX9-NEXT: s_max_i32 s2, s2, s4
165 ; GISEL-GFX9-NEXT: s_max_i32 s3, s3, s4
166 ; GISEL-GFX9-NEXT: s_sext_i32_i16 s2, s2
167 ; GISEL-GFX9-NEXT: s_sext_i32_i16 s3, s3
168 ; GISEL-GFX9-NEXT: s_min_i32 s2, s2, s5
169 ; GISEL-GFX9-NEXT: s_min_i32 s3, s3, s5
170 ; GISEL-GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s3
171 ; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2
172 ; GISEL-GFX9-NEXT: global_store_dword v1, v0, s[0:1]
173 ; GISEL-GFX9-NEXT: s_endpgm
175 ; GISEL-GFX11-LABEL: basic_smax_smin_sgpr:
176 ; GISEL-GFX11: ; %bb.0:
177 ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
178 ; GISEL-GFX11-NEXT: s_sext_i32_i16 s4, 0
179 ; GISEL-GFX11-NEXT: s_sext_i32_i16 s5, 0xff
180 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
181 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
182 ; GISEL-GFX11-NEXT: s_sext_i32_i16 s2, s2
183 ; GISEL-GFX11-NEXT: s_sext_i32_i16 s3, s3
184 ; GISEL-GFX11-NEXT: s_max_i32 s2, s2, s4
185 ; GISEL-GFX11-NEXT: s_max_i32 s3, s3, s4
186 ; GISEL-GFX11-NEXT: s_sext_i32_i16 s2, s2
187 ; GISEL-GFX11-NEXT: s_sext_i32_i16 s3, s3
188 ; GISEL-GFX11-NEXT: s_min_i32 s2, s2, s5
189 ; GISEL-GFX11-NEXT: s_min_i32 s3, s3, s5
190 ; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
191 ; GISEL-GFX11-NEXT: s_pack_ll_b32_b16 s2, s2, s3
192 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
193 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
194 ; GISEL-GFX11-NEXT: s_endpgm
195 %src0 = trunc i32 %src0ext to i16
196 %src1 = trunc i32 %src1ext to i16
197 %src0.max = call i16 @llvm.smax.i16(i16 %src0, i16 0)
198 %src0.clamp = call i16 @llvm.smin.i16(i16 %src0.max, i16 255)
199 %src1.max = call i16 @llvm.smax.i16(i16 %src1, i16 0)
200 %src1.clamp = call i16 @llvm.smin.i16(i16 %src1.max, i16 255)
201 %insert.0 = insertelement <2 x i16> undef, i16 %src0.clamp, i32 0
202 %vec = insertelement <2 x i16> %insert.0, i16 %src1.clamp, i32 1
203 store <2 x i16> %vec, ptr addrspace(1) %out
; basic_smin_smax: clamp each scalar i16 argument with the operations in the
; opposite order, smin(x, 255) followed by smax(.., 0), then insert the two
; clamped values into elements 0 and 1 of a <2 x i16>.
207 define <2 x i16> @basic_smin_smax(i16 %src0, i16 %src1) {
208 ; SDAG-VI-LABEL: basic_smin_smax:
210 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
211 ; SDAG-VI-NEXT: v_min_i16_e32 v0, 0xff, v0
212 ; SDAG-VI-NEXT: v_min_i16_e32 v1, 0xff, v1
213 ; SDAG-VI-NEXT: v_mov_b32_e32 v2, 0
214 ; SDAG-VI-NEXT: v_max_i16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
215 ; SDAG-VI-NEXT: v_max_i16_e32 v0, 0, v0
216 ; SDAG-VI-NEXT: v_or_b32_e32 v0, v0, v1
217 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
219 ; SDAG-GFX9-LABEL: basic_smin_smax:
220 ; SDAG-GFX9: ; %bb.0:
221 ; SDAG-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222 ; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, 0xff
223 ; SDAG-GFX9-NEXT: v_med3_i16 v0, v0, 0, v2
224 ; SDAG-GFX9-NEXT: v_med3_i16 v1, v1, 0, v2
225 ; SDAG-GFX9-NEXT: s_mov_b32 s4, 0x5040100
226 ; SDAG-GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
227 ; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31]
229 ; SDAG-GFX11-LABEL: basic_smin_smax:
230 ; SDAG-GFX11: ; %bb.0:
231 ; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
232 ; SDAG-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
233 ; SDAG-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
234 ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
235 ; SDAG-GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
236 ; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
238 ; GISEL-VI-LABEL: basic_smin_smax:
240 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
241 ; GISEL-VI-NEXT: v_min_i16_e32 v0, 0xff, v0
242 ; GISEL-VI-NEXT: v_min_i16_e32 v1, 0xff, v1
243 ; GISEL-VI-NEXT: v_mov_b32_e32 v2, 0
244 ; GISEL-VI-NEXT: v_max_i16_e32 v0, 0, v0
245 ; GISEL-VI-NEXT: v_max_i16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
246 ; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v1
247 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
249 ; GISEL-GFX9-LABEL: basic_smin_smax:
250 ; GISEL-GFX9: ; %bb.0:
251 ; GISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
252 ; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0xff
253 ; GISEL-GFX9-NEXT: v_med3_i16 v0, v0, 0, v2
254 ; GISEL-GFX9-NEXT: v_med3_i16 v1, v1, 0, v2
255 ; GISEL-GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
256 ; GISEL-GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
257 ; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31]
259 ; GISEL-GFX11-LABEL: basic_smin_smax:
260 ; GISEL-GFX11: ; %bb.0:
261 ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
262 ; GISEL-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
263 ; GISEL-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
264 ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
265 ; GISEL-GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
266 ; GISEL-GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
267 ; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
268 %src0.min = call i16 @llvm.smin.i16(i16 %src0, i16 255)
269 %src0.clamp = call i16 @llvm.smax.i16(i16 %src0.min, i16 0)
270 %src1.min = call i16 @llvm.smin.i16(i16 %src1, i16 255)
271 %src1.clamp = call i16 @llvm.smax.i16(i16 %src1.min, i16 0)
272 %insert.0 = insertelement <2 x i16> undef, i16 %src0.clamp, i32 0
273 %vec = insertelement <2 x i16> %insert.0, i16 %src1.clamp, i32 1
; basic_smin_smax_combined: mixes both clamp orderings in one function.
; %src0 is clamped with smin(x, 255) then smax(.., 0); %src1 is clamped with
; smax(x, 0) then smin(.., 255). The two results are inserted into elements 0
; and 1 of a <2 x i16>.
277 define <2 x i16> @basic_smin_smax_combined(i16 %src0, i16 %src1) {
278 ; SDAG-VI-LABEL: basic_smin_smax_combined:
280 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
281 ; SDAG-VI-NEXT: v_min_i16_e32 v0, 0xff, v0
282 ; SDAG-VI-NEXT: v_max_i16_e32 v1, 0, v1
283 ; SDAG-VI-NEXT: v_mov_b32_e32 v2, 0xff
284 ; SDAG-VI-NEXT: v_min_i16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
285 ; SDAG-VI-NEXT: v_max_i16_e32 v0, 0, v0
286 ; SDAG-VI-NEXT: v_or_b32_e32 v0, v0, v1
287 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
289 ; SDAG-GFX9-LABEL: basic_smin_smax_combined:
290 ; SDAG-GFX9: ; %bb.0:
291 ; SDAG-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
292 ; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, 0xff
293 ; SDAG-GFX9-NEXT: v_med3_i16 v0, v0, 0, v2
294 ; SDAG-GFX9-NEXT: v_med3_i16 v1, v1, 0, v2
295 ; SDAG-GFX9-NEXT: s_mov_b32 s4, 0x5040100
296 ; SDAG-GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
297 ; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31]
299 ; SDAG-GFX11-LABEL: basic_smin_smax_combined:
300 ; SDAG-GFX11: ; %bb.0:
301 ; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
302 ; SDAG-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
303 ; SDAG-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
304 ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
305 ; SDAG-GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
306 ; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
308 ; GISEL-VI-LABEL: basic_smin_smax_combined:
310 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
311 ; GISEL-VI-NEXT: v_mov_b32_e32 v2, 0xff
312 ; GISEL-VI-NEXT: v_min_i16_e32 v0, 0xff, v0
313 ; GISEL-VI-NEXT: v_max_i16_e32 v1, 0, v1
314 ; GISEL-VI-NEXT: v_max_i16_e32 v0, 0, v0
315 ; GISEL-VI-NEXT: v_min_i16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
316 ; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v1
317 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
319 ; GISEL-GFX9-LABEL: basic_smin_smax_combined:
320 ; GISEL-GFX9: ; %bb.0:
321 ; GISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322 ; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0xff
323 ; GISEL-GFX9-NEXT: v_med3_i16 v0, v0, 0, v2
324 ; GISEL-GFX9-NEXT: v_med3_i16 v1, v1, 0, v2
325 ; GISEL-GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
326 ; GISEL-GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
327 ; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31]
329 ; GISEL-GFX11-LABEL: basic_smin_smax_combined:
330 ; GISEL-GFX11: ; %bb.0:
331 ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
332 ; GISEL-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
333 ; GISEL-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
334 ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
335 ; GISEL-GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
336 ; GISEL-GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
337 ; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
338 %src0.min = call i16 @llvm.smin.i16(i16 %src0, i16 255)
339 %src0.clamp = call i16 @llvm.smax.i16(i16 %src0.min, i16 0)
340 %src1.max = call i16 @llvm.smax.i16(i16 %src1, i16 0)
341 %src1.clamp = call i16 @llvm.smin.i16(i16 %src1.max, i16 255)
342 %insert.0 = insertelement <2 x i16> undef, i16 %src0.clamp, i32 0
343 %vec = insertelement <2 x i16> %insert.0, i16 %src1.clamp, i32 1
; vec_smax_smin: vector form of the clamp. Applies smax against <0, 0> and then
; smin against <255, 255> to the <2 x i16> argument and returns the result.
347 define <2 x i16> @vec_smax_smin(<2 x i16> %src) {
348 ; SDAG-VI-LABEL: vec_smax_smin:
350 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351 ; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0
352 ; SDAG-VI-NEXT: v_max_i16_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
353 ; SDAG-VI-NEXT: v_max_i16_e32 v0, 0, v0
354 ; SDAG-VI-NEXT: v_mov_b32_e32 v2, 0xff
355 ; SDAG-VI-NEXT: v_min_i16_e32 v0, 0xff, v0
356 ; SDAG-VI-NEXT: v_min_i16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
357 ; SDAG-VI-NEXT: v_or_b32_e32 v0, v0, v1
358 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
360 ; SDAG-GFX9-LABEL: vec_smax_smin:
361 ; SDAG-GFX9: ; %bb.0:
362 ; SDAG-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
363 ; SDAG-GFX9-NEXT: v_pk_max_i16 v0, v0, 0
364 ; SDAG-GFX9-NEXT: s_movk_i32 s4, 0xff
365 ; SDAG-GFX9-NEXT: v_pk_min_i16 v0, v0, s4 op_sel_hi:[1,0]
366 ; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31]
368 ; SDAG-GFX11-LABEL: vec_smax_smin:
369 ; SDAG-GFX11: ; %bb.0:
370 ; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
371 ; SDAG-GFX11-NEXT: v_pk_max_i16 v0, v0, 0
372 ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
373 ; SDAG-GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
374 ; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
376 ; GISEL-VI-LABEL: vec_smax_smin:
378 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
379 ; GISEL-VI-NEXT: v_mov_b32_e32 v2, 0
380 ; GISEL-VI-NEXT: v_max_i16_e32 v1, 0, v0
381 ; GISEL-VI-NEXT: v_max_i16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
382 ; GISEL-VI-NEXT: v_mov_b32_e32 v2, 0xff
383 ; GISEL-VI-NEXT: v_min_i16_e32 v1, 0xff, v1
384 ; GISEL-VI-NEXT: v_min_i16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
385 ; GISEL-VI-NEXT: v_or_b32_e32 v0, v1, v0
386 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
388 ; GISEL-GFX9-LABEL: vec_smax_smin:
389 ; GISEL-GFX9: ; %bb.0:
390 ; GISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
391 ; GISEL-GFX9-NEXT: v_pk_max_i16 v0, v0, 0
392 ; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, 0xff00ff
393 ; GISEL-GFX9-NEXT: v_pk_min_i16 v0, v0, v1
394 ; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31]
396 ; GISEL-GFX11-LABEL: vec_smax_smin:
397 ; GISEL-GFX11: ; %bb.0:
398 ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
399 ; GISEL-GFX11-NEXT: v_pk_max_i16 v0, v0, 0
400 ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
401 ; GISEL-GFX11-NEXT: v_pk_min_i16 v0, 0xff00ff, v0
402 ; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
403 %src.max = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %src, <2 x i16> <i16 0, i16 0>)
404 %src.clamp = call <2 x i16> @llvm.smin.v2i16(<2 x i16> %src.max, <2 x i16> <i16 255, i16 255>)
405 ret <2 x i16> %src.clamp
408 ; Check that we don't emit a VALU instruction for SGPR inputs.
; vec_smax_smin_sgpr: amdgpu_kernel variant of the vector clamp. The
; <2 x i16> inreg argument goes through smax against <0, 0> and then smin
; against <255, 255>, and the result is stored to %out.
; NOTE(review): the SDAG check lines below do contain VALU clamp instructions
; (v_max_i16/v_min_i16 on VI, v_pk_max_i16/v_pk_min_i16 on GFX9/GFX11); only
; the GISEL check lines keep the clamp on SALU (s_max_i32/s_min_i32). Confirm
; which behaviour the comment above is meant to describe.
409 define amdgpu_kernel void @vec_smax_smin_sgpr(ptr addrspace(1) %out, <2 x i16> inreg %src) {
410 ; SDAG-VI-LABEL: vec_smax_smin_sgpr:
412 ; SDAG-VI-NEXT: s_load_dword s2, s[4:5], 0x2c
413 ; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
414 ; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0xff
415 ; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
416 ; SDAG-VI-NEXT: s_lshr_b32 s3, s2, 16
417 ; SDAG-VI-NEXT: v_max_i16_e64 v1, s2, 0
418 ; SDAG-VI-NEXT: v_max_i16_e64 v2, s3, 0
419 ; SDAG-VI-NEXT: v_min_i16_e32 v1, 0xff, v1
420 ; SDAG-VI-NEXT: v_min_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
421 ; SDAG-VI-NEXT: v_or_b32_e32 v2, v1, v0
422 ; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
423 ; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
424 ; SDAG-VI-NEXT: flat_store_dword v[0:1], v2
425 ; SDAG-VI-NEXT: s_endpgm
427 ; SDAG-GFX9-LABEL: vec_smax_smin_sgpr:
428 ; SDAG-GFX9: ; %bb.0:
429 ; SDAG-GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c
430 ; SDAG-GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
431 ; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0
432 ; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0)
433 ; SDAG-GFX9-NEXT: v_pk_max_i16 v1, s2, 0
434 ; SDAG-GFX9-NEXT: s_movk_i32 s2, 0xff
435 ; SDAG-GFX9-NEXT: v_pk_min_i16 v1, v1, s2 op_sel_hi:[1,0]
436 ; SDAG-GFX9-NEXT: global_store_dword v0, v1, s[0:1]
437 ; SDAG-GFX9-NEXT: s_endpgm
439 ; SDAG-GFX11-LABEL: vec_smax_smin_sgpr:
440 ; SDAG-GFX11: ; %bb.0:
441 ; SDAG-GFX11-NEXT: s_clause 0x1
442 ; SDAG-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c
443 ; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
444 ; SDAG-GFX11-NEXT: v_mov_b32_e32 v1, 0
445 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
446 ; SDAG-GFX11-NEXT: v_pk_max_i16 v0, s2, 0
447 ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
448 ; SDAG-GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
449 ; SDAG-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
450 ; SDAG-GFX11-NEXT: s_endpgm
452 ; GISEL-VI-LABEL: vec_smax_smin_sgpr:
454 ; GISEL-VI-NEXT: s_load_dword s2, s[4:5], 0x2c
455 ; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
456 ; GISEL-VI-NEXT: s_sext_i32_i16 s3, 0
457 ; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
458 ; GISEL-VI-NEXT: s_lshr_b32 s4, s2, 16
459 ; GISEL-VI-NEXT: s_sext_i32_i16 s2, s2
460 ; GISEL-VI-NEXT: s_sext_i32_i16 s4, s4
461 ; GISEL-VI-NEXT: s_max_i32 s2, s2, s3
462 ; GISEL-VI-NEXT: s_max_i32 s3, s4, s3
463 ; GISEL-VI-NEXT: s_sext_i32_i16 s4, 0xff
464 ; GISEL-VI-NEXT: s_sext_i32_i16 s3, s3
465 ; GISEL-VI-NEXT: s_sext_i32_i16 s2, s2
466 ; GISEL-VI-NEXT: s_min_i32 s3, s3, s4
467 ; GISEL-VI-NEXT: s_min_i32 s2, s2, s4
468 ; GISEL-VI-NEXT: s_and_b32 s3, 0xffff, s3
469 ; GISEL-VI-NEXT: s_and_b32 s2, 0xffff, s2
470 ; GISEL-VI-NEXT: s_lshl_b32 s3, s3, 16
471 ; GISEL-VI-NEXT: s_or_b32 s2, s2, s3
472 ; GISEL-VI-NEXT: v_mov_b32_e32 v0, s0
473 ; GISEL-VI-NEXT: v_mov_b32_e32 v2, s2
474 ; GISEL-VI-NEXT: v_mov_b32_e32 v1, s1
475 ; GISEL-VI-NEXT: flat_store_dword v[0:1], v2
476 ; GISEL-VI-NEXT: s_endpgm
478 ; GISEL-GFX9-LABEL: vec_smax_smin_sgpr:
479 ; GISEL-GFX9: ; %bb.0:
480 ; GISEL-GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c
481 ; GISEL-GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
482 ; GISEL-GFX9-NEXT: s_sext_i32_i16 s3, 0
483 ; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, 0
484 ; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0)
485 ; GISEL-GFX9-NEXT: s_sext_i32_i16 s4, s2
486 ; GISEL-GFX9-NEXT: s_ashr_i32 s2, s2, 16
487 ; GISEL-GFX9-NEXT: s_max_i32 s3, s4, s3
488 ; GISEL-GFX9-NEXT: s_max_i32 s2, s2, 0
489 ; GISEL-GFX9-NEXT: s_pack_ll_b32_b16 s2, s3, s2
490 ; GISEL-GFX9-NEXT: s_sext_i32_i16 s3, s2
491 ; GISEL-GFX9-NEXT: s_ashr_i32 s2, s2, 16
492 ; GISEL-GFX9-NEXT: s_sext_i32_i16 s4, 0xff00ff
493 ; GISEL-GFX9-NEXT: s_min_i32 s3, s3, s4
494 ; GISEL-GFX9-NEXT: s_min_i32 s2, s2, 0xff
495 ; GISEL-GFX9-NEXT: s_pack_ll_b32_b16 s2, s3, s2
496 ; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2
497 ; GISEL-GFX9-NEXT: global_store_dword v1, v0, s[0:1]
498 ; GISEL-GFX9-NEXT: s_endpgm
500 ; GISEL-GFX11-LABEL: vec_smax_smin_sgpr:
501 ; GISEL-GFX11: ; %bb.0:
502 ; GISEL-GFX11-NEXT: s_clause 0x1
503 ; GISEL-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c
504 ; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
505 ; GISEL-GFX11-NEXT: s_sext_i32_i16 s3, 0
506 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
507 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
508 ; GISEL-GFX11-NEXT: s_sext_i32_i16 s4, s2
509 ; GISEL-GFX11-NEXT: s_ashr_i32 s2, s2, 16
510 ; GISEL-GFX11-NEXT: s_max_i32 s3, s4, s3
511 ; GISEL-GFX11-NEXT: s_max_i32 s2, s2, 0
512 ; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
513 ; GISEL-GFX11-NEXT: s_pack_ll_b32_b16 s2, s3, s2
514 ; GISEL-GFX11-NEXT: s_sext_i32_i16 s3, 0xff00ff
515 ; GISEL-GFX11-NEXT: s_sext_i32_i16 s4, s2
516 ; GISEL-GFX11-NEXT: s_ashr_i32 s2, s2, 16
517 ; GISEL-GFX11-NEXT: s_min_i32 s3, s4, s3
518 ; GISEL-GFX11-NEXT: s_min_i32 s2, s2, 0xff
519 ; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
520 ; GISEL-GFX11-NEXT: s_pack_ll_b32_b16 s2, s3, s2
521 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
522 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
523 ; GISEL-GFX11-NEXT: s_endpgm
524 %src.max = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %src, <2 x i16> <i16 0, i16 0>)
525 %src.clamp = call <2 x i16> @llvm.smin.v2i16(<2 x i16> %src.max, <2 x i16> <i16 255, i16 255>)
526 store <2 x i16> %src.clamp, ptr addrspace(1) %out
; vec_smin_smax: vector clamp with the operations in the opposite order.
; Applies smin against <255, 255> and then smax against <0, 0> to the
; <2 x i16> argument and returns the result.
530 define <2 x i16> @vec_smin_smax(<2 x i16> %src) {
531 ; SDAG-VI-LABEL: vec_smin_smax:
533 ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
534 ; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0xff
535 ; SDAG-VI-NEXT: v_min_i16_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
536 ; SDAG-VI-NEXT: v_min_i16_e32 v0, 0xff, v0
537 ; SDAG-VI-NEXT: v_mov_b32_e32 v2, 0
538 ; SDAG-VI-NEXT: v_max_i16_e32 v0, 0, v0
539 ; SDAG-VI-NEXT: v_max_i16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
540 ; SDAG-VI-NEXT: v_or_b32_e32 v0, v0, v1
541 ; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
543 ; SDAG-GFX9-LABEL: vec_smin_smax:
544 ; SDAG-GFX9: ; %bb.0:
545 ; SDAG-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
546 ; SDAG-GFX9-NEXT: s_movk_i32 s4, 0xff
547 ; SDAG-GFX9-NEXT: v_pk_min_i16 v0, v0, s4 op_sel_hi:[1,0]
548 ; SDAG-GFX9-NEXT: v_pk_max_i16 v0, v0, 0
549 ; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31]
551 ; SDAG-GFX11-LABEL: vec_smin_smax:
552 ; SDAG-GFX11: ; %bb.0:
553 ; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
554 ; SDAG-GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
555 ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
556 ; SDAG-GFX11-NEXT: v_pk_max_i16 v0, v0, 0
557 ; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
559 ; GISEL-VI-LABEL: vec_smin_smax:
561 ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
562 ; GISEL-VI-NEXT: v_mov_b32_e32 v1, 0xff
563 ; GISEL-VI-NEXT: v_min_i16_e32 v2, 0xff, v0
564 ; GISEL-VI-NEXT: v_min_i16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
565 ; GISEL-VI-NEXT: v_max_i16_e32 v1, 0, v2
566 ; GISEL-VI-NEXT: v_mov_b32_e32 v2, 0
567 ; GISEL-VI-NEXT: v_max_i16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
568 ; GISEL-VI-NEXT: v_or_b32_e32 v0, v1, v0
569 ; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
571 ; GISEL-GFX9-LABEL: vec_smin_smax:
572 ; GISEL-GFX9: ; %bb.0:
573 ; GISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
574 ; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, 0xff00ff
575 ; GISEL-GFX9-NEXT: v_pk_min_i16 v0, v0, v1
576 ; GISEL-GFX9-NEXT: v_pk_max_i16 v0, v0, 0
577 ; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31]
579 ; GISEL-GFX11-LABEL: vec_smin_smax:
580 ; GISEL-GFX11: ; %bb.0:
581 ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
582 ; GISEL-GFX11-NEXT: v_pk_min_i16 v0, 0xff00ff, v0
583 ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
584 ; GISEL-GFX11-NEXT: v_pk_max_i16 v0, v0, 0
585 ; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
586 %src.min = call <2 x i16> @llvm.smin.v2i16(<2 x i16> %src, <2 x i16> <i16 255, i16 255>)
587 %src.clamp = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %src.min, <2 x i16> <i16 0, i16 0>)
588 ret <2 x i16> %src.clamp
590 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: