1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck --check-prefix=GFX6 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck --check-prefix=GFX8 %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
5 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10PLUS,GFX10 %s
6 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX10PLUS,GFX11,GFX11-TRUE16 %s
7 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX10PLUS,GFX11,GFX11-FAKE16 %s
; Signed saturating subtract of two i8 values through @llvm.ssub.sat.i8.
; The GFX6 checks expect sign-extension from 8 bits, a 32-bit subtract and a
; v_med3_i32 clamp, while the GFX9 and later checks expect both operands
; shifted left by 8 around a clamped 16-bit subtract.
9 define i8 @v_ssubsat_i8(i8 %lhs, i8 %rhs) {
10 ; GFX6-LABEL: v_ssubsat_i8:
12 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 8
14 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
15 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
16 ; GFX6-NEXT: s_movk_i32 s4, 0xff80
17 ; GFX6-NEXT: v_mov_b32_e32 v1, 0x7f
18 ; GFX6-NEXT: v_med3_i32 v0, v0, s4, v1
19 ; GFX6-NEXT: s_setpc_b64 s[30:31]
21 ; GFX8-LABEL: v_ssubsat_i8:
23 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24 ; GFX8-NEXT: v_sub_u16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
25 ; GFX8-NEXT: v_min_i16_e32 v0, 0x7f, v0
26 ; GFX8-NEXT: v_max_i16_e32 v0, 0xff80, v0
27 ; GFX8-NEXT: s_setpc_b64 s[30:31]
29 ; GFX9-LABEL: v_ssubsat_i8:
31 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
32 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1
33 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 8, v0
34 ; GFX9-NEXT: v_sub_i16 v0, v0, v1 clamp
35 ; GFX9-NEXT: v_ashrrev_i16_e32 v0, 8, v0
36 ; GFX9-NEXT: s_setpc_b64 s[30:31]
38 ; GFX10-LABEL: v_ssubsat_i8:
40 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41 ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1
42 ; GFX10-NEXT: v_lshlrev_b16 v0, 8, v0
43 ; GFX10-NEXT: v_sub_nc_i16 v0, v0, v1 clamp
44 ; GFX10-NEXT: v_ashrrev_i16 v0, 8, v0
45 ; GFX10-NEXT: s_setpc_b64 s[30:31]
47 ; GFX11-TRUE16-LABEL: v_ssubsat_i8:
48 ; GFX11-TRUE16: ; %bb.0:
49 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50 ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v1.l
51 ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.l, 8, v0.l
52 ; GFX11-TRUE16-NEXT: v_sub_nc_i16 v0.l, v0.l, v0.h clamp
53 ; GFX11-TRUE16-NEXT: v_ashrrev_i16 v0.l, 8, v0.l
54 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
56 ; GFX11-FAKE16-LABEL: v_ssubsat_i8:
57 ; GFX11-FAKE16: ; %bb.0:
58 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59 ; GFX11-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
60 ; GFX11-FAKE16-NEXT: v_lshlrev_b16 v0, 8, v0
61 ; GFX11-FAKE16-NEXT: v_sub_nc_i16 v0, v0, v1 clamp
62 ; GFX11-FAKE16-NEXT: v_ashrrev_i16 v0, 8, v0
63 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
64 %result = call i8 @llvm.ssub.sat.i8(i8 %lhs, i8 %rhs)
; Signed saturating subtract of two i16 values through @llvm.ssub.sat.i16.
; The GFX9 and later checks expect a clamped 16-bit subtract, the GFX8 checks
; expect a compare/xor/select overflow sequence, and the GFX6 checks expect
; sign-extension to 32 bits followed by a v_med3_i32 clamp.
68 define i16 @v_ssubsat_i16(i16 %lhs, i16 %rhs) {
69 ; GFX6-LABEL: v_ssubsat_i16:
71 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
73 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
74 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
75 ; GFX6-NEXT: s_movk_i32 s4, 0x8000
76 ; GFX6-NEXT: v_mov_b32_e32 v1, 0x7fff
77 ; GFX6-NEXT: v_med3_i32 v0, v0, s4, v1
78 ; GFX6-NEXT: s_setpc_b64 s[30:31]
80 ; GFX8-LABEL: v_ssubsat_i16:
82 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v1
84 ; GFX8-NEXT: v_sub_u16_e32 v1, v0, v1
85 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v1, v0
86 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v1
87 ; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
88 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
89 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
90 ; GFX8-NEXT: s_setpc_b64 s[30:31]
92 ; GFX9-LABEL: v_ssubsat_i16:
94 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95 ; GFX9-NEXT: v_sub_i16 v0, v0, v1 clamp
96 ; GFX9-NEXT: s_setpc_b64 s[30:31]
98 ; GFX10-LABEL: v_ssubsat_i16:
100 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
101 ; GFX10-NEXT: v_sub_nc_i16 v0, v0, v1 clamp
102 ; GFX10-NEXT: s_setpc_b64 s[30:31]
104 ; GFX11-TRUE16-LABEL: v_ssubsat_i16:
105 ; GFX11-TRUE16: ; %bb.0:
106 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
107 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
108 ; GFX11-TRUE16-NEXT: v_sub_nc_i16 v0.l, v0.l, v0.h clamp
109 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
111 ; GFX11-FAKE16-LABEL: v_ssubsat_i16:
112 ; GFX11-FAKE16: ; %bb.0:
113 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114 ; GFX11-FAKE16-NEXT: v_sub_nc_i16 v0, v0, v1 clamp
115 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
116 %result = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs)
; Signed saturating subtract of two i32 values through @llvm.ssub.sat.i32.
; The GFX6 and GFX8 checks expect a subtract followed by a compare/xor/select
; overflow sequence, while the GFX9 and GFX10PLUS checks expect one clamped
; 32-bit subtract.
120 define i32 @v_ssubsat_i32(i32 %lhs, i32 %rhs) {
121 ; GFX6-LABEL: v_ssubsat_i32:
123 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v1
125 ; GFX6-NEXT: v_sub_i32_e64 v1, s[4:5], v0, v1
126 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, v0
127 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v1
128 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
129 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
130 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
131 ; GFX6-NEXT: s_setpc_b64 s[30:31]
133 ; GFX8-LABEL: v_ssubsat_i32:
135 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v1
137 ; GFX8-NEXT: v_sub_u32_e64 v1, s[4:5], v0, v1
138 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, v0
139 ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v1
140 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
141 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
142 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
143 ; GFX8-NEXT: s_setpc_b64 s[30:31]
145 ; GFX9-LABEL: v_ssubsat_i32:
147 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
148 ; GFX9-NEXT: v_sub_i32 v0, v0, v1 clamp
149 ; GFX9-NEXT: s_setpc_b64 s[30:31]
151 ; GFX10PLUS-LABEL: v_ssubsat_i32:
152 ; GFX10PLUS: ; %bb.0:
153 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
154 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v1 clamp
155 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
156 %result = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs)
; Signed saturating subtract of two <2 x i16> vectors through
; @llvm.ssub.sat.v2i16. The GFX9 and GFX10PLUS checks expect a single clamped
; v_pk_sub_i16, while the GFX6 and GFX8 checks expect each 16-bit lane to be
; handled separately.
160 define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
161 ; GFX6-LABEL: v_ssubsat_v2i16:
163 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
165 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
166 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
167 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
168 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
169 ; GFX6-NEXT: s_movk_i32 s4, 0x8000
170 ; GFX6-NEXT: v_mov_b32_e32 v3, 0x7fff
171 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
172 ; GFX6-NEXT: v_med3_i32 v1, v1, s4, v3
173 ; GFX6-NEXT: v_med3_i32 v0, v0, s4, v3
174 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v1
175 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
176 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v4
177 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
178 ; GFX6-NEXT: s_setpc_b64 s[30:31]
180 ; GFX8-LABEL: v_ssubsat_v2i16:
182 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
183 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
184 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
185 ; GFX8-NEXT: v_sub_u16_e32 v4, v3, v2
186 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v4, v3
187 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v2
188 ; GFX8-NEXT: v_ashrrev_i16_e32 v2, 15, v4
189 ; GFX8-NEXT: v_xor_b32_e32 v2, 0xffff8000, v2
190 ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
191 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
192 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v1
193 ; GFX8-NEXT: v_sub_u16_e32 v1, v0, v1
194 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v1, v0
195 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v1
196 ; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
197 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
198 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
199 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
200 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
201 ; GFX8-NEXT: s_setpc_b64 s[30:31]
203 ; GFX9-LABEL: v_ssubsat_v2i16:
205 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206 ; GFX9-NEXT: v_pk_sub_i16 v0, v0, v1 clamp
207 ; GFX9-NEXT: s_setpc_b64 s[30:31]
209 ; GFX10PLUS-LABEL: v_ssubsat_v2i16:
210 ; GFX10PLUS: ; %bb.0:
211 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212 ; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v1 clamp
213 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
214 %result = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs)
215 ret <2 x i16> %result
; Signed saturating subtract of two <3 x i16> vectors through
; @llvm.ssub.sat.v3i16. The GFX9 and GFX10PLUS checks expect two clamped
; v_pk_sub_i16 instructions, while the GFX6 and GFX8 checks expect three
; separate per-lane subtract and clamp sequences.
218 define <3 x i16> @v_ssubsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
219 ; GFX6-LABEL: v_ssubsat_v3i16:
221 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
223 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
224 ; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16
225 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
226 ; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16
227 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
228 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
229 ; GFX6-NEXT: s_movk_i32 s4, 0x8000
230 ; GFX6-NEXT: v_mov_b32_e32 v4, 0x7fff
231 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
232 ; GFX6-NEXT: v_med3_i32 v1, v1, s4, v4
233 ; GFX6-NEXT: v_med3_i32 v0, v0, s4, v4
234 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v5
235 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
236 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
237 ; GFX6-NEXT: v_med3_i32 v3, v2, s4, v4
238 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
239 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
240 ; GFX6-NEXT: v_alignbit_b32 v1, v3, v1, 16
241 ; GFX6-NEXT: s_setpc_b64 s[30:31]
243 ; GFX8-LABEL: v_ssubsat_v3i16:
245 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
246 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
247 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
248 ; GFX8-NEXT: v_sub_u16_e32 v6, v5, v4
249 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v6, v5
250 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v4
251 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, 15, v6
252 ; GFX8-NEXT: v_xor_b32_e32 v4, 0xffff8000, v4
253 ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
254 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
255 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v3
256 ; GFX8-NEXT: v_sub_u16_e32 v3, v1, v3
257 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v3, v1
258 ; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v3
259 ; GFX8-NEXT: v_xor_b32_e32 v1, 0xffff8000, v1
260 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
261 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
262 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v2
263 ; GFX8-NEXT: v_sub_u16_e32 v2, v0, v2
264 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v2, v0
265 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v2
266 ; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
267 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
268 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
269 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
270 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
271 ; GFX8-NEXT: s_setpc_b64 s[30:31]
273 ; GFX9-LABEL: v_ssubsat_v3i16:
275 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
276 ; GFX9-NEXT: v_pk_sub_i16 v1, v1, v3 clamp
277 ; GFX9-NEXT: v_pk_sub_i16 v0, v0, v2 clamp
278 ; GFX9-NEXT: s_setpc_b64 s[30:31]
280 ; GFX10PLUS-LABEL: v_ssubsat_v3i16:
281 ; GFX10PLUS: ; %bb.0:
282 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
283 ; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v2 clamp
284 ; GFX10PLUS-NEXT: v_pk_sub_i16 v1, v1, v3 clamp
285 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
286 %result = call <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16> %lhs, <3 x i16> %rhs)
287 ret <3 x i16> %result
; Signed saturating subtract of two <4 x i16> vectors through
; @llvm.ssub.sat.v4i16, with the result bitcast to <2 x float> before it is
; returned. The GFX9 and GFX10PLUS checks expect two clamped v_pk_sub_i16
; instructions, while the GFX6 and GFX8 checks expect four per-lane sequences.
290 define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
291 ; GFX6-LABEL: v_ssubsat_v4i16:
293 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
294 ; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16
295 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
296 ; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16
297 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
298 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
299 ; GFX6-NEXT: s_movk_i32 s4, 0x8000
300 ; GFX6-NEXT: v_mov_b32_e32 v5, 0x7fff
301 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
302 ; GFX6-NEXT: v_med3_i32 v1, v1, s4, v5
303 ; GFX6-NEXT: v_med3_i32 v0, v0, s4, v5
304 ; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 16
305 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
306 ; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 16
307 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
308 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
309 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
310 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
311 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v3, v7
312 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v6
313 ; GFX6-NEXT: v_med3_i32 v1, v1, s4, v5
314 ; GFX6-NEXT: v_med3_i32 v2, v2, s4, v5
315 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
316 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
317 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
318 ; GFX6-NEXT: s_setpc_b64 s[30:31]
320 ; GFX8-LABEL: v_ssubsat_v4i16:
322 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
323 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
324 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
325 ; GFX8-NEXT: v_sub_u16_e32 v6, v5, v4
326 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v6, v5
327 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v4
328 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, 15, v6
329 ; GFX8-NEXT: v_xor_b32_e32 v4, 0xffff8000, v4
330 ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
331 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
332 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v2
333 ; GFX8-NEXT: v_sub_u16_e32 v2, v0, v2
334 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v2, v0
335 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v2
336 ; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
337 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
338 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v4
339 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
340 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
341 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v3
342 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1
343 ; GFX8-NEXT: v_sub_u16_e32 v5, v4, v2
344 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v5, v4
345 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v2
346 ; GFX8-NEXT: v_ashrrev_i16_e32 v2, 15, v5
347 ; GFX8-NEXT: v_xor_b32_e32 v2, 0xffff8000, v2
348 ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
349 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
350 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v3
351 ; GFX8-NEXT: v_sub_u16_e32 v3, v1, v3
352 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v3, v1
353 ; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v3
354 ; GFX8-NEXT: v_xor_b32_e32 v1, 0xffff8000, v1
355 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
356 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
357 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
358 ; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
359 ; GFX8-NEXT: s_setpc_b64 s[30:31]
361 ; GFX9-LABEL: v_ssubsat_v4i16:
363 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364 ; GFX9-NEXT: v_pk_sub_i16 v0, v0, v2 clamp
365 ; GFX9-NEXT: v_pk_sub_i16 v1, v1, v3 clamp
366 ; GFX9-NEXT: s_setpc_b64 s[30:31]
368 ; GFX10PLUS-LABEL: v_ssubsat_v4i16:
369 ; GFX10PLUS: ; %bb.0:
370 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
371 ; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v2 clamp
372 ; GFX10PLUS-NEXT: v_pk_sub_i16 v1, v1, v3 clamp
373 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
374 %result = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
375 %cast = bitcast <4 x i16> %result to <2 x float>
376 ret <2 x float> %cast
; Signed saturating subtract of two <2 x i32> vectors through
; @llvm.ssub.sat.v2i32. The GFX9 and GFX10PLUS checks expect one clamped
; 32-bit subtract per element, while the GFX6 and GFX8 checks expect a
; compare/xor/select overflow sequence per element.
379 define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
380 ; GFX6-LABEL: v_ssubsat_v2i32:
382 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
383 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v2
384 ; GFX6-NEXT: v_sub_i32_e64 v2, s[4:5], v0, v2
385 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0
386 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v2
387 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
388 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
389 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
390 ; GFX6-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v3
391 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3
392 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v1
393 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v2
394 ; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
395 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
396 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
397 ; GFX6-NEXT: s_setpc_b64 s[30:31]
399 ; GFX8-LABEL: v_ssubsat_v2i32:
401 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
402 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v2
403 ; GFX8-NEXT: v_sub_u32_e64 v2, s[4:5], v0, v2
404 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0
405 ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v2
406 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
407 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
408 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
409 ; GFX8-NEXT: v_sub_u32_e64 v2, s[4:5], v1, v3
410 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3
411 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v1
412 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v2
413 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
414 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
415 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
416 ; GFX8-NEXT: s_setpc_b64 s[30:31]
418 ; GFX9-LABEL: v_ssubsat_v2i32:
420 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
421 ; GFX9-NEXT: v_sub_i32 v0, v0, v2 clamp
422 ; GFX9-NEXT: v_sub_i32 v1, v1, v3 clamp
423 ; GFX9-NEXT: s_setpc_b64 s[30:31]
425 ; GFX10PLUS-LABEL: v_ssubsat_v2i32:
426 ; GFX10PLUS: ; %bb.0:
427 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
428 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v2 clamp
429 ; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v3 clamp
430 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
431 %result = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
432 ret <2 x i32> %result
; Signed saturating subtract of two <3 x i32> vectors through
; @llvm.ssub.sat.v3i32. The GFX9 and GFX10PLUS checks expect three clamped
; 32-bit subtracts, while the GFX6 and GFX8 checks expect three
; compare/xor/select overflow sequences.
435 define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
436 ; GFX6-LABEL: v_ssubsat_v3i32:
438 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
439 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3
440 ; GFX6-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v3
441 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v0
442 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v3
443 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
444 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
445 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
446 ; GFX6-NEXT: v_sub_i32_e64 v3, s[4:5], v1, v4
447 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4
448 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v1
449 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v3
450 ; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
451 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
452 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
453 ; GFX6-NEXT: v_sub_i32_e64 v3, s[4:5], v2, v5
454 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5
455 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v2
456 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v3
457 ; GFX6-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
458 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
459 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
460 ; GFX6-NEXT: s_setpc_b64 s[30:31]
462 ; GFX8-LABEL: v_ssubsat_v3i32:
464 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
465 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3
466 ; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], v0, v3
467 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v0
468 ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v3
469 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
470 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
471 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
472 ; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], v1, v4
473 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4
474 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v1
475 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v3
476 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
477 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
478 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
479 ; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], v2, v5
480 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5
481 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v2
482 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v3
483 ; GFX8-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
484 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
485 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
486 ; GFX8-NEXT: s_setpc_b64 s[30:31]
488 ; GFX9-LABEL: v_ssubsat_v3i32:
490 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
491 ; GFX9-NEXT: v_sub_i32 v0, v0, v3 clamp
492 ; GFX9-NEXT: v_sub_i32 v1, v1, v4 clamp
493 ; GFX9-NEXT: v_sub_i32 v2, v2, v5 clamp
494 ; GFX9-NEXT: s_setpc_b64 s[30:31]
496 ; GFX10PLUS-LABEL: v_ssubsat_v3i32:
497 ; GFX10PLUS: ; %bb.0:
498 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
499 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v3 clamp
500 ; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v4 clamp
501 ; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v5 clamp
502 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
503 %result = call <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32> %lhs, <3 x i32> %rhs)
504 ret <3 x i32> %result
; Signed saturating subtract of two <4 x i32> vectors through
; @llvm.ssub.sat.v4i32. The GFX9 and GFX10PLUS checks expect four clamped
; 32-bit subtracts, while the GFX6 and GFX8 checks expect four
; compare/xor/select overflow sequences.
507 define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
508 ; GFX6-LABEL: v_ssubsat_v4i32:
510 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
511 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4
512 ; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v0, v4
513 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v0
514 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v4
515 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
516 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
517 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
518 ; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v1, v5
519 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5
520 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v1
521 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v4
522 ; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
523 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
524 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
525 ; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v2, v6
526 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v6
527 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v2
528 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v4
529 ; GFX6-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
530 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
531 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
532 ; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v7
533 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v7
534 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v3
535 ; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v4
536 ; GFX6-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
537 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
538 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
539 ; GFX6-NEXT: s_setpc_b64 s[30:31]
541 ; GFX8-LABEL: v_ssubsat_v4i32:
543 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
544 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4
545 ; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v0, v4
546 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v0
547 ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v4
548 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
549 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
550 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
551 ; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v1, v5
552 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5
553 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v1
554 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v4
555 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
556 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
557 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
558 ; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v2, v6
559 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v6
560 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v2
561 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v4
562 ; GFX8-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
563 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
564 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
565 ; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v3, v7
566 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v7
567 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v3
568 ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v4
569 ; GFX8-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
570 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
571 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
572 ; GFX8-NEXT: s_setpc_b64 s[30:31]
574 ; GFX9-LABEL: v_ssubsat_v4i32:
576 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
577 ; GFX9-NEXT: v_sub_i32 v0, v0, v4 clamp
578 ; GFX9-NEXT: v_sub_i32 v1, v1, v5 clamp
579 ; GFX9-NEXT: v_sub_i32 v2, v2, v6 clamp
580 ; GFX9-NEXT: v_sub_i32 v3, v3, v7 clamp
581 ; GFX9-NEXT: s_setpc_b64 s[30:31]
583 ; GFX10PLUS-LABEL: v_ssubsat_v4i32:
584 ; GFX10PLUS: ; %bb.0:
585 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
586 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v4 clamp
587 ; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v5 clamp
588 ; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v6 clamp
589 ; GFX10PLUS-NEXT: v_sub_nc_i32 v3, v3, v7 clamp
590 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
591 %result = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
592 ret <4 x i32> %result
; Signed saturating subtract of two <8 x i32> vectors through
; @llvm.ssub.sat.v8i32. The GFX9 and GFX10PLUS checks expect eight clamped
; 32-bit subtracts, while the GFX6 and GFX8 checks expect eight
; compare/xor/select overflow sequences.
595 define <8 x i32> @v_ssubsat_v8i32(<8 x i32> %lhs, <8 x i32> %rhs) {
596 ; GFX6-LABEL: v_ssubsat_v8i32:
598 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
599 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v8
600 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v0, v8
601 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v0
602 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v8
603 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
604 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
605 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
606 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v1, v9
607 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v9
608 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v1
609 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v8
610 ; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
611 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
612 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
613 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v2, v10
614 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v10
615 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v2
616 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v8
617 ; GFX6-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
618 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
619 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
620 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v3, v11
621 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v11
622 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v3
623 ; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v8
624 ; GFX6-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
625 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
626 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
627 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v4, v12
628 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v12
629 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v4
630 ; GFX6-NEXT: v_ashrrev_i32_e32 v4, 31, v8
631 ; GFX6-NEXT: v_xor_b32_e32 v4, 0x80000000, v4
632 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
633 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
634 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v5, v13
635 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v13
636 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v5
637 ; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v8
638 ; GFX6-NEXT: v_xor_b32_e32 v5, 0x80000000, v5
639 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
640 ; GFX6-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
641 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v6, v14
642 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v14
643 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v6
644 ; GFX6-NEXT: v_ashrrev_i32_e32 v6, 31, v8
645 ; GFX6-NEXT: v_xor_b32_e32 v6, 0x80000000, v6
646 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
647 ; GFX6-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
648 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v7, v15
649 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v15
650 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v7
651 ; GFX6-NEXT: v_ashrrev_i32_e32 v7, 31, v8
652 ; GFX6-NEXT: v_xor_b32_e32 v7, 0x80000000, v7
653 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
654 ; GFX6-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
655 ; GFX6-NEXT: s_setpc_b64 s[30:31]
657 ; GFX8-LABEL: v_ssubsat_v8i32:
659 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
660 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v8
661 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v0, v8
662 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v0
663 ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v8
664 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
665 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
666 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
667 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v1, v9
668 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v9
669 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v1
670 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v8
671 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
672 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
673 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
674 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v2, v10
675 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v10
676 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v2
677 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v8
678 ; GFX8-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
679 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
680 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
681 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v3, v11
682 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v11
683 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v3
684 ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v8
685 ; GFX8-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
686 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
687 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
688 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v4, v12
689 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v12
690 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v4
691 ; GFX8-NEXT: v_ashrrev_i32_e32 v4, 31, v8
692 ; GFX8-NEXT: v_xor_b32_e32 v4, 0x80000000, v4
693 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
694 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
695 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v5, v13
696 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v13
697 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v5
698 ; GFX8-NEXT: v_ashrrev_i32_e32 v5, 31, v8
699 ; GFX8-NEXT: v_xor_b32_e32 v5, 0x80000000, v5
700 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
701 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
702 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v6, v14
703 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v14
704 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v6
705 ; GFX8-NEXT: v_ashrrev_i32_e32 v6, 31, v8
706 ; GFX8-NEXT: v_xor_b32_e32 v6, 0x80000000, v6
707 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
708 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
709 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v7, v15
710 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v15
711 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v7
712 ; GFX8-NEXT: v_ashrrev_i32_e32 v7, 31, v8
713 ; GFX8-NEXT: v_xor_b32_e32 v7, 0x80000000, v7
714 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
715 ; GFX8-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
716 ; GFX8-NEXT: s_setpc_b64 s[30:31]
718 ; GFX9-LABEL: v_ssubsat_v8i32:
720 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
721 ; GFX9-NEXT: v_sub_i32 v0, v0, v8 clamp
722 ; GFX9-NEXT: v_sub_i32 v1, v1, v9 clamp
723 ; GFX9-NEXT: v_sub_i32 v2, v2, v10 clamp
724 ; GFX9-NEXT: v_sub_i32 v3, v3, v11 clamp
725 ; GFX9-NEXT: v_sub_i32 v4, v4, v12 clamp
726 ; GFX9-NEXT: v_sub_i32 v5, v5, v13 clamp
727 ; GFX9-NEXT: v_sub_i32 v6, v6, v14 clamp
728 ; GFX9-NEXT: v_sub_i32 v7, v7, v15 clamp
729 ; GFX9-NEXT: s_setpc_b64 s[30:31]
731 ; GFX10PLUS-LABEL: v_ssubsat_v8i32:
732 ; GFX10PLUS: ; %bb.0:
733 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
734 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v8 clamp
735 ; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v9 clamp
736 ; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v10 clamp
737 ; GFX10PLUS-NEXT: v_sub_nc_i32 v3, v3, v11 clamp
738 ; GFX10PLUS-NEXT: v_sub_nc_i32 v4, v4, v12 clamp
739 ; GFX10PLUS-NEXT: v_sub_nc_i32 v5, v5, v13 clamp
740 ; GFX10PLUS-NEXT: v_sub_nc_i32 v6, v6, v14 clamp
741 ; GFX10PLUS-NEXT: v_sub_nc_i32 v7, v7, v15 clamp
742 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
743 %result = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %lhs, <8 x i32> %rhs)
744 ret <8 x i32> %result
; Saturating signed subtraction on <16 x i32> through @llvm.ssub.sat.v16i32.
; Lowering pinned by the assertions below -
;   * GFX6 and GFX8 expand every lane into a subtract, two signed compares
;     (0 < rhs and difference < lhs) whose results are XORed into the select
;     mask, and a v_cndmask_b32 that picks between the raw difference and
;     (difference >> 31) ^ 0x80000000.
;   * GFX9 emits one "v_sub_i32 ... clamp" per lane; GFX10 and GFX11 emit
;     "v_sub_nc_i32 ... clamp" instead.
;   * On every target the rhs operand of lane 15 is loaded from memory
;     addressed by s32 (buffer_load_dword, or scratch_load_b32 on GFX11) and is
;     only consumed after "s_waitcnt vmcnt(0)".
; NOTE(review): presumably that last rhs dword is passed on the stack because
; the VGPR argument registers are exhausted - confirm against the ABI.
747 define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
748 ; GFX6-LABEL: v_ssubsat_v16i32:
750 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
751 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16
752 ; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v0, v16
753 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v0
754 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v16
755 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
756 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
757 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
758 ; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v1, v17
759 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v17
760 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v1
761 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v16
762 ; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
763 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
764 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
765 ; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v2, v18
766 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v18
767 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v2
768 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v16
769 ; GFX6-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
770 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
771 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
772 ; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v3, v19
773 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v19
774 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v3
775 ; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v16
776 ; GFX6-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
777 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
778 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
779 ; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v4, v20
780 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v20
781 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v4
782 ; GFX6-NEXT: v_ashrrev_i32_e32 v4, 31, v16
783 ; GFX6-NEXT: v_xor_b32_e32 v4, 0x80000000, v4
784 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
785 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
786 ; GFX6-NEXT: buffer_load_dword v16, off, s[0:3], s32
787 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v5, v21
788 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v21
789 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v5
790 ; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v17
791 ; GFX6-NEXT: v_xor_b32_e32 v5, 0x80000000, v5
792 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
793 ; GFX6-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
794 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v6, v22
795 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v22
796 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v6
797 ; GFX6-NEXT: v_ashrrev_i32_e32 v6, 31, v17
798 ; GFX6-NEXT: v_xor_b32_e32 v6, 0x80000000, v6
799 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
800 ; GFX6-NEXT: v_cndmask_b32_e32 v6, v17, v6, vcc
801 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v7, v23
802 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v23
803 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v7
804 ; GFX6-NEXT: v_ashrrev_i32_e32 v7, 31, v17
805 ; GFX6-NEXT: v_xor_b32_e32 v7, 0x80000000, v7
806 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
807 ; GFX6-NEXT: v_cndmask_b32_e32 v7, v17, v7, vcc
808 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v8, v24
809 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v24
810 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v8
811 ; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v17
812 ; GFX6-NEXT: v_xor_b32_e32 v8, 0x80000000, v8
813 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
814 ; GFX6-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
815 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v9, v25
816 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v25
817 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v9
818 ; GFX6-NEXT: v_ashrrev_i32_e32 v9, 31, v17
819 ; GFX6-NEXT: v_xor_b32_e32 v9, 0x80000000, v9
820 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
821 ; GFX6-NEXT: v_cndmask_b32_e32 v9, v17, v9, vcc
822 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v10, v26
823 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v26
824 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v10
825 ; GFX6-NEXT: v_ashrrev_i32_e32 v10, 31, v17
826 ; GFX6-NEXT: v_xor_b32_e32 v10, 0x80000000, v10
827 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
828 ; GFX6-NEXT: v_cndmask_b32_e32 v10, v17, v10, vcc
829 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v11, v27
830 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v27
831 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v11
832 ; GFX6-NEXT: v_ashrrev_i32_e32 v11, 31, v17
833 ; GFX6-NEXT: v_xor_b32_e32 v11, 0x80000000, v11
834 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
835 ; GFX6-NEXT: v_cndmask_b32_e32 v11, v17, v11, vcc
836 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v12, v28
837 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v28
838 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v12
839 ; GFX6-NEXT: v_ashrrev_i32_e32 v12, 31, v17
840 ; GFX6-NEXT: v_xor_b32_e32 v12, 0x80000000, v12
841 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
842 ; GFX6-NEXT: v_cndmask_b32_e32 v12, v17, v12, vcc
843 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v13, v29
844 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v29
845 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v13
846 ; GFX6-NEXT: v_ashrrev_i32_e32 v13, 31, v17
847 ; GFX6-NEXT: v_xor_b32_e32 v13, 0x80000000, v13
848 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
849 ; GFX6-NEXT: v_cndmask_b32_e32 v13, v17, v13, vcc
850 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v14, v30
851 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v30
852 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v14
853 ; GFX6-NEXT: v_ashrrev_i32_e32 v14, 31, v17
854 ; GFX6-NEXT: v_xor_b32_e32 v14, 0x80000000, v14
855 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
856 ; GFX6-NEXT: v_cndmask_b32_e32 v14, v17, v14, vcc
857 ; GFX6-NEXT: s_waitcnt vmcnt(0)
858 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16
859 ; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v15, v16
860 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v15
861 ; GFX6-NEXT: v_ashrrev_i32_e32 v15, 31, v16
862 ; GFX6-NEXT: v_xor_b32_e32 v15, 0x80000000, v15
863 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
864 ; GFX6-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
865 ; GFX6-NEXT: s_setpc_b64 s[30:31]
867 ; GFX8-LABEL: v_ssubsat_v16i32:
869 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
870 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16
871 ; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v0, v16
872 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v0
873 ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v16
874 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
875 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
876 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
877 ; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v1, v17
878 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v17
879 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v1
880 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v16
881 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
882 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
883 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
884 ; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v2, v18
885 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v18
886 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v2
887 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v16
888 ; GFX8-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
889 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
890 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
891 ; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v3, v19
892 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v19
893 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v3
894 ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v16
895 ; GFX8-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
896 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
897 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
898 ; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v4, v20
899 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v20
900 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v4
901 ; GFX8-NEXT: v_ashrrev_i32_e32 v4, 31, v16
902 ; GFX8-NEXT: v_xor_b32_e32 v4, 0x80000000, v4
903 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
904 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
905 ; GFX8-NEXT: buffer_load_dword v16, off, s[0:3], s32
906 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v5, v21
907 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v21
908 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v5
909 ; GFX8-NEXT: v_ashrrev_i32_e32 v5, 31, v17
910 ; GFX8-NEXT: v_xor_b32_e32 v5, 0x80000000, v5
911 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
912 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
913 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v6, v22
914 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v22
915 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v6
916 ; GFX8-NEXT: v_ashrrev_i32_e32 v6, 31, v17
917 ; GFX8-NEXT: v_xor_b32_e32 v6, 0x80000000, v6
918 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
919 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v17, v6, vcc
920 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v7, v23
921 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v23
922 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v7
923 ; GFX8-NEXT: v_ashrrev_i32_e32 v7, 31, v17
924 ; GFX8-NEXT: v_xor_b32_e32 v7, 0x80000000, v7
925 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
926 ; GFX8-NEXT: v_cndmask_b32_e32 v7, v17, v7, vcc
927 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v8, v24
928 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v24
929 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v8
930 ; GFX8-NEXT: v_ashrrev_i32_e32 v8, 31, v17
931 ; GFX8-NEXT: v_xor_b32_e32 v8, 0x80000000, v8
932 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
933 ; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
934 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v9, v25
935 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v25
936 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v9
937 ; GFX8-NEXT: v_ashrrev_i32_e32 v9, 31, v17
938 ; GFX8-NEXT: v_xor_b32_e32 v9, 0x80000000, v9
939 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
940 ; GFX8-NEXT: v_cndmask_b32_e32 v9, v17, v9, vcc
941 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v10, v26
942 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v26
943 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v10
944 ; GFX8-NEXT: v_ashrrev_i32_e32 v10, 31, v17
945 ; GFX8-NEXT: v_xor_b32_e32 v10, 0x80000000, v10
946 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
947 ; GFX8-NEXT: v_cndmask_b32_e32 v10, v17, v10, vcc
948 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v11, v27
949 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v27
950 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v11
951 ; GFX8-NEXT: v_ashrrev_i32_e32 v11, 31, v17
952 ; GFX8-NEXT: v_xor_b32_e32 v11, 0x80000000, v11
953 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
954 ; GFX8-NEXT: v_cndmask_b32_e32 v11, v17, v11, vcc
955 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v12, v28
956 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v28
957 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v12
958 ; GFX8-NEXT: v_ashrrev_i32_e32 v12, 31, v17
959 ; GFX8-NEXT: v_xor_b32_e32 v12, 0x80000000, v12
960 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
961 ; GFX8-NEXT: v_cndmask_b32_e32 v12, v17, v12, vcc
962 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v13, v29
963 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v29
964 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v13
965 ; GFX8-NEXT: v_ashrrev_i32_e32 v13, 31, v17
966 ; GFX8-NEXT: v_xor_b32_e32 v13, 0x80000000, v13
967 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
968 ; GFX8-NEXT: v_cndmask_b32_e32 v13, v17, v13, vcc
969 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v14, v30
970 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v30
971 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v14
972 ; GFX8-NEXT: v_ashrrev_i32_e32 v14, 31, v17
973 ; GFX8-NEXT: v_xor_b32_e32 v14, 0x80000000, v14
974 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
975 ; GFX8-NEXT: v_cndmask_b32_e32 v14, v17, v14, vcc
976 ; GFX8-NEXT: s_waitcnt vmcnt(0)
977 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16
978 ; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v15, v16
979 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v15
980 ; GFX8-NEXT: v_ashrrev_i32_e32 v15, 31, v16
981 ; GFX8-NEXT: v_xor_b32_e32 v15, 0x80000000, v15
982 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
983 ; GFX8-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
984 ; GFX8-NEXT: s_setpc_b64 s[30:31]
986 ; GFX9-LABEL: v_ssubsat_v16i32:
988 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
989 ; GFX9-NEXT: v_sub_i32 v0, v0, v16 clamp
990 ; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32
991 ; GFX9-NEXT: v_sub_i32 v1, v1, v17 clamp
992 ; GFX9-NEXT: v_sub_i32 v2, v2, v18 clamp
993 ; GFX9-NEXT: v_sub_i32 v3, v3, v19 clamp
994 ; GFX9-NEXT: v_sub_i32 v4, v4, v20 clamp
995 ; GFX9-NEXT: v_sub_i32 v5, v5, v21 clamp
996 ; GFX9-NEXT: v_sub_i32 v6, v6, v22 clamp
997 ; GFX9-NEXT: v_sub_i32 v7, v7, v23 clamp
998 ; GFX9-NEXT: v_sub_i32 v8, v8, v24 clamp
999 ; GFX9-NEXT: v_sub_i32 v9, v9, v25 clamp
1000 ; GFX9-NEXT: v_sub_i32 v10, v10, v26 clamp
1001 ; GFX9-NEXT: v_sub_i32 v11, v11, v27 clamp
1002 ; GFX9-NEXT: v_sub_i32 v12, v12, v28 clamp
1003 ; GFX9-NEXT: v_sub_i32 v13, v13, v29 clamp
1004 ; GFX9-NEXT: v_sub_i32 v14, v14, v30 clamp
1005 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1006 ; GFX9-NEXT: v_sub_i32 v15, v15, v16 clamp
1007 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1009 ; GFX10-LABEL: v_ssubsat_v16i32:
1011 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1012 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
1013 ; GFX10-NEXT: v_sub_nc_i32 v0, v0, v16 clamp
1014 ; GFX10-NEXT: v_sub_nc_i32 v1, v1, v17 clamp
1015 ; GFX10-NEXT: v_sub_nc_i32 v2, v2, v18 clamp
1016 ; GFX10-NEXT: v_sub_nc_i32 v3, v3, v19 clamp
1017 ; GFX10-NEXT: v_sub_nc_i32 v4, v4, v20 clamp
1018 ; GFX10-NEXT: v_sub_nc_i32 v5, v5, v21 clamp
1019 ; GFX10-NEXT: v_sub_nc_i32 v6, v6, v22 clamp
1020 ; GFX10-NEXT: v_sub_nc_i32 v7, v7, v23 clamp
1021 ; GFX10-NEXT: v_sub_nc_i32 v8, v8, v24 clamp
1022 ; GFX10-NEXT: v_sub_nc_i32 v9, v9, v25 clamp
1023 ; GFX10-NEXT: v_sub_nc_i32 v10, v10, v26 clamp
1024 ; GFX10-NEXT: v_sub_nc_i32 v11, v11, v27 clamp
1025 ; GFX10-NEXT: v_sub_nc_i32 v12, v12, v28 clamp
1026 ; GFX10-NEXT: v_sub_nc_i32 v13, v13, v29 clamp
1027 ; GFX10-NEXT: v_sub_nc_i32 v14, v14, v30 clamp
1028 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1029 ; GFX10-NEXT: v_sub_nc_i32 v15, v15, v31 clamp
1030 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1032 ; GFX11-LABEL: v_ssubsat_v16i32:
1034 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1035 ; GFX11-NEXT: scratch_load_b32 v31, off, s32
1036 ; GFX11-NEXT: v_sub_nc_i32 v0, v0, v16 clamp
1037 ; GFX11-NEXT: v_sub_nc_i32 v1, v1, v17 clamp
1038 ; GFX11-NEXT: v_sub_nc_i32 v2, v2, v18 clamp
1039 ; GFX11-NEXT: v_sub_nc_i32 v3, v3, v19 clamp
1040 ; GFX11-NEXT: v_sub_nc_i32 v4, v4, v20 clamp
1041 ; GFX11-NEXT: v_sub_nc_i32 v5, v5, v21 clamp
1042 ; GFX11-NEXT: v_sub_nc_i32 v6, v6, v22 clamp
1043 ; GFX11-NEXT: v_sub_nc_i32 v7, v7, v23 clamp
1044 ; GFX11-NEXT: v_sub_nc_i32 v8, v8, v24 clamp
1045 ; GFX11-NEXT: v_sub_nc_i32 v9, v9, v25 clamp
1046 ; GFX11-NEXT: v_sub_nc_i32 v10, v10, v26 clamp
1047 ; GFX11-NEXT: v_sub_nc_i32 v11, v11, v27 clamp
1048 ; GFX11-NEXT: v_sub_nc_i32 v12, v12, v28 clamp
1049 ; GFX11-NEXT: v_sub_nc_i32 v13, v13, v29 clamp
1050 ; GFX11-NEXT: v_sub_nc_i32 v14, v14, v30 clamp
1051 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1052 ; GFX11-NEXT: v_sub_nc_i32 v15, v15, v31 clamp
1053 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1054 %result = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %lhs, <16 x i32> %rhs)
1055 ret <16 x i32> %result
; Saturating signed subtraction on i64 through @llvm.ssub.sat.i64.
; All targets' assertions follow the same shape - a 64-bit subtract built from
; a low-half subtract plus a borrow-consuming high-half subtract, the signed
; compares (0 < rhs) and (difference < lhs) XORed into the select mask, and two
; v_cndmask_b32 selects between the difference and a replacement value whose
; low word is (high word of the difference >> 31) and whose high word is that
; same value XOR 0x80000000.
; GFX10 and GFX11 keep the mask in 32-bit vcc_lo (s_xor_b32) where the older
; targets use 64-bit vcc (s_xor_b64); GFX11 additionally pairs both selects
; into one v_dual_cndmask_b32.
1059 define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
1060 ; GFX6-LABEL: v_ssubsat_i64:
1062 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1063 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
1064 ; GFX6-NEXT: v_subb_u32_e32 v5, vcc, v1, v3, vcc
1065 ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
1066 ; GFX6-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
1067 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v5
1068 ; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc
1069 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
1070 ; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1071 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
1072 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1074 ; GFX8-LABEL: v_ssubsat_i64:
1076 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1077 ; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v0, v2
1078 ; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v1, v3, vcc
1079 ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
1080 ; GFX8-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
1081 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v5
1082 ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
1083 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
1084 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1085 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
1086 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1088 ; GFX9-LABEL: v_ssubsat_i64:
1090 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1091 ; GFX9-NEXT: v_sub_co_u32_e32 v4, vcc, v0, v2
1092 ; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
1093 ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
1094 ; GFX9-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
1095 ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v5
1096 ; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc
1097 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
1098 ; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1099 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
1100 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1102 ; GFX10-LABEL: v_ssubsat_i64:
1104 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1105 ; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2
1106 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
1107 ; GFX10-NEXT: v_cmp_lt_i64_e64 s4, 0, v[2:3]
1108 ; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5
1109 ; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
1110 ; GFX10-NEXT: v_xor_b32_e32 v1, 0x80000000, v6
1111 ; GFX10-NEXT: s_xor_b32 vcc_lo, s4, vcc_lo
1112 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc_lo
1113 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
1114 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1116 ; GFX11-LABEL: v_ssubsat_i64:
1118 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1119 ; GFX11-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2
1120 ; GFX11-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
1121 ; GFX11-NEXT: v_cmp_lt_i64_e64 s0, 0, v[2:3]
1122 ; GFX11-NEXT: v_ashrrev_i32_e32 v6, 31, v5
1123 ; GFX11-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
1124 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v6
1125 ; GFX11-NEXT: s_xor_b32 vcc_lo, s0, vcc_lo
1126 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v4, v6 :: v_dual_cndmask_b32 v1, v5, v1
1127 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1128 %result = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs)
; Declarations of the llvm.ssub.sat overloads (scalar i8, i16, i32, i64 and the
; vector types listed). Each declaration references attribute group #0, which
; is defined on the final line.
1132 declare i8 @llvm.ssub.sat.i8(i8, i8) #0
1133 declare i16 @llvm.ssub.sat.i16(i16, i16) #0
1134 declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>) #0
1135 declare <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16>, <3 x i16>) #0
1136 declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>) #0
1137 declare i32 @llvm.ssub.sat.i32(i32, i32) #0
1138 declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) #0
1139 declare <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32>, <3 x i32>) #0
1140 declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) #0
1141 declare <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32>, <8 x i32>) #0
1142 declare <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32>, <16 x i32>) #0
1143 declare i64 @llvm.ssub.sat.i64(i64, i64) #0
1145 attributes #0 = { nounwind readnone speculatable willreturn }