1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck --check-prefix=GFX6 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck --check-prefix=GFX8 %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
5 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10PLUS,GFX10 %s
6 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX10PLUS,GFX11 %s
; i8 case: the IR body is a single call to @llvm.ssub.sat.i8 on %lhs and %rhs.
; Per-target shape pinned by the checks that follow:
;   * GFX6 checks    - v_bfe_i32 sign-extends both inputs from 8 bits, a 32-bit
;                      subtract follows, then v_min_i32 against 0x7f and
;                      v_max_i32 against 0xffffff80.
;   * GFX8 checks    - one v_sub_u16_sdwa with sext() on BYTE_0 of each source,
;                      then v_min_i16 against 0x7f and v_max_i16 against 0xff80.
;   * GFX9 checks    - both inputs shifted left by 8 (v_lshlrev_b16), v_sub_i16
;                      with clamp, then v_ashrrev_i16 by 8.
;   * GFX10PLUS checks - same shift / clamp-subtract / shift pattern, using
;                      v_sub_nc_i16.
8 define i8 @v_ssubsat_i8(i8 %lhs, i8 %rhs) {
9 ; GFX6-LABEL: v_ssubsat_i8:
11 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 8
13 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
14 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
15 ; GFX6-NEXT: v_min_i32_e32 v0, 0x7f, v0
16 ; GFX6-NEXT: v_max_i32_e32 v0, 0xffffff80, v0
17 ; GFX6-NEXT: s_setpc_b64 s[30:31]
19 ; GFX8-LABEL: v_ssubsat_i8:
21 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22 ; GFX8-NEXT: v_sub_u16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
23 ; GFX8-NEXT: v_min_i16_e32 v0, 0x7f, v0
24 ; GFX8-NEXT: v_max_i16_e32 v0, 0xff80, v0
25 ; GFX8-NEXT: s_setpc_b64 s[30:31]
27 ; GFX9-LABEL: v_ssubsat_i8:
29 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1
31 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 8, v0
32 ; GFX9-NEXT: v_sub_i16 v0, v0, v1 clamp
33 ; GFX9-NEXT: v_ashrrev_i16_e32 v0, 8, v0
34 ; GFX9-NEXT: s_setpc_b64 s[30:31]
36 ; GFX10PLUS-LABEL: v_ssubsat_i8:
38 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39 ; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
40 ; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 8, v1
41 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 8, v0
42 ; GFX10PLUS-NEXT: v_sub_nc_i16 v0, v0, v1 clamp
43 ; GFX10PLUS-NEXT: v_ashrrev_i16 v0, 8, v0
44 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
45 %result = call i8 @llvm.ssub.sat.i8(i8 %lhs, i8 %rhs)
; i16 case: the IR body is a single call to @llvm.ssub.sat.i16 on %lhs and %rhs.
; Per-target shape pinned by the checks that follow:
;   * GFX6 checks    - v_bfe_i32 sign-extends both inputs from 16 bits, a 32-bit
;                      subtract follows, then v_min_i32 against 0x7fff and
;                      v_max_i32 against 0xffff8000.
;   * GFX8 checks    - expanded sequence: compare 0 < rhs, 16-bit subtract,
;                      compare diff < lhs, xor of the two conditions, and a
;                      v_cndmask between diff and ((diff >> 15) ^ 0xffff8000).
;   * GFX9 checks    - a single v_sub_i16 with clamp.
;   * GFX10PLUS checks - a single v_sub_nc_i16 with clamp.
50 define i16 @v_ssubsat_i16(i16 %lhs, i16 %rhs) {
51 ; GFX6-LABEL: v_ssubsat_i16:
53 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
55 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
56 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
57 ; GFX6-NEXT: v_min_i32_e32 v0, 0x7fff, v0
58 ; GFX6-NEXT: v_max_i32_e32 v0, 0xffff8000, v0
59 ; GFX6-NEXT: s_setpc_b64 s[30:31]
61 ; GFX8-LABEL: v_ssubsat_i16:
63 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v1
65 ; GFX8-NEXT: v_sub_u16_e32 v1, v0, v1
66 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v1, v0
67 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v1
68 ; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
69 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
70 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
71 ; GFX8-NEXT: s_setpc_b64 s[30:31]
73 ; GFX9-LABEL: v_ssubsat_i16:
75 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76 ; GFX9-NEXT: v_sub_i16 v0, v0, v1 clamp
77 ; GFX9-NEXT: s_setpc_b64 s[30:31]
79 ; GFX10PLUS-LABEL: v_ssubsat_i16:
81 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82 ; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
83 ; GFX10PLUS-NEXT: v_sub_nc_i16 v0, v0, v1 clamp
84 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
85 %result = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs)
; i32 case: the IR body is a single call to @llvm.ssub.sat.i32 on %lhs and %rhs.
; Per-target shape pinned by the checks that follow:
;   * GFX6 and GFX8 checks - expanded sequence: compare 0 < rhs, 32-bit subtract
;                      (v_sub_i32_e64 on GFX6, v_sub_u32_e64 on GFX8), compare
;                      diff < lhs, xor of the two conditions, and a v_cndmask
;                      between diff and ((diff >> 31) ^ 0x80000000).
;   * GFX9 checks    - a single v_sub_i32 with clamp.
;   * GFX10PLUS checks - a single v_sub_nc_i32 with clamp.
89 define i32 @v_ssubsat_i32(i32 %lhs, i32 %rhs) {
90 ; GFX6-LABEL: v_ssubsat_i32:
92 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
93 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v1
94 ; GFX6-NEXT: v_sub_i32_e64 v1, s[4:5], v0, v1
95 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, v0
96 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v1
97 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
98 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
99 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
100 ; GFX6-NEXT: s_setpc_b64 s[30:31]
102 ; GFX8-LABEL: v_ssubsat_i32:
104 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
105 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v1
106 ; GFX8-NEXT: v_sub_u32_e64 v1, s[4:5], v0, v1
107 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, v0
108 ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v1
109 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
110 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
111 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
112 ; GFX8-NEXT: s_setpc_b64 s[30:31]
114 ; GFX9-LABEL: v_ssubsat_i32:
116 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
117 ; GFX9-NEXT: v_sub_i32 v0, v0, v1 clamp
118 ; GFX9-NEXT: s_setpc_b64 s[30:31]
120 ; GFX10PLUS-LABEL: v_ssubsat_i32:
121 ; GFX10PLUS: ; %bb.0:
122 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
123 ; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
124 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v1 clamp
125 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
126 %result = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs)
; <2 x i16> case: calls @llvm.ssub.sat.v2i16 on %lhs and %rhs and returns the
; vector result.
; Per-target shape pinned by the checks that follow:
;   * GFX6 checks    - four v_bfe_i32 sign-extensions (v0..v3), two 32-bit
;                      subtracts, v_min_i32/v_max_i32 against 0x7fff/0xffff8000
;                      on each, then a shift/and/or on the two results.
;   * GFX8 checks    - the upper halves are extracted with a 16-bit right
;                      shift; each half goes through the compare / xor /
;                      v_cndmask sequence and the halves are merged with
;                      v_or_b32_sdwa.
;   * GFX9 and GFX10PLUS checks - a single v_pk_sub_i16 with clamp.
130 define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
131 ; GFX6-LABEL: v_ssubsat_v2i16:
133 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
135 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
136 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
137 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
138 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
139 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
140 ; GFX6-NEXT: v_min_i32_e32 v1, 0x7fff, v1
141 ; GFX6-NEXT: v_min_i32_e32 v0, 0x7fff, v0
142 ; GFX6-NEXT: v_max_i32_e32 v1, 0xffff8000, v1
143 ; GFX6-NEXT: v_max_i32_e32 v0, 0xffff8000, v0
144 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v1
145 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
146 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
147 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
148 ; GFX6-NEXT: s_setpc_b64 s[30:31]
150 ; GFX8-LABEL: v_ssubsat_v2i16:
152 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
154 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
155 ; GFX8-NEXT: v_sub_u16_e32 v4, v3, v2
156 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v4, v3
157 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v2
158 ; GFX8-NEXT: v_ashrrev_i16_e32 v2, 15, v4
159 ; GFX8-NEXT: v_xor_b32_e32 v2, 0xffff8000, v2
160 ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
161 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
162 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v1
163 ; GFX8-NEXT: v_sub_u16_e32 v1, v0, v1
164 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v1, v0
165 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v1
166 ; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
167 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
168 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
169 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
170 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
171 ; GFX8-NEXT: s_setpc_b64 s[30:31]
173 ; GFX9-LABEL: v_ssubsat_v2i16:
175 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176 ; GFX9-NEXT: v_pk_sub_i16 v0, v0, v1 clamp
177 ; GFX9-NEXT: s_setpc_b64 s[30:31]
179 ; GFX10PLUS-LABEL: v_ssubsat_v2i16:
180 ; GFX10PLUS: ; %bb.0:
181 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
182 ; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
183 ; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v1 clamp
184 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
185 %result = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs)
186 ret <2 x i16> %result
; <3 x i16> case: calls @llvm.ssub.sat.v3i16 on %lhs and %rhs and returns the
; vector result.
; Per-target shape pinned by the checks that follow:
;   * GFX6 checks    - six v_bfe_i32 sign-extensions (v0..v5), three 32-bit
;                      subtracts each clamped with v_min_i32/v_max_i32 against
;                      0x7fff/0xffff8000, then shift/and/or plus a
;                      v_alignbit_b32 on the results.
;   * GFX8 checks    - three instances of the compare / xor / v_cndmask
;                      sequence (upper half of v0/v2, v1/v3, lower half of
;                      v0/v2) and one v_or_b32_sdwa merge into v0.
;   * GFX9 and GFX10PLUS checks - two v_pk_sub_i16 with clamp (v0/v2 and
;                      v1/v3); the two targets list them in opposite order.
189 define <3 x i16> @v_ssubsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
190 ; GFX6-LABEL: v_ssubsat_v3i16:
192 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
194 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
195 ; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16
196 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
197 ; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16
198 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
199 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
200 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
201 ; GFX6-NEXT: v_min_i32_e32 v1, 0x7fff, v1
202 ; GFX6-NEXT: v_min_i32_e32 v0, 0x7fff, v0
203 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v5
204 ; GFX6-NEXT: v_max_i32_e32 v1, 0xffff8000, v1
205 ; GFX6-NEXT: v_max_i32_e32 v0, 0xffff8000, v0
206 ; GFX6-NEXT: v_min_i32_e32 v2, 0x7fff, v2
207 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
208 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
209 ; GFX6-NEXT: v_max_i32_e32 v3, 0xffff8000, v2
210 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
211 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
212 ; GFX6-NEXT: v_alignbit_b32 v1, v3, v1, 16
213 ; GFX6-NEXT: s_setpc_b64 s[30:31]
215 ; GFX8-LABEL: v_ssubsat_v3i16:
217 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
218 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
219 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
220 ; GFX8-NEXT: v_sub_u16_e32 v6, v5, v4
221 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v6, v5
222 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v4
223 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, 15, v6
224 ; GFX8-NEXT: v_xor_b32_e32 v4, 0xffff8000, v4
225 ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
226 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
227 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v3
228 ; GFX8-NEXT: v_sub_u16_e32 v3, v1, v3
229 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v3, v1
230 ; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v3
231 ; GFX8-NEXT: v_xor_b32_e32 v1, 0xffff8000, v1
232 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
233 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
234 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v2
235 ; GFX8-NEXT: v_sub_u16_e32 v2, v0, v2
236 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v2, v0
237 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v2
238 ; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
239 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
240 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
241 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
242 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
243 ; GFX8-NEXT: s_setpc_b64 s[30:31]
245 ; GFX9-LABEL: v_ssubsat_v3i16:
247 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
248 ; GFX9-NEXT: v_pk_sub_i16 v1, v1, v3 clamp
249 ; GFX9-NEXT: v_pk_sub_i16 v0, v0, v2 clamp
250 ; GFX9-NEXT: s_setpc_b64 s[30:31]
252 ; GFX10PLUS-LABEL: v_ssubsat_v3i16:
253 ; GFX10PLUS: ; %bb.0:
254 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
255 ; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
256 ; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v2 clamp
257 ; GFX10PLUS-NEXT: v_pk_sub_i16 v1, v1, v3 clamp
258 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
259 %result = call <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16> %lhs, <3 x i16> %rhs)
260 ret <3 x i16> %result
; <4 x i16> case: calls @llvm.ssub.sat.v4i16 on %lhs and %rhs, bitcasts the
; <4 x i16> result to <2 x float>, and returns the cast value.
; NOTE(review): the reason for returning <2 x float> instead of <4 x i16> is
; not stated in this file - presumably it controls how the result is returned
; in registers; confirm before changing the signature.
; Per-target shape pinned by the checks that follow:
;   * GFX6 checks    - eight v_bfe_i32 sign-extensions (v0..v7), four 32-bit
;                      subtracts each clamped with v_min_i32/v_max_i32 against
;                      0x7fff/0xffff8000, and two shift/and/or merges producing
;                      v0 and v1.
;   * GFX8 checks    - four instances of the compare / xor / v_cndmask
;                      sequence and two v_or_b32_sdwa merges producing v0 and v1.
;   * GFX9 and GFX10PLUS checks - two v_pk_sub_i16 with clamp (v0/v2, v1/v3).
263 define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
264 ; GFX6-LABEL: v_ssubsat_v4i16:
266 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
267 ; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16
268 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
269 ; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16
270 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
271 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
272 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
273 ; GFX6-NEXT: v_min_i32_e32 v1, 0x7fff, v1
274 ; GFX6-NEXT: v_min_i32_e32 v0, 0x7fff, v0
275 ; GFX6-NEXT: v_max_i32_e32 v1, 0xffff8000, v1
276 ; GFX6-NEXT: v_max_i32_e32 v0, 0xffff8000, v0
277 ; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 16
278 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
279 ; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 16
280 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
281 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
282 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
283 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
284 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v3, v7
285 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v6
286 ; GFX6-NEXT: v_min_i32_e32 v1, 0x7fff, v1
287 ; GFX6-NEXT: v_min_i32_e32 v2, 0x7fff, v2
288 ; GFX6-NEXT: v_max_i32_e32 v1, 0xffff8000, v1
289 ; GFX6-NEXT: v_max_i32_e32 v2, 0xffff8000, v2
290 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
291 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
292 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
293 ; GFX6-NEXT: s_setpc_b64 s[30:31]
295 ; GFX8-LABEL: v_ssubsat_v4i16:
297 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
298 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
299 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
300 ; GFX8-NEXT: v_sub_u16_e32 v6, v5, v4
301 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v6, v5
302 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v4
303 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, 15, v6
304 ; GFX8-NEXT: v_xor_b32_e32 v4, 0xffff8000, v4
305 ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
306 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
307 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v2
308 ; GFX8-NEXT: v_sub_u16_e32 v2, v0, v2
309 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v2, v0
310 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v2
311 ; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
312 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
313 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v4
314 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
315 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
316 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v3
317 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1
318 ; GFX8-NEXT: v_sub_u16_e32 v5, v4, v2
319 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v5, v4
320 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v2
321 ; GFX8-NEXT: v_ashrrev_i16_e32 v2, 15, v5
322 ; GFX8-NEXT: v_xor_b32_e32 v2, 0xffff8000, v2
323 ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
324 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
325 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v3
326 ; GFX8-NEXT: v_sub_u16_e32 v3, v1, v3
327 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v3, v1
328 ; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v3
329 ; GFX8-NEXT: v_xor_b32_e32 v1, 0xffff8000, v1
330 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
331 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
332 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
333 ; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
334 ; GFX8-NEXT: s_setpc_b64 s[30:31]
336 ; GFX9-LABEL: v_ssubsat_v4i16:
338 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339 ; GFX9-NEXT: v_pk_sub_i16 v0, v0, v2 clamp
340 ; GFX9-NEXT: v_pk_sub_i16 v1, v1, v3 clamp
341 ; GFX9-NEXT: s_setpc_b64 s[30:31]
343 ; GFX10PLUS-LABEL: v_ssubsat_v4i16:
344 ; GFX10PLUS: ; %bb.0:
345 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
346 ; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
347 ; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v2 clamp
348 ; GFX10PLUS-NEXT: v_pk_sub_i16 v1, v1, v3 clamp
349 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
350 %result = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
351 %cast = bitcast <4 x i16> %result to <2 x float>
352 ret <2 x float> %cast
; <2 x i32> case: calls @llvm.ssub.sat.v2i32 on %lhs and %rhs and returns the
; vector result.
; Per-target shape pinned by the checks that follow:
;   * GFX6 and GFX8 checks - the compare / subtract / compare / xor /
;                      v_cndmask sequence is emitted once per element (v0 with
;                      v2, v1 with v3); GFX6 uses v_sub_i32_e64, GFX8 uses
;                      v_sub_u32_e64.
;   * GFX9 checks    - one v_sub_i32 with clamp per element.
;   * GFX10PLUS checks - one v_sub_nc_i32 with clamp per element.
355 define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
356 ; GFX6-LABEL: v_ssubsat_v2i32:
358 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
359 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v2
360 ; GFX6-NEXT: v_sub_i32_e64 v2, s[4:5], v0, v2
361 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0
362 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v2
363 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
364 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
365 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
366 ; GFX6-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v3
367 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3
368 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v1
369 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v2
370 ; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
371 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
372 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
373 ; GFX6-NEXT: s_setpc_b64 s[30:31]
375 ; GFX8-LABEL: v_ssubsat_v2i32:
377 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
378 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v2
379 ; GFX8-NEXT: v_sub_u32_e64 v2, s[4:5], v0, v2
380 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0
381 ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v2
382 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
383 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
384 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
385 ; GFX8-NEXT: v_sub_u32_e64 v2, s[4:5], v1, v3
386 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3
387 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v1
388 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v2
389 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
390 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
391 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
392 ; GFX8-NEXT: s_setpc_b64 s[30:31]
394 ; GFX9-LABEL: v_ssubsat_v2i32:
396 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
397 ; GFX9-NEXT: v_sub_i32 v0, v0, v2 clamp
398 ; GFX9-NEXT: v_sub_i32 v1, v1, v3 clamp
399 ; GFX9-NEXT: s_setpc_b64 s[30:31]
401 ; GFX10PLUS-LABEL: v_ssubsat_v2i32:
402 ; GFX10PLUS: ; %bb.0:
403 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
404 ; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
405 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v2 clamp
406 ; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v3 clamp
407 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
408 %result = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
409 ret <2 x i32> %result
; <3 x i32> case: calls @llvm.ssub.sat.v3i32 on %lhs and %rhs and returns the
; vector result.
; Per-target shape pinned by the checks that follow:
;   * GFX6 and GFX8 checks - the compare / subtract / compare / xor /
;                      v_cndmask sequence is emitted once per element (v0 with
;                      v3, v1 with v4, v2 with v5); GFX6 uses v_sub_i32_e64,
;                      GFX8 uses v_sub_u32_e64.
;   * GFX9 checks    - one v_sub_i32 with clamp per element.
;   * GFX10PLUS checks - one v_sub_nc_i32 with clamp per element.
412 define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
413 ; GFX6-LABEL: v_ssubsat_v3i32:
415 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
416 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3
417 ; GFX6-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v3
418 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v0
419 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v3
420 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
421 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
422 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
423 ; GFX6-NEXT: v_sub_i32_e64 v3, s[4:5], v1, v4
424 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4
425 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v1
426 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v3
427 ; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
428 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
429 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
430 ; GFX6-NEXT: v_sub_i32_e64 v3, s[4:5], v2, v5
431 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5
432 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v2
433 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v3
434 ; GFX6-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
435 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
436 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
437 ; GFX6-NEXT: s_setpc_b64 s[30:31]
439 ; GFX8-LABEL: v_ssubsat_v3i32:
441 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
442 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3
443 ; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], v0, v3
444 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v0
445 ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v3
446 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
447 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
448 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
449 ; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], v1, v4
450 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4
451 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v1
452 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v3
453 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
454 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
455 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
456 ; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], v2, v5
457 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5
458 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v2
459 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v3
460 ; GFX8-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
461 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
462 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
463 ; GFX8-NEXT: s_setpc_b64 s[30:31]
465 ; GFX9-LABEL: v_ssubsat_v3i32:
467 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
468 ; GFX9-NEXT: v_sub_i32 v0, v0, v3 clamp
469 ; GFX9-NEXT: v_sub_i32 v1, v1, v4 clamp
470 ; GFX9-NEXT: v_sub_i32 v2, v2, v5 clamp
471 ; GFX9-NEXT: s_setpc_b64 s[30:31]
473 ; GFX10PLUS-LABEL: v_ssubsat_v3i32:
474 ; GFX10PLUS: ; %bb.0:
475 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
476 ; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
477 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v3 clamp
478 ; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v4 clamp
479 ; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v5 clamp
480 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
481 %result = call <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32> %lhs, <3 x i32> %rhs)
482 ret <3 x i32> %result
; <4 x i32> case: calls @llvm.ssub.sat.v4i32 on %lhs and %rhs and returns the
; vector result.
; Per-target shape pinned by the checks that follow:
;   * GFX6 and GFX8 checks - the compare / subtract / compare / xor /
;                      v_cndmask sequence is emitted once per element (v0..v3
;                      paired with v4..v7); GFX6 uses v_sub_i32_e64, GFX8 uses
;                      v_sub_u32_e64.
;   * GFX9 checks    - one v_sub_i32 with clamp per element.
;   * GFX10PLUS checks - one v_sub_nc_i32 with clamp per element.
485 define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
486 ; GFX6-LABEL: v_ssubsat_v4i32:
488 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
489 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4
490 ; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v0, v4
491 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v0
492 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v4
493 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
494 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
495 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
496 ; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v1, v5
497 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5
498 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v1
499 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v4
500 ; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
501 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
502 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
503 ; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v2, v6
504 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v6
505 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v2
506 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v4
507 ; GFX6-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
508 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
509 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
510 ; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v7
511 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v7
512 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v3
513 ; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v4
514 ; GFX6-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
515 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
516 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
517 ; GFX6-NEXT: s_setpc_b64 s[30:31]
519 ; GFX8-LABEL: v_ssubsat_v4i32:
521 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
522 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4
523 ; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v0, v4
524 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v0
525 ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v4
526 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
527 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
528 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
529 ; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v1, v5
530 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5
531 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v1
532 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v4
533 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
534 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
535 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
536 ; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v2, v6
537 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v6
538 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v2
539 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v4
540 ; GFX8-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
541 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
542 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
543 ; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v3, v7
544 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v7
545 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v3
546 ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v4
547 ; GFX8-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
548 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
549 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
550 ; GFX8-NEXT: s_setpc_b64 s[30:31]
552 ; GFX9-LABEL: v_ssubsat_v4i32:
554 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
555 ; GFX9-NEXT: v_sub_i32 v0, v0, v4 clamp
556 ; GFX9-NEXT: v_sub_i32 v1, v1, v5 clamp
557 ; GFX9-NEXT: v_sub_i32 v2, v2, v6 clamp
558 ; GFX9-NEXT: v_sub_i32 v3, v3, v7 clamp
559 ; GFX9-NEXT: s_setpc_b64 s[30:31]
561 ; GFX10PLUS-LABEL: v_ssubsat_v4i32:
562 ; GFX10PLUS: ; %bb.0:
563 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
564 ; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
565 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v4 clamp
566 ; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v5 clamp
567 ; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v6 clamp
568 ; GFX10PLUS-NEXT: v_sub_nc_i32 v3, v3, v7 clamp
569 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
570 %result = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
571 ret <4 x i32> %result
; <8 x i32> case: calls @llvm.ssub.sat.v8i32 on %lhs and %rhs and returns the
; vector result.
; Per-target shape pinned by the checks that follow:
;   * GFX6 and GFX8 checks - the compare / subtract / compare / xor /
;                      v_cndmask sequence is emitted once per element (v0..v7
;                      paired with v8..v15), reusing v8 as the scratch
;                      difference; GFX6 uses v_sub_i32_e64, GFX8 uses
;                      v_sub_u32_e64.
;   * GFX9 checks    - one v_sub_i32 with clamp per element.
;   * GFX10PLUS checks - one v_sub_nc_i32 with clamp per element.
574 define <8 x i32> @v_ssubsat_v8i32(<8 x i32> %lhs, <8 x i32> %rhs) {
575 ; GFX6-LABEL: v_ssubsat_v8i32:
577 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
578 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v8
579 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v0, v8
580 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v0
581 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v8
582 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
583 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
584 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
585 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v1, v9
586 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v9
587 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v1
588 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v8
589 ; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
590 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
591 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
592 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v2, v10
593 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v10
594 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v2
595 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v8
596 ; GFX6-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
597 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
598 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
599 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v3, v11
600 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v11
601 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v3
602 ; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v8
603 ; GFX6-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
604 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
605 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
606 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v4, v12
607 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v12
608 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v4
609 ; GFX6-NEXT: v_ashrrev_i32_e32 v4, 31, v8
610 ; GFX6-NEXT: v_xor_b32_e32 v4, 0x80000000, v4
611 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
612 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
613 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v5, v13
614 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v13
615 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v5
616 ; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v8
617 ; GFX6-NEXT: v_xor_b32_e32 v5, 0x80000000, v5
618 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
619 ; GFX6-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
620 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v6, v14
621 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v14
622 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v6
623 ; GFX6-NEXT: v_ashrrev_i32_e32 v6, 31, v8
624 ; GFX6-NEXT: v_xor_b32_e32 v6, 0x80000000, v6
625 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
626 ; GFX6-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
627 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v7, v15
628 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v15
629 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v7
630 ; GFX6-NEXT: v_ashrrev_i32_e32 v7, 31, v8
631 ; GFX6-NEXT: v_xor_b32_e32 v7, 0x80000000, v7
632 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
633 ; GFX6-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
634 ; GFX6-NEXT: s_setpc_b64 s[30:31]
636 ; GFX8-LABEL: v_ssubsat_v8i32:
638 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
639 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v8
640 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v0, v8
641 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v0
642 ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v8
643 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
644 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
645 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
646 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v1, v9
647 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v9
648 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v1
649 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v8
650 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
651 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
652 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
653 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v2, v10
654 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v10
655 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v2
656 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v8
657 ; GFX8-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
658 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
659 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
660 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v3, v11
661 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v11
662 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v3
663 ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v8
664 ; GFX8-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
665 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
666 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
667 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v4, v12
668 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v12
669 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v4
670 ; GFX8-NEXT: v_ashrrev_i32_e32 v4, 31, v8
671 ; GFX8-NEXT: v_xor_b32_e32 v4, 0x80000000, v4
672 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
673 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
674 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v5, v13
675 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v13
676 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v5
677 ; GFX8-NEXT: v_ashrrev_i32_e32 v5, 31, v8
678 ; GFX8-NEXT: v_xor_b32_e32 v5, 0x80000000, v5
679 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
680 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
681 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v6, v14
682 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v14
683 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v6
684 ; GFX8-NEXT: v_ashrrev_i32_e32 v6, 31, v8
685 ; GFX8-NEXT: v_xor_b32_e32 v6, 0x80000000, v6
686 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
687 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
688 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v7, v15
689 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v15
690 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v7
691 ; GFX8-NEXT: v_ashrrev_i32_e32 v7, 31, v8
692 ; GFX8-NEXT: v_xor_b32_e32 v7, 0x80000000, v7
693 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
694 ; GFX8-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
695 ; GFX8-NEXT: s_setpc_b64 s[30:31]
697 ; GFX9-LABEL: v_ssubsat_v8i32:
699 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
700 ; GFX9-NEXT: v_sub_i32 v0, v0, v8 clamp
701 ; GFX9-NEXT: v_sub_i32 v1, v1, v9 clamp
702 ; GFX9-NEXT: v_sub_i32 v2, v2, v10 clamp
703 ; GFX9-NEXT: v_sub_i32 v3, v3, v11 clamp
704 ; GFX9-NEXT: v_sub_i32 v4, v4, v12 clamp
705 ; GFX9-NEXT: v_sub_i32 v5, v5, v13 clamp
706 ; GFX9-NEXT: v_sub_i32 v6, v6, v14 clamp
707 ; GFX9-NEXT: v_sub_i32 v7, v7, v15 clamp
708 ; GFX9-NEXT: s_setpc_b64 s[30:31]
710 ; GFX10PLUS-LABEL: v_ssubsat_v8i32:
711 ; GFX10PLUS: ; %bb.0:
712 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
713 ; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
714 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v8 clamp
715 ; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v9 clamp
716 ; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v10 clamp
717 ; GFX10PLUS-NEXT: v_sub_nc_i32 v3, v3, v11 clamp
718 ; GFX10PLUS-NEXT: v_sub_nc_i32 v4, v4, v12 clamp
719 ; GFX10PLUS-NEXT: v_sub_nc_i32 v5, v5, v13 clamp
720 ; GFX10PLUS-NEXT: v_sub_nc_i32 v6, v6, v14 clamp
721 ; GFX10PLUS-NEXT: v_sub_nc_i32 v7, v7, v15 clamp
722 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
723 %result = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %lhs, <8 x i32> %rhs)
724 ret <8 x i32> %result
; Signed saturating subtraction on <16 x i32>, via llvm.ssub.sat.v16i32.
; What the autogenerated expectations below show:
;  - The GFX6 and GFX8 runs expand every lane by hand: one subtract, two signed
;    compares (0 < rhs, difference < lhs) whose results are xor'ed into vcc, a
;    saturation value built as (difference >> 31) ^ 0x80000000, and a v_cndmask
;    choosing between the raw difference and that saturation value.
;  - The GFX9, GFX10 and GFX11 runs emit a single subtract with the `clamp`
;    modifier per lane.
;  - Lanes 0-14 subtract v16..v30 from v0..v14. The value subtracted from v15 is
;    instead loaded through s32 (buffer_load_dword, or scratch_load_b32 on the
;    GFX11 run) and is guarded by s_waitcnt vmcnt(0) before its first use.
;    NOTE(review): presumably this is the last %rhs lane, passed in memory
;    because the arguments need more than 32 VGPRs -- confirm against the AMDGPU
;    calling convention.
;  - The GFX10 and GFX11 runs have separate expectations for this function
;    (they differ in that load instruction), unlike the v8i32 case before it.
726 define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
727 ; GFX6-LABEL: v_ssubsat_v16i32:
729 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
730 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16
731 ; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v0, v16
732 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v0
733 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v16
734 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
735 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
736 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
737 ; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v1, v17
738 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v17
739 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v1
740 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v16
741 ; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
742 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
743 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
744 ; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v2, v18
745 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v18
746 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v2
747 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v16
748 ; GFX6-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
749 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
750 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
751 ; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v3, v19
752 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v19
753 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v3
754 ; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v16
755 ; GFX6-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
756 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
757 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
758 ; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v4, v20
759 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v20
760 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v4
761 ; GFX6-NEXT: v_ashrrev_i32_e32 v4, 31, v16
762 ; GFX6-NEXT: v_xor_b32_e32 v4, 0x80000000, v4
763 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
764 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
765 ; GFX6-NEXT: buffer_load_dword v16, off, s[0:3], s32
766 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v5, v21
767 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v21
768 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v5
769 ; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v17
770 ; GFX6-NEXT: v_xor_b32_e32 v5, 0x80000000, v5
771 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
772 ; GFX6-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
773 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v6, v22
774 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v22
775 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v6
776 ; GFX6-NEXT: v_ashrrev_i32_e32 v6, 31, v17
777 ; GFX6-NEXT: v_xor_b32_e32 v6, 0x80000000, v6
778 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
779 ; GFX6-NEXT: v_cndmask_b32_e32 v6, v17, v6, vcc
780 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v7, v23
781 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v23
782 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v7
783 ; GFX6-NEXT: v_ashrrev_i32_e32 v7, 31, v17
784 ; GFX6-NEXT: v_xor_b32_e32 v7, 0x80000000, v7
785 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
786 ; GFX6-NEXT: v_cndmask_b32_e32 v7, v17, v7, vcc
787 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v8, v24
788 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v24
789 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v8
790 ; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v17
791 ; GFX6-NEXT: v_xor_b32_e32 v8, 0x80000000, v8
792 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
793 ; GFX6-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
794 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v9, v25
795 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v25
796 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v9
797 ; GFX6-NEXT: v_ashrrev_i32_e32 v9, 31, v17
798 ; GFX6-NEXT: v_xor_b32_e32 v9, 0x80000000, v9
799 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
800 ; GFX6-NEXT: v_cndmask_b32_e32 v9, v17, v9, vcc
801 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v10, v26
802 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v26
803 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v10
804 ; GFX6-NEXT: v_ashrrev_i32_e32 v10, 31, v17
805 ; GFX6-NEXT: v_xor_b32_e32 v10, 0x80000000, v10
806 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
807 ; GFX6-NEXT: v_cndmask_b32_e32 v10, v17, v10, vcc
808 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v11, v27
809 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v27
810 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v11
811 ; GFX6-NEXT: v_ashrrev_i32_e32 v11, 31, v17
812 ; GFX6-NEXT: v_xor_b32_e32 v11, 0x80000000, v11
813 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
814 ; GFX6-NEXT: v_cndmask_b32_e32 v11, v17, v11, vcc
815 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v12, v28
816 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v28
817 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v12
818 ; GFX6-NEXT: v_ashrrev_i32_e32 v12, 31, v17
819 ; GFX6-NEXT: v_xor_b32_e32 v12, 0x80000000, v12
820 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
821 ; GFX6-NEXT: v_cndmask_b32_e32 v12, v17, v12, vcc
822 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v13, v29
823 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v29
824 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v13
825 ; GFX6-NEXT: v_ashrrev_i32_e32 v13, 31, v17
826 ; GFX6-NEXT: v_xor_b32_e32 v13, 0x80000000, v13
827 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
828 ; GFX6-NEXT: v_cndmask_b32_e32 v13, v17, v13, vcc
829 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v14, v30
830 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v30
831 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v14
832 ; GFX6-NEXT: v_ashrrev_i32_e32 v14, 31, v17
833 ; GFX6-NEXT: v_xor_b32_e32 v14, 0x80000000, v14
834 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
835 ; GFX6-NEXT: v_cndmask_b32_e32 v14, v17, v14, vcc
836 ; GFX6-NEXT: s_waitcnt vmcnt(0)
837 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16
838 ; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v15, v16
839 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v15
840 ; GFX6-NEXT: v_ashrrev_i32_e32 v15, 31, v16
841 ; GFX6-NEXT: v_xor_b32_e32 v15, 0x80000000, v15
842 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
843 ; GFX6-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
844 ; GFX6-NEXT: s_setpc_b64 s[30:31]
846 ; GFX8-LABEL: v_ssubsat_v16i32:
848 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
849 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16
850 ; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v0, v16
851 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v0
852 ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v16
853 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
854 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
855 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
856 ; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v1, v17
857 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v17
858 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v1
859 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v16
860 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
861 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
862 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
863 ; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v2, v18
864 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v18
865 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v2
866 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v16
867 ; GFX8-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
868 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
869 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
870 ; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v3, v19
871 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v19
872 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v3
873 ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v16
874 ; GFX8-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
875 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
876 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
877 ; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v4, v20
878 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v20
879 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v4
880 ; GFX8-NEXT: v_ashrrev_i32_e32 v4, 31, v16
881 ; GFX8-NEXT: v_xor_b32_e32 v4, 0x80000000, v4
882 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
883 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
884 ; GFX8-NEXT: buffer_load_dword v16, off, s[0:3], s32
885 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v5, v21
886 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v21
887 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v5
888 ; GFX8-NEXT: v_ashrrev_i32_e32 v5, 31, v17
889 ; GFX8-NEXT: v_xor_b32_e32 v5, 0x80000000, v5
890 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
891 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
892 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v6, v22
893 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v22
894 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v6
895 ; GFX8-NEXT: v_ashrrev_i32_e32 v6, 31, v17
896 ; GFX8-NEXT: v_xor_b32_e32 v6, 0x80000000, v6
897 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
898 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v17, v6, vcc
899 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v7, v23
900 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v23
901 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v7
902 ; GFX8-NEXT: v_ashrrev_i32_e32 v7, 31, v17
903 ; GFX8-NEXT: v_xor_b32_e32 v7, 0x80000000, v7
904 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
905 ; GFX8-NEXT: v_cndmask_b32_e32 v7, v17, v7, vcc
906 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v8, v24
907 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v24
908 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v8
909 ; GFX8-NEXT: v_ashrrev_i32_e32 v8, 31, v17
910 ; GFX8-NEXT: v_xor_b32_e32 v8, 0x80000000, v8
911 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
912 ; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
913 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v9, v25
914 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v25
915 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v9
916 ; GFX8-NEXT: v_ashrrev_i32_e32 v9, 31, v17
917 ; GFX8-NEXT: v_xor_b32_e32 v9, 0x80000000, v9
918 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
919 ; GFX8-NEXT: v_cndmask_b32_e32 v9, v17, v9, vcc
920 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v10, v26
921 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v26
922 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v10
923 ; GFX8-NEXT: v_ashrrev_i32_e32 v10, 31, v17
924 ; GFX8-NEXT: v_xor_b32_e32 v10, 0x80000000, v10
925 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
926 ; GFX8-NEXT: v_cndmask_b32_e32 v10, v17, v10, vcc
927 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v11, v27
928 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v27
929 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v11
930 ; GFX8-NEXT: v_ashrrev_i32_e32 v11, 31, v17
931 ; GFX8-NEXT: v_xor_b32_e32 v11, 0x80000000, v11
932 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
933 ; GFX8-NEXT: v_cndmask_b32_e32 v11, v17, v11, vcc
934 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v12, v28
935 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v28
936 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v12
937 ; GFX8-NEXT: v_ashrrev_i32_e32 v12, 31, v17
938 ; GFX8-NEXT: v_xor_b32_e32 v12, 0x80000000, v12
939 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
940 ; GFX8-NEXT: v_cndmask_b32_e32 v12, v17, v12, vcc
941 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v13, v29
942 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v29
943 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v13
944 ; GFX8-NEXT: v_ashrrev_i32_e32 v13, 31, v17
945 ; GFX8-NEXT: v_xor_b32_e32 v13, 0x80000000, v13
946 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
947 ; GFX8-NEXT: v_cndmask_b32_e32 v13, v17, v13, vcc
948 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v14, v30
949 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v30
950 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v14
951 ; GFX8-NEXT: v_ashrrev_i32_e32 v14, 31, v17
952 ; GFX8-NEXT: v_xor_b32_e32 v14, 0x80000000, v14
953 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
954 ; GFX8-NEXT: v_cndmask_b32_e32 v14, v17, v14, vcc
955 ; GFX8-NEXT: s_waitcnt vmcnt(0)
956 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16
957 ; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v15, v16
958 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v15
959 ; GFX8-NEXT: v_ashrrev_i32_e32 v15, 31, v16
960 ; GFX8-NEXT: v_xor_b32_e32 v15, 0x80000000, v15
961 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
962 ; GFX8-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
963 ; GFX8-NEXT: s_setpc_b64 s[30:31]
965 ; GFX9-LABEL: v_ssubsat_v16i32:
967 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
968 ; GFX9-NEXT: v_sub_i32 v0, v0, v16 clamp
969 ; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32
970 ; GFX9-NEXT: v_sub_i32 v1, v1, v17 clamp
971 ; GFX9-NEXT: v_sub_i32 v2, v2, v18 clamp
972 ; GFX9-NEXT: v_sub_i32 v3, v3, v19 clamp
973 ; GFX9-NEXT: v_sub_i32 v4, v4, v20 clamp
974 ; GFX9-NEXT: v_sub_i32 v5, v5, v21 clamp
975 ; GFX9-NEXT: v_sub_i32 v6, v6, v22 clamp
976 ; GFX9-NEXT: v_sub_i32 v7, v7, v23 clamp
977 ; GFX9-NEXT: v_sub_i32 v8, v8, v24 clamp
978 ; GFX9-NEXT: v_sub_i32 v9, v9, v25 clamp
979 ; GFX9-NEXT: v_sub_i32 v10, v10, v26 clamp
980 ; GFX9-NEXT: v_sub_i32 v11, v11, v27 clamp
981 ; GFX9-NEXT: v_sub_i32 v12, v12, v28 clamp
982 ; GFX9-NEXT: v_sub_i32 v13, v13, v29 clamp
983 ; GFX9-NEXT: v_sub_i32 v14, v14, v30 clamp
984 ; GFX9-NEXT: s_waitcnt vmcnt(0)
985 ; GFX9-NEXT: v_sub_i32 v15, v15, v16 clamp
986 ; GFX9-NEXT: s_setpc_b64 s[30:31]
988 ; GFX10-LABEL: v_ssubsat_v16i32:
990 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
991 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
992 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
993 ; GFX10-NEXT: v_sub_nc_i32 v0, v0, v16 clamp
994 ; GFX10-NEXT: v_sub_nc_i32 v1, v1, v17 clamp
995 ; GFX10-NEXT: v_sub_nc_i32 v2, v2, v18 clamp
996 ; GFX10-NEXT: v_sub_nc_i32 v3, v3, v19 clamp
997 ; GFX10-NEXT: v_sub_nc_i32 v4, v4, v20 clamp
998 ; GFX10-NEXT: v_sub_nc_i32 v5, v5, v21 clamp
999 ; GFX10-NEXT: v_sub_nc_i32 v6, v6, v22 clamp
1000 ; GFX10-NEXT: v_sub_nc_i32 v7, v7, v23 clamp
1001 ; GFX10-NEXT: v_sub_nc_i32 v8, v8, v24 clamp
1002 ; GFX10-NEXT: v_sub_nc_i32 v9, v9, v25 clamp
1003 ; GFX10-NEXT: v_sub_nc_i32 v10, v10, v26 clamp
1004 ; GFX10-NEXT: v_sub_nc_i32 v11, v11, v27 clamp
1005 ; GFX10-NEXT: v_sub_nc_i32 v12, v12, v28 clamp
1006 ; GFX10-NEXT: v_sub_nc_i32 v13, v13, v29 clamp
1007 ; GFX10-NEXT: v_sub_nc_i32 v14, v14, v30 clamp
1008 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1009 ; GFX10-NEXT: v_sub_nc_i32 v15, v15, v31 clamp
1010 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1012 ; GFX11-LABEL: v_ssubsat_v16i32:
1014 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1015 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1016 ; GFX11-NEXT: scratch_load_b32 v31, off, s32
1017 ; GFX11-NEXT: v_sub_nc_i32 v0, v0, v16 clamp
1018 ; GFX11-NEXT: v_sub_nc_i32 v1, v1, v17 clamp
1019 ; GFX11-NEXT: v_sub_nc_i32 v2, v2, v18 clamp
1020 ; GFX11-NEXT: v_sub_nc_i32 v3, v3, v19 clamp
1021 ; GFX11-NEXT: v_sub_nc_i32 v4, v4, v20 clamp
1022 ; GFX11-NEXT: v_sub_nc_i32 v5, v5, v21 clamp
1023 ; GFX11-NEXT: v_sub_nc_i32 v6, v6, v22 clamp
1024 ; GFX11-NEXT: v_sub_nc_i32 v7, v7, v23 clamp
1025 ; GFX11-NEXT: v_sub_nc_i32 v8, v8, v24 clamp
1026 ; GFX11-NEXT: v_sub_nc_i32 v9, v9, v25 clamp
1027 ; GFX11-NEXT: v_sub_nc_i32 v10, v10, v26 clamp
1028 ; GFX11-NEXT: v_sub_nc_i32 v11, v11, v27 clamp
1029 ; GFX11-NEXT: v_sub_nc_i32 v12, v12, v28 clamp
1030 ; GFX11-NEXT: v_sub_nc_i32 v13, v13, v29 clamp
1031 ; GFX11-NEXT: v_sub_nc_i32 v14, v14, v30 clamp
1032 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1033 ; GFX11-NEXT: v_sub_nc_i32 v15, v15, v31 clamp
1034 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1035 %result = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %lhs, <16 x i32> %rhs)
1036 ret <16 x i32> %result
; Signed saturating subtraction on a scalar i64, via llvm.ssub.sat.i64.
; None of the five expected sequences below uses a clamped subtract. Each one:
;  - computes the 64-bit difference as a low-half subtract followed by a
;    subtract-with-borrow on the high half (result in v[4:5]),
;  - forms the overflow condition as (0 < rhs) xor (difference < lhs) from two
;    64-bit signed compares,
;  - builds the saturation value from the sign of the difference's high half:
;    the arithmetic shift right by 31 is the low dword, and that value xor
;    0x80000000 is the high dword,
;  - selects each result dword between the difference and the saturation value
;    with v_cndmask.
; The GFX10 and GFX11 runs use 32-bit condition registers (vcc_lo with s4 or
; s0); the GFX11 run also pairs both selects into one v_dual_cndmask_b32.
1040 define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
1041 ; GFX6-LABEL: v_ssubsat_i64:
1043 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1044 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
1045 ; GFX6-NEXT: v_subb_u32_e32 v5, vcc, v1, v3, vcc
1046 ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
1047 ; GFX6-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
1048 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v5
1049 ; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc
1050 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
1051 ; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1052 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
1053 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1055 ; GFX8-LABEL: v_ssubsat_i64:
1057 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1058 ; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v0, v2
1059 ; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v1, v3, vcc
1060 ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
1061 ; GFX8-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
1062 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v5
1063 ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
1064 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
1065 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1066 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
1067 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1069 ; GFX9-LABEL: v_ssubsat_i64:
1071 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1072 ; GFX9-NEXT: v_sub_co_u32_e32 v4, vcc, v0, v2
1073 ; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
1074 ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
1075 ; GFX9-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
1076 ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v5
1077 ; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc
1078 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
1079 ; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1080 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
1081 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1083 ; GFX10-LABEL: v_ssubsat_i64:
1085 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1086 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1087 ; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2
1088 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
1089 ; GFX10-NEXT: v_cmp_lt_i64_e64 s4, 0, v[2:3]
1090 ; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5
1091 ; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
1092 ; GFX10-NEXT: v_xor_b32_e32 v1, 0x80000000, v6
1093 ; GFX10-NEXT: s_xor_b32 vcc_lo, s4, vcc_lo
1094 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc_lo
1095 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
1096 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1098 ; GFX11-LABEL: v_ssubsat_i64:
1100 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1101 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1102 ; GFX11-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2
1103 ; GFX11-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
1104 ; GFX11-NEXT: v_cmp_lt_i64_e64 s0, 0, v[2:3]
1105 ; GFX11-NEXT: v_ashrrev_i32_e32 v6, 31, v5
1106 ; GFX11-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
1107 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v6
1108 ; GFX11-NEXT: s_xor_b32 vcc_lo, s0, vcc_lo
1109 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v4, v6 :: v_dual_cndmask_b32 v1, v5, v1
1110 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1111 %result = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs)
; Declarations of the llvm.ssub.sat overloads: scalar i8, i16, i32 and i64, plus
; the i16 and i32 vector widths. Every declaration references attribute group
; #0, which is defined on the last line of this block.
1115 declare i8 @llvm.ssub.sat.i8(i8, i8) #0
1116 declare i16 @llvm.ssub.sat.i16(i16, i16) #0
1117 declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>) #0
1118 declare <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16>, <3 x i16>) #0
1119 declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>) #0
1120 declare i32 @llvm.ssub.sat.i32(i32, i32) #0
1121 declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) #0
1122 declare <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32>, <3 x i32>) #0
1123 declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) #0
1124 declare <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32>, <8 x i32>) #0
1125 declare <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32>, <16 x i32>) #0
1126 declare i64 @llvm.ssub.sat.i64(i64, i64) #0
; Attribute group shared by all of the intrinsic declarations above.
1128 attributes #0 = { nounwind readnone speculatable willreturn }