1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx600 < %s | FileCheck -check-prefix=GFX6 %s
3 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
4 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
5 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
6 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
7 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
8 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
; Scalar case: calls @llvm.abs.i16 on %arg with the second operand `i1 false`.
; Expected lowering, as pinned by the checks below:
;  - gfx600/gfx700 sign-extend the input with v_bfe_i32, then compute
;    max(x, 0 - x) with 32-bit sub/max.
;  - gfx803/gfx900 use v_sub_u16 + v_max_i16 directly on the 16-bit value.
;  - gfx1010 and newer use v_sub_nc_u16 + v_max_i16; gfx1100/gfx1200 also
;    expect an s_delay_alu between the two.
;  - gfx1200 waits with separate s_wait_* instructions instead of one s_waitcnt.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
10 define i16 @abs_i16(i16 %arg) {
11 ; GFX6-LABEL: abs_i16:
13 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
15 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 0, v0
16 ; GFX6-NEXT: v_max_i32_e32 v0, v0, v1
17 ; GFX6-NEXT: s_setpc_b64 s[30:31]
19 ; GFX7-LABEL: abs_i16:
21 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22 ; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16
23 ; GFX7-NEXT: v_sub_i32_e32 v1, vcc, 0, v0
24 ; GFX7-NEXT: v_max_i32_e32 v0, v0, v1
25 ; GFX7-NEXT: s_setpc_b64 s[30:31]
27 ; GFX8-LABEL: abs_i16:
29 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30 ; GFX8-NEXT: v_sub_u16_e32 v1, 0, v0
31 ; GFX8-NEXT: v_max_i16_e32 v0, v0, v1
32 ; GFX8-NEXT: s_setpc_b64 s[30:31]
34 ; GFX9-LABEL: abs_i16:
36 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37 ; GFX9-NEXT: v_sub_u16_e32 v1, 0, v0
38 ; GFX9-NEXT: v_max_i16_e32 v0, v0, v1
39 ; GFX9-NEXT: s_setpc_b64 s[30:31]
41 ; GFX10-LABEL: abs_i16:
43 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44 ; GFX10-NEXT: v_sub_nc_u16 v1, 0, v0
45 ; GFX10-NEXT: v_max_i16 v0, v0, v1
46 ; GFX10-NEXT: s_setpc_b64 s[30:31]
48 ; GFX11-LABEL: abs_i16:
50 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51 ; GFX11-NEXT: v_sub_nc_u16 v1, 0, v0
52 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
53 ; GFX11-NEXT: v_max_i16 v0, v0, v1
54 ; GFX11-NEXT: s_setpc_b64 s[30:31]
56 ; GFX12-LABEL: abs_i16:
58 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
59 ; GFX12-NEXT: s_wait_expcnt 0x0
60 ; GFX12-NEXT: s_wait_samplecnt 0x0
61 ; GFX12-NEXT: s_wait_bvhcnt 0x0
62 ; GFX12-NEXT: s_wait_kmcnt 0x0
63 ; GFX12-NEXT: v_sub_nc_u16 v1, 0, v0
64 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
65 ; GFX12-NEXT: v_max_i16 v0, v0, v1
66 ; GFX12-NEXT: s_setpc_b64 s[30:31]
67 %res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
; <2 x i16> case: calls @llvm.abs.v2i16 on %arg with `i1 false`.
; Expected lowering, as pinned by the checks below:
;  - gfx600/gfx700 take the two elements in v0 and v1, sign-extend each with
;    v_bfe_i32, compute max(x, 0 - x) in 32 bits, then combine them into v0
;    with a 16-bit left shift and an OR.
;  - gfx803 handles the low half with plain 16-bit sub/max and the high half
;    with SDWA forms (WORD_1 selects), then ORs the two halves together.
;  - gfx900 and newer use one v_pk_sub_i16 / v_pk_max_i16 pair on the packed
;    register.
; The check lines are autogenerated; do not edit them by hand.
71 define <2 x i16> @v_abs_v2i16(<2 x i16> %arg) {
72 ; GFX6-LABEL: v_abs_v2i16:
74 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
76 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
77 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0, v0
78 ; GFX6-NEXT: v_max_i32_e32 v0, v0, v2
79 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0, v1
80 ; GFX6-NEXT: v_max_i32_e32 v1, v1, v2
81 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v1
82 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
83 ; GFX6-NEXT: s_setpc_b64 s[30:31]
85 ; GFX7-LABEL: v_abs_v2i16:
87 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88 ; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16
89 ; GFX7-NEXT: v_bfe_i32 v1, v1, 0, 16
90 ; GFX7-NEXT: v_sub_i32_e32 v2, vcc, 0, v0
91 ; GFX7-NEXT: v_max_i32_e32 v0, v0, v2
92 ; GFX7-NEXT: v_sub_i32_e32 v2, vcc, 0, v1
93 ; GFX7-NEXT: v_max_i32_e32 v1, v1, v2
94 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v1
95 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v2
96 ; GFX7-NEXT: s_setpc_b64 s[30:31]
98 ; GFX8-LABEL: v_abs_v2i16:
100 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
101 ; GFX8-NEXT: v_mov_b32_e32 v1, 0
102 ; GFX8-NEXT: v_sub_u16_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
103 ; GFX8-NEXT: v_sub_u16_e32 v2, 0, v0
104 ; GFX8-NEXT: v_max_i16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
105 ; GFX8-NEXT: v_max_i16_e32 v0, v0, v2
106 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
107 ; GFX8-NEXT: s_setpc_b64 s[30:31]
109 ; GFX9-LABEL: v_abs_v2i16:
111 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112 ; GFX9-NEXT: v_pk_sub_i16 v1, 0, v0
113 ; GFX9-NEXT: v_pk_max_i16 v0, v0, v1
114 ; GFX9-NEXT: s_setpc_b64 s[30:31]
116 ; GFX10-LABEL: v_abs_v2i16:
118 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
119 ; GFX10-NEXT: v_pk_sub_i16 v1, 0, v0
120 ; GFX10-NEXT: v_pk_max_i16 v0, v0, v1
121 ; GFX10-NEXT: s_setpc_b64 s[30:31]
123 ; GFX11-LABEL: v_abs_v2i16:
125 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126 ; GFX11-NEXT: v_pk_sub_i16 v1, 0, v0
127 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
128 ; GFX11-NEXT: v_pk_max_i16 v0, v0, v1
129 ; GFX11-NEXT: s_setpc_b64 s[30:31]
131 ; GFX12-LABEL: v_abs_v2i16:
133 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
134 ; GFX12-NEXT: s_wait_expcnt 0x0
135 ; GFX12-NEXT: s_wait_samplecnt 0x0
136 ; GFX12-NEXT: s_wait_bvhcnt 0x0
137 ; GFX12-NEXT: s_wait_kmcnt 0x0
138 ; GFX12-NEXT: v_pk_sub_i16 v1, 0, v0
139 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
140 ; GFX12-NEXT: v_pk_max_i16 v0, v0, v1
141 ; GFX12-NEXT: s_setpc_b64 s[30:31]
142 %res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %arg, i1 false)
; <3 x i16> case (odd element count): calls @llvm.abs.v3i16 with `i1 false`.
; Expected lowering, as pinned by the checks below:
;  - gfx600/gfx700 run the sign-extend + 32-bit sub/max sequence once per
;    element (v0, v1, v2), then rebuild the outputs with a shift/OR and
;    v_alignbit_b32.
;  - gfx803 uses plain 16-bit sub/max on v1 and on the low half of v0, and
;    SDWA forms for the high half of v0.
;  - gfx900 and newer issue two v_pk_sub_i16 / v_pk_max_i16 pairs, one for v0
;    and one for v1.
; The check lines are autogenerated; do not edit them by hand.
146 define <3 x i16> @v_abs_v3i16(<3 x i16> %arg) {
147 ; GFX6-LABEL: v_abs_v3i16:
149 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
150 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
151 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
152 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
153 ; GFX6-NEXT: v_max_i32_e32 v0, v0, v3
154 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
155 ; GFX6-NEXT: v_max_i32_e32 v1, v1, v3
156 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
157 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
158 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
159 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
160 ; GFX6-NEXT: v_max_i32_e32 v2, v2, v1
161 ; GFX6-NEXT: v_alignbit_b32 v1, v2, v0, 16
162 ; GFX6-NEXT: s_setpc_b64 s[30:31]
164 ; GFX7-LABEL: v_abs_v3i16:
166 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
167 ; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16
168 ; GFX7-NEXT: v_bfe_i32 v1, v1, 0, 16
169 ; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
170 ; GFX7-NEXT: v_max_i32_e32 v0, v0, v3
171 ; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
172 ; GFX7-NEXT: v_max_i32_e32 v1, v1, v3
173 ; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 16
174 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
175 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
176 ; GFX7-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
177 ; GFX7-NEXT: v_max_i32_e32 v2, v2, v1
178 ; GFX7-NEXT: v_alignbit_b32 v1, v2, v0, 16
179 ; GFX7-NEXT: s_setpc_b64 s[30:31]
181 ; GFX8-LABEL: v_abs_v3i16:
183 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
184 ; GFX8-NEXT: v_mov_b32_e32 v2, 0
185 ; GFX8-NEXT: v_sub_u16_e32 v3, 0, v1
186 ; GFX8-NEXT: v_sub_u16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
187 ; GFX8-NEXT: v_max_i16_e32 v1, v1, v3
188 ; GFX8-NEXT: v_sub_u16_e32 v3, 0, v0
189 ; GFX8-NEXT: v_max_i16_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
190 ; GFX8-NEXT: v_max_i16_e32 v0, v0, v3
191 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
192 ; GFX8-NEXT: s_setpc_b64 s[30:31]
194 ; GFX9-LABEL: v_abs_v3i16:
196 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
197 ; GFX9-NEXT: v_pk_sub_i16 v2, 0, v0
198 ; GFX9-NEXT: v_pk_max_i16 v0, v0, v2
199 ; GFX9-NEXT: v_pk_sub_i16 v2, 0, v1
200 ; GFX9-NEXT: v_pk_max_i16 v1, v1, v2
201 ; GFX9-NEXT: s_setpc_b64 s[30:31]
203 ; GFX10-LABEL: v_abs_v3i16:
205 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206 ; GFX10-NEXT: v_pk_sub_i16 v2, 0, v0
207 ; GFX10-NEXT: v_pk_sub_i16 v3, 0, v1
208 ; GFX10-NEXT: v_pk_max_i16 v0, v0, v2
209 ; GFX10-NEXT: v_pk_max_i16 v1, v1, v3
210 ; GFX10-NEXT: s_setpc_b64 s[30:31]
212 ; GFX11-LABEL: v_abs_v3i16:
214 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
215 ; GFX11-NEXT: v_pk_sub_i16 v2, 0, v0
216 ; GFX11-NEXT: v_pk_sub_i16 v3, 0, v1
217 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
218 ; GFX11-NEXT: v_pk_max_i16 v0, v0, v2
219 ; GFX11-NEXT: v_pk_max_i16 v1, v1, v3
220 ; GFX11-NEXT: s_setpc_b64 s[30:31]
222 ; GFX12-LABEL: v_abs_v3i16:
224 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
225 ; GFX12-NEXT: s_wait_expcnt 0x0
226 ; GFX12-NEXT: s_wait_samplecnt 0x0
227 ; GFX12-NEXT: s_wait_bvhcnt 0x0
228 ; GFX12-NEXT: s_wait_kmcnt 0x0
229 ; GFX12-NEXT: v_pk_sub_i16 v2, 0, v0
230 ; GFX12-NEXT: v_pk_sub_i16 v3, 0, v1
231 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
232 ; GFX12-NEXT: v_pk_max_i16 v0, v0, v2
233 ; GFX12-NEXT: v_pk_max_i16 v1, v1, v3
234 ; GFX12-NEXT: s_setpc_b64 s[30:31]
235 %res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %arg, i1 false)
; <4 x i16> case: calls @llvm.abs.v4i16 on %arg with `i1 false`.
; Expected lowering, as pinned by the checks below:
;  - gfx600/gfx700 run the v_bfe_i32 sign-extend + 32-bit sub/max sequence on
;    each of v0..v3, then rebuild the outputs with shift/OR, v_alignbit_b32
;    and a 16-bit right shift.
;  - gfx803 uses plain 16-bit sub/max for the low halves of v0/v1 and SDWA
;    forms (WORD_1 selects) for the high halves, then ORs each pair.
;  - gfx900 and newer issue two v_pk_sub_i16 / v_pk_max_i16 pairs (v0, v1);
;    gfx1010 and newer group both subs ahead of both maxes.
; The check lines are autogenerated; do not edit them by hand.
239 define <4 x i16> @v_abs_v4i16(<4 x i16> %arg) {
240 ; GFX6-LABEL: v_abs_v4i16:
242 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
243 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
244 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
245 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0, v2
246 ; GFX6-NEXT: v_max_i32_e32 v2, v2, v4
247 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0, v3
248 ; GFX6-NEXT: v_max_i32_e32 v3, v3, v4
249 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
250 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
251 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
252 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
253 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
254 ; GFX6-NEXT: v_max_i32_e32 v0, v0, v3
255 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
256 ; GFX6-NEXT: v_max_i32_e32 v1, v1, v3
257 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
258 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
259 ; GFX6-NEXT: v_alignbit_b32 v1, v2, v0, 16
260 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
261 ; GFX6-NEXT: s_setpc_b64 s[30:31]
263 ; GFX7-LABEL: v_abs_v4i16:
265 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266 ; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 16
267 ; GFX7-NEXT: v_bfe_i32 v3, v3, 0, 16
268 ; GFX7-NEXT: v_sub_i32_e32 v4, vcc, 0, v2
269 ; GFX7-NEXT: v_max_i32_e32 v2, v2, v4
270 ; GFX7-NEXT: v_sub_i32_e32 v4, vcc, 0, v3
271 ; GFX7-NEXT: v_max_i32_e32 v3, v3, v4
272 ; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16
273 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
274 ; GFX7-NEXT: v_bfe_i32 v1, v1, 0, 16
275 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
276 ; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
277 ; GFX7-NEXT: v_max_i32_e32 v0, v0, v3
278 ; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
279 ; GFX7-NEXT: v_max_i32_e32 v1, v1, v3
280 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
281 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
282 ; GFX7-NEXT: v_alignbit_b32 v1, v2, v0, 16
283 ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2
284 ; GFX7-NEXT: s_setpc_b64 s[30:31]
286 ; GFX8-LABEL: v_abs_v4i16:
288 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
289 ; GFX8-NEXT: v_mov_b32_e32 v2, 0
290 ; GFX8-NEXT: v_sub_u16_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
291 ; GFX8-NEXT: v_sub_u16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
292 ; GFX8-NEXT: v_sub_u16_e32 v4, 0, v1
293 ; GFX8-NEXT: v_sub_u16_e32 v5, 0, v0
294 ; GFX8-NEXT: v_max_i16_sdwa v3, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
295 ; GFX8-NEXT: v_max_i16_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
296 ; GFX8-NEXT: v_max_i16_e32 v0, v0, v5
297 ; GFX8-NEXT: v_max_i16_e32 v1, v1, v4
298 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
299 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
300 ; GFX8-NEXT: s_setpc_b64 s[30:31]
302 ; GFX9-LABEL: v_abs_v4i16:
304 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
305 ; GFX9-NEXT: v_pk_sub_i16 v2, 0, v0
306 ; GFX9-NEXT: v_pk_max_i16 v0, v0, v2
307 ; GFX9-NEXT: v_pk_sub_i16 v2, 0, v1
308 ; GFX9-NEXT: v_pk_max_i16 v1, v1, v2
309 ; GFX9-NEXT: s_setpc_b64 s[30:31]
311 ; GFX10-LABEL: v_abs_v4i16:
313 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
314 ; GFX10-NEXT: v_pk_sub_i16 v2, 0, v0
315 ; GFX10-NEXT: v_pk_sub_i16 v3, 0, v1
316 ; GFX10-NEXT: v_pk_max_i16 v0, v0, v2
317 ; GFX10-NEXT: v_pk_max_i16 v1, v1, v3
318 ; GFX10-NEXT: s_setpc_b64 s[30:31]
320 ; GFX11-LABEL: v_abs_v4i16:
322 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
323 ; GFX11-NEXT: v_pk_sub_i16 v2, 0, v0
324 ; GFX11-NEXT: v_pk_sub_i16 v3, 0, v1
325 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
326 ; GFX11-NEXT: v_pk_max_i16 v0, v0, v2
327 ; GFX11-NEXT: v_pk_max_i16 v1, v1, v3
328 ; GFX11-NEXT: s_setpc_b64 s[30:31]
330 ; GFX12-LABEL: v_abs_v4i16:
332 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
333 ; GFX12-NEXT: s_wait_expcnt 0x0
334 ; GFX12-NEXT: s_wait_samplecnt 0x0
335 ; GFX12-NEXT: s_wait_bvhcnt 0x0
336 ; GFX12-NEXT: s_wait_kmcnt 0x0
337 ; GFX12-NEXT: v_pk_sub_i16 v2, 0, v0
338 ; GFX12-NEXT: v_pk_sub_i16 v3, 0, v1
339 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
340 ; GFX12-NEXT: v_pk_max_i16 v0, v0, v2
341 ; GFX12-NEXT: v_pk_max_i16 v1, v1, v3
342 ; GFX12-NEXT: s_setpc_b64 s[30:31]
343 %res = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %arg, i1 false)
; <6 x i16> case: calls @llvm.abs.v6i16 on %arg with `i1 false`.
; Expected lowering, as pinned by the checks below:
;  - gfx600/gfx700 run the v_bfe_i32 sign-extend + 32-bit sub/max sequence on
;    each of v0..v5, then rebuild the outputs with shift/OR, v_alignbit_b32
;    and a 16-bit right shift.
;  - gfx803 uses plain 16-bit sub/max for the low halves of v0..v2 and SDWA
;    forms (WORD_1 selects) for the high halves, then ORs each pair.
;  - gfx900 and newer issue three v_pk_sub_i16 / v_pk_max_i16 pairs (v0..v2);
;    gfx1100/gfx1200 additionally expect s_delay_alu hints between them.
; The check lines are autogenerated; do not edit them by hand.
347 define <6 x i16> @v_abs_v6i16(<6 x i16> %arg) {
348 ; GFX6-LABEL: v_abs_v6i16:
350 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
352 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
353 ; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 0, v2
354 ; GFX6-NEXT: v_max_i32_e32 v2, v2, v6
355 ; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 0, v3
356 ; GFX6-NEXT: v_max_i32_e32 v3, v3, v6
357 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
358 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
359 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
360 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
361 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
362 ; GFX6-NEXT: v_max_i32_e32 v0, v0, v3
363 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
364 ; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16
365 ; GFX6-NEXT: v_max_i32_e32 v1, v1, v3
366 ; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16
367 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
368 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v5
369 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
370 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 0, v4
371 ; GFX6-NEXT: v_max_i32_e32 v5, v5, v3
372 ; GFX6-NEXT: v_max_i32_e32 v1, v4, v1
373 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v5
374 ; GFX6-NEXT: v_or_b32_e32 v4, v1, v3
375 ; GFX6-NEXT: v_alignbit_b32 v1, v2, v0, 16
376 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
377 ; GFX6-NEXT: s_setpc_b64 s[30:31]
379 ; GFX7-LABEL: v_abs_v6i16:
381 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
382 ; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 16
383 ; GFX7-NEXT: v_bfe_i32 v3, v3, 0, 16
384 ; GFX7-NEXT: v_sub_i32_e32 v6, vcc, 0, v2
385 ; GFX7-NEXT: v_max_i32_e32 v2, v2, v6
386 ; GFX7-NEXT: v_sub_i32_e32 v6, vcc, 0, v3
387 ; GFX7-NEXT: v_max_i32_e32 v3, v3, v6
388 ; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16
389 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
390 ; GFX7-NEXT: v_bfe_i32 v1, v1, 0, 16
391 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
392 ; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
393 ; GFX7-NEXT: v_max_i32_e32 v0, v0, v3
394 ; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
395 ; GFX7-NEXT: v_bfe_i32 v5, v5, 0, 16
396 ; GFX7-NEXT: v_max_i32_e32 v1, v1, v3
397 ; GFX7-NEXT: v_bfe_i32 v4, v4, 0, 16
398 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
399 ; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v5
400 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
401 ; GFX7-NEXT: v_sub_i32_e32 v1, vcc, 0, v4
402 ; GFX7-NEXT: v_max_i32_e32 v5, v5, v3
403 ; GFX7-NEXT: v_max_i32_e32 v1, v4, v1
404 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v5
405 ; GFX7-NEXT: v_or_b32_e32 v4, v1, v3
406 ; GFX7-NEXT: v_alignbit_b32 v1, v2, v0, 16
407 ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2
408 ; GFX7-NEXT: s_setpc_b64 s[30:31]
410 ; GFX8-LABEL: v_abs_v6i16:
412 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
413 ; GFX8-NEXT: v_mov_b32_e32 v3, 0
414 ; GFX8-NEXT: v_sub_u16_sdwa v4, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
415 ; GFX8-NEXT: v_sub_u16_sdwa v5, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
416 ; GFX8-NEXT: v_sub_u16_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
417 ; GFX8-NEXT: v_sub_u16_e32 v6, 0, v2
418 ; GFX8-NEXT: v_sub_u16_e32 v7, 0, v1
419 ; GFX8-NEXT: v_sub_u16_e32 v8, 0, v0
420 ; GFX8-NEXT: v_max_i16_sdwa v4, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
421 ; GFX8-NEXT: v_max_i16_sdwa v5, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
422 ; GFX8-NEXT: v_max_i16_sdwa v3, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
423 ; GFX8-NEXT: v_max_i16_e32 v0, v0, v8
424 ; GFX8-NEXT: v_max_i16_e32 v1, v1, v7
425 ; GFX8-NEXT: v_max_i16_e32 v2, v2, v6
426 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v3
427 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v5
428 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v4
429 ; GFX8-NEXT: s_setpc_b64 s[30:31]
431 ; GFX9-LABEL: v_abs_v6i16:
433 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
434 ; GFX9-NEXT: v_pk_sub_i16 v3, 0, v0
435 ; GFX9-NEXT: v_pk_max_i16 v0, v0, v3
436 ; GFX9-NEXT: v_pk_sub_i16 v3, 0, v1
437 ; GFX9-NEXT: v_pk_max_i16 v1, v1, v3
438 ; GFX9-NEXT: v_pk_sub_i16 v3, 0, v2
439 ; GFX9-NEXT: v_pk_max_i16 v2, v2, v3
440 ; GFX9-NEXT: s_setpc_b64 s[30:31]
442 ; GFX10-LABEL: v_abs_v6i16:
444 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
445 ; GFX10-NEXT: v_pk_sub_i16 v3, 0, v0
446 ; GFX10-NEXT: v_pk_sub_i16 v4, 0, v1
447 ; GFX10-NEXT: v_pk_sub_i16 v5, 0, v2
448 ; GFX10-NEXT: v_pk_max_i16 v0, v0, v3
449 ; GFX10-NEXT: v_pk_max_i16 v1, v1, v4
450 ; GFX10-NEXT: v_pk_max_i16 v2, v2, v5
451 ; GFX10-NEXT: s_setpc_b64 s[30:31]
453 ; GFX11-LABEL: v_abs_v6i16:
455 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
456 ; GFX11-NEXT: v_pk_sub_i16 v3, 0, v0
457 ; GFX11-NEXT: v_pk_sub_i16 v4, 0, v1
458 ; GFX11-NEXT: v_pk_sub_i16 v5, 0, v2
459 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
460 ; GFX11-NEXT: v_pk_max_i16 v0, v0, v3
461 ; GFX11-NEXT: v_pk_max_i16 v1, v1, v4
462 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
463 ; GFX11-NEXT: v_pk_max_i16 v2, v2, v5
464 ; GFX11-NEXT: s_setpc_b64 s[30:31]
466 ; GFX12-LABEL: v_abs_v6i16:
468 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
469 ; GFX12-NEXT: s_wait_expcnt 0x0
470 ; GFX12-NEXT: s_wait_samplecnt 0x0
471 ; GFX12-NEXT: s_wait_bvhcnt 0x0
472 ; GFX12-NEXT: s_wait_kmcnt 0x0
473 ; GFX12-NEXT: v_pk_sub_i16 v3, 0, v0
474 ; GFX12-NEXT: v_pk_sub_i16 v4, 0, v1
475 ; GFX12-NEXT: v_pk_sub_i16 v5, 0, v2
476 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
477 ; GFX12-NEXT: v_pk_max_i16 v0, v0, v3
478 ; GFX12-NEXT: v_pk_max_i16 v1, v1, v4
479 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
480 ; GFX12-NEXT: v_pk_max_i16 v2, v2, v5
481 ; GFX12-NEXT: s_setpc_b64 s[30:31]
482 %res = call <6 x i16> @llvm.abs.v6i16(<6 x i16> %arg, i1 false)
; <8 x i16> case: calls @llvm.abs.v8i16 on %arg with `i1 false`.
; Expected lowering, as pinned by the checks below:
;  - gfx600/gfx700 run the v_bfe_i32 sign-extend + 32-bit sub/max sequence on
;    each of v0..v7, then rebuild the outputs with shift/OR, v_alignbit_b32
;    and 16-bit right shifts.
;  - gfx803 uses plain 16-bit sub/max for the low halves of v0..v3 and SDWA
;    forms (WORD_1 selects) for the high halves, then ORs each pair.
;  - gfx900 and newer issue four v_pk_sub_i16 / v_pk_max_i16 pairs (v0..v3);
;    gfx1100/gfx1200 additionally expect s_delay_alu hints between them.
; The check lines are autogenerated; do not edit them by hand.
486 define <8 x i16> @v_abs_v8i16(<8 x i16> %arg) {
487 ; GFX6-LABEL: v_abs_v8i16:
489 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
490 ; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 16
491 ; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 16
492 ; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 0, v6
493 ; GFX6-NEXT: v_max_i32_e32 v6, v6, v8
494 ; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 0, v7
495 ; GFX6-NEXT: v_max_i32_e32 v7, v7, v8
496 ; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16
497 ; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7
498 ; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16
499 ; GFX6-NEXT: v_or_b32_e32 v6, v6, v7
500 ; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 0, v4
501 ; GFX6-NEXT: v_max_i32_e32 v4, v4, v7
502 ; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 0, v5
503 ; GFX6-NEXT: v_max_i32_e32 v5, v5, v7
504 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
505 ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5
506 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
507 ; GFX6-NEXT: v_or_b32_e32 v4, v4, v5
508 ; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
509 ; GFX6-NEXT: v_max_i32_e32 v2, v2, v5
510 ; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
511 ; GFX6-NEXT: v_max_i32_e32 v3, v3, v5
512 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
513 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
514 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
515 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
516 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
517 ; GFX6-NEXT: v_max_i32_e32 v0, v0, v3
518 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
519 ; GFX6-NEXT: v_max_i32_e32 v1, v1, v3
520 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
521 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
522 ; GFX6-NEXT: v_alignbit_b32 v1, v2, v0, 16
523 ; GFX6-NEXT: v_alignbit_b32 v5, v6, v4, 16
524 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
525 ; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v6
526 ; GFX6-NEXT: s_setpc_b64 s[30:31]
528 ; GFX7-LABEL: v_abs_v8i16:
530 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
531 ; GFX7-NEXT: v_bfe_i32 v6, v6, 0, 16
532 ; GFX7-NEXT: v_bfe_i32 v7, v7, 0, 16
533 ; GFX7-NEXT: v_sub_i32_e32 v8, vcc, 0, v6
534 ; GFX7-NEXT: v_max_i32_e32 v6, v6, v8
535 ; GFX7-NEXT: v_sub_i32_e32 v8, vcc, 0, v7
536 ; GFX7-NEXT: v_max_i32_e32 v7, v7, v8
537 ; GFX7-NEXT: v_bfe_i32 v4, v4, 0, 16
538 ; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7
539 ; GFX7-NEXT: v_bfe_i32 v5, v5, 0, 16
540 ; GFX7-NEXT: v_or_b32_e32 v6, v6, v7
541 ; GFX7-NEXT: v_sub_i32_e32 v7, vcc, 0, v4
542 ; GFX7-NEXT: v_max_i32_e32 v4, v4, v7
543 ; GFX7-NEXT: v_sub_i32_e32 v7, vcc, 0, v5
544 ; GFX7-NEXT: v_max_i32_e32 v5, v5, v7
545 ; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 16
546 ; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5
547 ; GFX7-NEXT: v_bfe_i32 v3, v3, 0, 16
548 ; GFX7-NEXT: v_or_b32_e32 v4, v4, v5
549 ; GFX7-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
550 ; GFX7-NEXT: v_max_i32_e32 v2, v2, v5
551 ; GFX7-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
552 ; GFX7-NEXT: v_max_i32_e32 v3, v3, v5
553 ; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16
554 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
555 ; GFX7-NEXT: v_bfe_i32 v1, v1, 0, 16
556 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
557 ; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
558 ; GFX7-NEXT: v_max_i32_e32 v0, v0, v3
559 ; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
560 ; GFX7-NEXT: v_max_i32_e32 v1, v1, v3
561 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
562 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
563 ; GFX7-NEXT: v_alignbit_b32 v1, v2, v0, 16
564 ; GFX7-NEXT: v_alignbit_b32 v5, v6, v4, 16
565 ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2
566 ; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v6
567 ; GFX7-NEXT: s_setpc_b64 s[30:31]
569 ; GFX8-LABEL: v_abs_v8i16:
571 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
572 ; GFX8-NEXT: v_mov_b32_e32 v4, 0
573 ; GFX8-NEXT: v_sub_u16_sdwa v5, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
574 ; GFX8-NEXT: v_sub_u16_sdwa v6, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
575 ; GFX8-NEXT: v_sub_u16_sdwa v7, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
576 ; GFX8-NEXT: v_sub_u16_sdwa v4, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
577 ; GFX8-NEXT: v_sub_u16_e32 v8, 0, v3
578 ; GFX8-NEXT: v_sub_u16_e32 v9, 0, v2
579 ; GFX8-NEXT: v_sub_u16_e32 v10, 0, v1
580 ; GFX8-NEXT: v_sub_u16_e32 v11, 0, v0
581 ; GFX8-NEXT: v_max_i16_sdwa v5, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
582 ; GFX8-NEXT: v_max_i16_sdwa v6, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
583 ; GFX8-NEXT: v_max_i16_sdwa v7, v1, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
584 ; GFX8-NEXT: v_max_i16_sdwa v4, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
585 ; GFX8-NEXT: v_max_i16_e32 v0, v0, v11
586 ; GFX8-NEXT: v_max_i16_e32 v1, v1, v10
587 ; GFX8-NEXT: v_max_i16_e32 v2, v2, v9
588 ; GFX8-NEXT: v_max_i16_e32 v3, v3, v8
589 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
590 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v7
591 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v6
592 ; GFX8-NEXT: v_or_b32_e32 v3, v3, v5
593 ; GFX8-NEXT: s_setpc_b64 s[30:31]
595 ; GFX9-LABEL: v_abs_v8i16:
597 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
598 ; GFX9-NEXT: v_pk_sub_i16 v4, 0, v0
599 ; GFX9-NEXT: v_pk_max_i16 v0, v0, v4
600 ; GFX9-NEXT: v_pk_sub_i16 v4, 0, v1
601 ; GFX9-NEXT: v_pk_max_i16 v1, v1, v4
602 ; GFX9-NEXT: v_pk_sub_i16 v4, 0, v2
603 ; GFX9-NEXT: v_pk_max_i16 v2, v2, v4
604 ; GFX9-NEXT: v_pk_sub_i16 v4, 0, v3
605 ; GFX9-NEXT: v_pk_max_i16 v3, v3, v4
606 ; GFX9-NEXT: s_setpc_b64 s[30:31]
608 ; GFX10-LABEL: v_abs_v8i16:
610 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
611 ; GFX10-NEXT: v_pk_sub_i16 v4, 0, v0
612 ; GFX10-NEXT: v_pk_sub_i16 v5, 0, v1
613 ; GFX10-NEXT: v_pk_sub_i16 v6, 0, v2
614 ; GFX10-NEXT: v_pk_sub_i16 v7, 0, v3
615 ; GFX10-NEXT: v_pk_max_i16 v0, v0, v4
616 ; GFX10-NEXT: v_pk_max_i16 v1, v1, v5
617 ; GFX10-NEXT: v_pk_max_i16 v2, v2, v6
618 ; GFX10-NEXT: v_pk_max_i16 v3, v3, v7
619 ; GFX10-NEXT: s_setpc_b64 s[30:31]
621 ; GFX11-LABEL: v_abs_v8i16:
623 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
624 ; GFX11-NEXT: v_pk_sub_i16 v4, 0, v0
625 ; GFX11-NEXT: v_pk_sub_i16 v5, 0, v1
626 ; GFX11-NEXT: v_pk_sub_i16 v6, 0, v2
627 ; GFX11-NEXT: v_pk_sub_i16 v7, 0, v3
628 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
629 ; GFX11-NEXT: v_pk_max_i16 v0, v0, v4
630 ; GFX11-NEXT: v_pk_max_i16 v1, v1, v5
631 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
632 ; GFX11-NEXT: v_pk_max_i16 v2, v2, v6
633 ; GFX11-NEXT: v_pk_max_i16 v3, v3, v7
634 ; GFX11-NEXT: s_setpc_b64 s[30:31]
636 ; GFX12-LABEL: v_abs_v8i16:
638 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
639 ; GFX12-NEXT: s_wait_expcnt 0x0
640 ; GFX12-NEXT: s_wait_samplecnt 0x0
641 ; GFX12-NEXT: s_wait_bvhcnt 0x0
642 ; GFX12-NEXT: s_wait_kmcnt 0x0
643 ; GFX12-NEXT: v_pk_sub_i16 v4, 0, v0
644 ; GFX12-NEXT: v_pk_sub_i16 v5, 0, v1
645 ; GFX12-NEXT: v_pk_sub_i16 v6, 0, v2
646 ; GFX12-NEXT: v_pk_sub_i16 v7, 0, v3
647 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
648 ; GFX12-NEXT: v_pk_max_i16 v0, v0, v4
649 ; GFX12-NEXT: v_pk_max_i16 v1, v1, v5
650 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
651 ; GFX12-NEXT: v_pk_max_i16 v2, v2, v6
652 ; GFX12-NEXT: v_pk_max_i16 v3, v3, v7
653 ; GFX12-NEXT: s_setpc_b64 s[30:31]
654 %res = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %arg, i1 false)
659 define <16 x i16> @v_abs_v16i16(<16 x i16> %arg) {
660 ; GFX6-LABEL: v_abs_v16i16:
662 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
663 ; GFX6-NEXT: v_bfe_i32 v14, v14, 0, 16
664 ; GFX6-NEXT: v_bfe_i32 v15, v15, 0, 16
665 ; GFX6-NEXT: v_sub_i32_e32 v16, vcc, 0, v14
666 ; GFX6-NEXT: v_max_i32_e32 v14, v14, v16
667 ; GFX6-NEXT: v_sub_i32_e32 v16, vcc, 0, v15
668 ; GFX6-NEXT: v_max_i32_e32 v15, v15, v16
669 ; GFX6-NEXT: v_bfe_i32 v12, v12, 0, 16
670 ; GFX6-NEXT: v_lshlrev_b32_e32 v15, 16, v15
671 ; GFX6-NEXT: v_bfe_i32 v13, v13, 0, 16
672 ; GFX6-NEXT: v_or_b32_e32 v14, v14, v15
673 ; GFX6-NEXT: v_sub_i32_e32 v15, vcc, 0, v12
674 ; GFX6-NEXT: v_max_i32_e32 v12, v12, v15
675 ; GFX6-NEXT: v_sub_i32_e32 v15, vcc, 0, v13
676 ; GFX6-NEXT: v_max_i32_e32 v13, v13, v15
677 ; GFX6-NEXT: v_bfe_i32 v10, v10, 0, 16
678 ; GFX6-NEXT: v_lshlrev_b32_e32 v13, 16, v13
679 ; GFX6-NEXT: v_bfe_i32 v11, v11, 0, 16
680 ; GFX6-NEXT: v_or_b32_e32 v12, v12, v13
681 ; GFX6-NEXT: v_sub_i32_e32 v13, vcc, 0, v10
682 ; GFX6-NEXT: v_max_i32_e32 v10, v10, v13
683 ; GFX6-NEXT: v_sub_i32_e32 v13, vcc, 0, v11
684 ; GFX6-NEXT: v_max_i32_e32 v11, v11, v13
685 ; GFX6-NEXT: v_bfe_i32 v8, v8, 0, 16
686 ; GFX6-NEXT: v_lshlrev_b32_e32 v11, 16, v11
687 ; GFX6-NEXT: v_bfe_i32 v9, v9, 0, 16
688 ; GFX6-NEXT: v_or_b32_e32 v10, v10, v11
689 ; GFX6-NEXT: v_sub_i32_e32 v11, vcc, 0, v8
690 ; GFX6-NEXT: v_max_i32_e32 v8, v8, v11
691 ; GFX6-NEXT: v_sub_i32_e32 v11, vcc, 0, v9
692 ; GFX6-NEXT: v_max_i32_e32 v9, v9, v11
693 ; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 16
694 ; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v9
695 ; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 16
696 ; GFX6-NEXT: v_or_b32_e32 v8, v8, v9
697 ; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 0, v6
698 ; GFX6-NEXT: v_max_i32_e32 v6, v6, v9
699 ; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 0, v7
700 ; GFX6-NEXT: v_max_i32_e32 v7, v7, v9
701 ; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16
702 ; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7
703 ; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16
704 ; GFX6-NEXT: v_or_b32_e32 v6, v6, v7
705 ; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 0, v4
706 ; GFX6-NEXT: v_max_i32_e32 v4, v4, v7
707 ; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 0, v5
708 ; GFX6-NEXT: v_max_i32_e32 v5, v5, v7
709 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
710 ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5
711 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
712 ; GFX6-NEXT: v_or_b32_e32 v4, v4, v5
713 ; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
714 ; GFX6-NEXT: v_max_i32_e32 v2, v2, v5
715 ; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
716 ; GFX6-NEXT: v_max_i32_e32 v3, v3, v5
717 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
718 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
719 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
720 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
721 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
722 ; GFX6-NEXT: v_max_i32_e32 v0, v0, v3
723 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
724 ; GFX6-NEXT: v_max_i32_e32 v1, v1, v3
725 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
726 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
727 ; GFX6-NEXT: v_alignbit_b32 v1, v2, v0, 16
728 ; GFX6-NEXT: v_alignbit_b32 v5, v6, v4, 16
729 ; GFX6-NEXT: v_alignbit_b32 v9, v10, v8, 16
730 ; GFX6-NEXT: v_alignbit_b32 v13, v14, v12, 16
731 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
732 ; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v6
733 ; GFX6-NEXT: v_lshrrev_b32_e32 v11, 16, v10
734 ; GFX6-NEXT: v_lshrrev_b32_e32 v15, 16, v14
735 ; GFX6-NEXT: s_setpc_b64 s[30:31]
737 ; GFX7-LABEL: v_abs_v16i16:
739 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
740 ; GFX7-NEXT: v_bfe_i32 v14, v14, 0, 16
741 ; GFX7-NEXT: v_bfe_i32 v15, v15, 0, 16
742 ; GFX7-NEXT: v_sub_i32_e32 v16, vcc, 0, v14
743 ; GFX7-NEXT: v_max_i32_e32 v14, v14, v16
744 ; GFX7-NEXT: v_sub_i32_e32 v16, vcc, 0, v15
745 ; GFX7-NEXT: v_max_i32_e32 v15, v15, v16
746 ; GFX7-NEXT: v_bfe_i32 v12, v12, 0, 16
747 ; GFX7-NEXT: v_lshlrev_b32_e32 v15, 16, v15
748 ; GFX7-NEXT: v_bfe_i32 v13, v13, 0, 16
749 ; GFX7-NEXT: v_or_b32_e32 v14, v14, v15
750 ; GFX7-NEXT: v_sub_i32_e32 v15, vcc, 0, v12
751 ; GFX7-NEXT: v_max_i32_e32 v12, v12, v15
752 ; GFX7-NEXT: v_sub_i32_e32 v15, vcc, 0, v13
753 ; GFX7-NEXT: v_max_i32_e32 v13, v13, v15
754 ; GFX7-NEXT: v_bfe_i32 v10, v10, 0, 16
755 ; GFX7-NEXT: v_lshlrev_b32_e32 v13, 16, v13
756 ; GFX7-NEXT: v_bfe_i32 v11, v11, 0, 16
757 ; GFX7-NEXT: v_or_b32_e32 v12, v12, v13
758 ; GFX7-NEXT: v_sub_i32_e32 v13, vcc, 0, v10
759 ; GFX7-NEXT: v_max_i32_e32 v10, v10, v13
760 ; GFX7-NEXT: v_sub_i32_e32 v13, vcc, 0, v11
761 ; GFX7-NEXT: v_max_i32_e32 v11, v11, v13
762 ; GFX7-NEXT: v_bfe_i32 v8, v8, 0, 16
763 ; GFX7-NEXT: v_lshlrev_b32_e32 v11, 16, v11
764 ; GFX7-NEXT: v_bfe_i32 v9, v9, 0, 16
765 ; GFX7-NEXT: v_or_b32_e32 v10, v10, v11
766 ; GFX7-NEXT: v_sub_i32_e32 v11, vcc, 0, v8
767 ; GFX7-NEXT: v_max_i32_e32 v8, v8, v11
768 ; GFX7-NEXT: v_sub_i32_e32 v11, vcc, 0, v9
769 ; GFX7-NEXT: v_max_i32_e32 v9, v9, v11
770 ; GFX7-NEXT: v_bfe_i32 v6, v6, 0, 16
771 ; GFX7-NEXT: v_lshlrev_b32_e32 v9, 16, v9
772 ; GFX7-NEXT: v_bfe_i32 v7, v7, 0, 16
773 ; GFX7-NEXT: v_or_b32_e32 v8, v8, v9
774 ; GFX7-NEXT: v_sub_i32_e32 v9, vcc, 0, v6
775 ; GFX7-NEXT: v_max_i32_e32 v6, v6, v9
776 ; GFX7-NEXT: v_sub_i32_e32 v9, vcc, 0, v7
777 ; GFX7-NEXT: v_max_i32_e32 v7, v7, v9
778 ; GFX7-NEXT: v_bfe_i32 v4, v4, 0, 16
779 ; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7
780 ; GFX7-NEXT: v_bfe_i32 v5, v5, 0, 16
781 ; GFX7-NEXT: v_or_b32_e32 v6, v6, v7
782 ; GFX7-NEXT: v_sub_i32_e32 v7, vcc, 0, v4
783 ; GFX7-NEXT: v_max_i32_e32 v4, v4, v7
784 ; GFX7-NEXT: v_sub_i32_e32 v7, vcc, 0, v5
785 ; GFX7-NEXT: v_max_i32_e32 v5, v5, v7
786 ; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 16
787 ; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5
788 ; GFX7-NEXT: v_bfe_i32 v3, v3, 0, 16
789 ; GFX7-NEXT: v_or_b32_e32 v4, v4, v5
790 ; GFX7-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
791 ; GFX7-NEXT: v_max_i32_e32 v2, v2, v5
792 ; GFX7-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
793 ; GFX7-NEXT: v_max_i32_e32 v3, v3, v5
794 ; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16
795 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
796 ; GFX7-NEXT: v_bfe_i32 v1, v1, 0, 16
797 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
798 ; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
799 ; GFX7-NEXT: v_max_i32_e32 v0, v0, v3
800 ; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
801 ; GFX7-NEXT: v_max_i32_e32 v1, v1, v3
802 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
803 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
804 ; GFX7-NEXT: v_alignbit_b32 v1, v2, v0, 16
805 ; GFX7-NEXT: v_alignbit_b32 v5, v6, v4, 16
806 ; GFX7-NEXT: v_alignbit_b32 v9, v10, v8, 16
807 ; GFX7-NEXT: v_alignbit_b32 v13, v14, v12, 16
808 ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2
809 ; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v6
810 ; GFX7-NEXT: v_lshrrev_b32_e32 v11, 16, v10
811 ; GFX7-NEXT: v_lshrrev_b32_e32 v15, 16, v14
812 ; GFX7-NEXT: s_setpc_b64 s[30:31]
814 ; GFX8-LABEL: v_abs_v16i16:
816 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
817 ; GFX8-NEXT: v_mov_b32_e32 v8, 0
818 ; GFX8-NEXT: v_sub_u16_sdwa v9, v8, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
819 ; GFX8-NEXT: v_sub_u16_sdwa v10, v8, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
820 ; GFX8-NEXT: v_sub_u16_sdwa v11, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
821 ; GFX8-NEXT: v_sub_u16_sdwa v12, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
822 ; GFX8-NEXT: v_sub_u16_sdwa v13, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
823 ; GFX8-NEXT: v_sub_u16_sdwa v14, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
824 ; GFX8-NEXT: v_sub_u16_sdwa v15, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
825 ; GFX8-NEXT: v_sub_u16_sdwa v8, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
826 ; GFX8-NEXT: v_sub_u16_e32 v16, 0, v7
827 ; GFX8-NEXT: v_sub_u16_e32 v17, 0, v6
828 ; GFX8-NEXT: v_sub_u16_e32 v18, 0, v5
829 ; GFX8-NEXT: v_sub_u16_e32 v19, 0, v4
830 ; GFX8-NEXT: v_sub_u16_e32 v20, 0, v3
831 ; GFX8-NEXT: v_sub_u16_e32 v21, 0, v2
832 ; GFX8-NEXT: v_sub_u16_e32 v22, 0, v1
833 ; GFX8-NEXT: v_sub_u16_e32 v23, 0, v0
834 ; GFX8-NEXT: v_max_i16_sdwa v9, v7, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
835 ; GFX8-NEXT: v_max_i16_sdwa v10, v6, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
836 ; GFX8-NEXT: v_max_i16_sdwa v11, v5, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
837 ; GFX8-NEXT: v_max_i16_sdwa v12, v4, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
838 ; GFX8-NEXT: v_max_i16_sdwa v13, v3, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
839 ; GFX8-NEXT: v_max_i16_sdwa v14, v2, v14 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
840 ; GFX8-NEXT: v_max_i16_sdwa v15, v1, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
841 ; GFX8-NEXT: v_max_i16_sdwa v8, v0, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
842 ; GFX8-NEXT: v_max_i16_e32 v0, v0, v23
843 ; GFX8-NEXT: v_max_i16_e32 v1, v1, v22
844 ; GFX8-NEXT: v_max_i16_e32 v2, v2, v21
845 ; GFX8-NEXT: v_max_i16_e32 v3, v3, v20
846 ; GFX8-NEXT: v_max_i16_e32 v4, v4, v19
847 ; GFX8-NEXT: v_max_i16_e32 v5, v5, v18
848 ; GFX8-NEXT: v_max_i16_e32 v6, v6, v17
849 ; GFX8-NEXT: v_max_i16_e32 v7, v7, v16
850 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v8
851 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v15
852 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v14
853 ; GFX8-NEXT: v_or_b32_e32 v3, v3, v13
854 ; GFX8-NEXT: v_or_b32_e32 v4, v4, v12
855 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v11
856 ; GFX8-NEXT: v_or_b32_e32 v6, v6, v10
857 ; GFX8-NEXT: v_or_b32_e32 v7, v7, v9
858 ; GFX8-NEXT: s_setpc_b64 s[30:31]
860 ; GFX9-LABEL: v_abs_v16i16:
862 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
863 ; GFX9-NEXT: v_pk_sub_i16 v8, 0, v0
864 ; GFX9-NEXT: v_pk_max_i16 v0, v0, v8
865 ; GFX9-NEXT: v_pk_sub_i16 v8, 0, v1
866 ; GFX9-NEXT: v_pk_max_i16 v1, v1, v8
867 ; GFX9-NEXT: v_pk_sub_i16 v8, 0, v2
868 ; GFX9-NEXT: v_pk_max_i16 v2, v2, v8
869 ; GFX9-NEXT: v_pk_sub_i16 v8, 0, v3
870 ; GFX9-NEXT: v_pk_max_i16 v3, v3, v8
871 ; GFX9-NEXT: v_pk_sub_i16 v8, 0, v4
872 ; GFX9-NEXT: v_pk_max_i16 v4, v4, v8
873 ; GFX9-NEXT: v_pk_sub_i16 v8, 0, v5
874 ; GFX9-NEXT: v_pk_max_i16 v5, v5, v8
875 ; GFX9-NEXT: v_pk_sub_i16 v8, 0, v6
876 ; GFX9-NEXT: v_pk_max_i16 v6, v6, v8
877 ; GFX9-NEXT: v_pk_sub_i16 v8, 0, v7
878 ; GFX9-NEXT: v_pk_max_i16 v7, v7, v8
879 ; GFX9-NEXT: s_setpc_b64 s[30:31]
881 ; GFX10-LABEL: v_abs_v16i16:
883 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
884 ; GFX10-NEXT: v_pk_sub_i16 v8, 0, v0
885 ; GFX10-NEXT: v_pk_sub_i16 v9, 0, v1
886 ; GFX10-NEXT: v_pk_sub_i16 v10, 0, v2
887 ; GFX10-NEXT: v_pk_sub_i16 v11, 0, v6
888 ; GFX10-NEXT: v_pk_sub_i16 v12, 0, v7
889 ; GFX10-NEXT: v_pk_max_i16 v0, v0, v8
890 ; GFX10-NEXT: v_pk_max_i16 v1, v1, v9
891 ; GFX10-NEXT: v_pk_max_i16 v2, v2, v10
892 ; GFX10-NEXT: v_pk_sub_i16 v8, 0, v3
893 ; GFX10-NEXT: v_pk_sub_i16 v9, 0, v4
894 ; GFX10-NEXT: v_pk_sub_i16 v10, 0, v5
895 ; GFX10-NEXT: v_pk_max_i16 v6, v6, v11
896 ; GFX10-NEXT: v_pk_max_i16 v7, v7, v12
897 ; GFX10-NEXT: v_pk_max_i16 v3, v3, v8
898 ; GFX10-NEXT: v_pk_max_i16 v4, v4, v9
899 ; GFX10-NEXT: v_pk_max_i16 v5, v5, v10
900 ; GFX10-NEXT: s_setpc_b64 s[30:31]
902 ; GFX11-LABEL: v_abs_v16i16:
904 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
905 ; GFX11-NEXT: v_pk_sub_i16 v8, 0, v0
906 ; GFX11-NEXT: v_pk_sub_i16 v9, 0, v1
907 ; GFX11-NEXT: v_pk_sub_i16 v10, 0, v2
908 ; GFX11-NEXT: v_pk_sub_i16 v11, 0, v6
909 ; GFX11-NEXT: v_pk_sub_i16 v12, 0, v7
910 ; GFX11-NEXT: v_pk_max_i16 v0, v0, v8
911 ; GFX11-NEXT: v_pk_max_i16 v1, v1, v9
912 ; GFX11-NEXT: v_pk_max_i16 v2, v2, v10
913 ; GFX11-NEXT: v_pk_sub_i16 v8, 0, v3
914 ; GFX11-NEXT: v_pk_sub_i16 v9, 0, v4
915 ; GFX11-NEXT: v_pk_sub_i16 v10, 0, v5
916 ; GFX11-NEXT: v_pk_max_i16 v6, v6, v11
917 ; GFX11-NEXT: v_pk_max_i16 v7, v7, v12
918 ; GFX11-NEXT: v_pk_max_i16 v3, v3, v8
919 ; GFX11-NEXT: v_pk_max_i16 v4, v4, v9
920 ; GFX11-NEXT: v_pk_max_i16 v5, v5, v10
921 ; GFX11-NEXT: s_setpc_b64 s[30:31]
923 ; GFX12-LABEL: v_abs_v16i16:
925 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
926 ; GFX12-NEXT: s_wait_expcnt 0x0
927 ; GFX12-NEXT: s_wait_samplecnt 0x0
928 ; GFX12-NEXT: s_wait_bvhcnt 0x0
929 ; GFX12-NEXT: s_wait_kmcnt 0x0
930 ; GFX12-NEXT: v_pk_sub_i16 v8, 0, v0
931 ; GFX12-NEXT: v_pk_sub_i16 v9, 0, v1
932 ; GFX12-NEXT: v_pk_sub_i16 v10, 0, v2
933 ; GFX12-NEXT: v_pk_sub_i16 v11, 0, v6
934 ; GFX12-NEXT: v_pk_sub_i16 v12, 0, v7
935 ; GFX12-NEXT: v_pk_max_i16 v0, v0, v8
936 ; GFX12-NEXT: v_pk_max_i16 v1, v1, v9
937 ; GFX12-NEXT: v_pk_max_i16 v2, v2, v10
938 ; GFX12-NEXT: v_pk_sub_i16 v8, 0, v3
939 ; GFX12-NEXT: v_pk_sub_i16 v9, 0, v4
940 ; GFX12-NEXT: v_pk_sub_i16 v10, 0, v5
941 ; GFX12-NEXT: v_pk_max_i16 v6, v6, v11
942 ; GFX12-NEXT: v_pk_max_i16 v7, v7, v12
943 ; GFX12-NEXT: v_pk_max_i16 v3, v3, v8
944 ; GFX12-NEXT: v_pk_max_i16 v4, v4, v9
945 ; GFX12-NEXT: v_pk_max_i16 v5, v5, v10
946 ; GFX12-NEXT: s_setpc_b64 s[30:31]
947 %res = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %arg, i1 false)
951 define <32 x i16> @v_abs_v32i16(<32 x i16> %arg) {
952 ; GFX6-LABEL: v_abs_v32i16:
954 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
955 ; GFX6-NEXT: v_bfe_i32 v28, v28, 0, 16
956 ; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v28
957 ; GFX6-NEXT: v_bfe_i32 v29, v29, 0, 16
958 ; GFX6-NEXT: v_max_i32_e32 v28, v28, v31
959 ; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v29
960 ; GFX6-NEXT: v_bfe_i32 v30, v30, 0, 16
961 ; GFX6-NEXT: v_max_i32_e32 v29, v29, v31
962 ; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v30
963 ; GFX6-NEXT: v_bfe_i32 v26, v26, 0, 16
964 ; GFX6-NEXT: v_max_i32_e32 v30, v30, v31
965 ; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v26
966 ; GFX6-NEXT: v_bfe_i32 v27, v27, 0, 16
967 ; GFX6-NEXT: v_max_i32_e32 v26, v26, v31
968 ; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v27
969 ; GFX6-NEXT: v_bfe_i32 v24, v24, 0, 16
970 ; GFX6-NEXT: v_max_i32_e32 v27, v27, v31
971 ; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v24
972 ; GFX6-NEXT: v_bfe_i32 v25, v25, 0, 16
973 ; GFX6-NEXT: v_max_i32_e32 v24, v24, v31
974 ; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v25
975 ; GFX6-NEXT: v_bfe_i32 v22, v22, 0, 16
976 ; GFX6-NEXT: v_max_i32_e32 v25, v25, v31
977 ; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v22
978 ; GFX6-NEXT: v_bfe_i32 v23, v23, 0, 16
979 ; GFX6-NEXT: v_max_i32_e32 v22, v22, v31
980 ; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v23
981 ; GFX6-NEXT: v_max_i32_e32 v23, v23, v31
982 ; GFX6-NEXT: buffer_load_dword v31, off, s[0:3], s32
983 ; GFX6-NEXT: v_lshlrev_b32_e32 v23, 16, v23
984 ; GFX6-NEXT: v_lshlrev_b32_e32 v25, 16, v25
985 ; GFX6-NEXT: v_or_b32_e32 v22, v22, v23
986 ; GFX6-NEXT: v_or_b32_e32 v24, v24, v25
987 ; GFX6-NEXT: v_bfe_i32 v21, v21, 0, 16
988 ; GFX6-NEXT: v_bfe_i32 v20, v20, 0, 16
989 ; GFX6-NEXT: v_lshlrev_b32_e32 v29, 16, v29
990 ; GFX6-NEXT: v_or_b32_e32 v28, v28, v29
991 ; GFX6-NEXT: v_sub_i32_e32 v29, vcc, 0, v20
992 ; GFX6-NEXT: v_max_i32_e32 v20, v20, v29
993 ; GFX6-NEXT: v_bfe_i32 v18, v18, 0, 16
994 ; GFX6-NEXT: v_bfe_i32 v19, v19, 0, 16
995 ; GFX6-NEXT: v_bfe_i32 v16, v16, 0, 16
996 ; GFX6-NEXT: v_bfe_i32 v17, v17, 0, 16
997 ; GFX6-NEXT: v_bfe_i32 v14, v14, 0, 16
998 ; GFX6-NEXT: v_bfe_i32 v15, v15, 0, 16
999 ; GFX6-NEXT: v_bfe_i32 v12, v12, 0, 16
1000 ; GFX6-NEXT: v_bfe_i32 v13, v13, 0, 16
1001 ; GFX6-NEXT: v_bfe_i32 v10, v10, 0, 16
1002 ; GFX6-NEXT: v_bfe_i32 v11, v11, 0, 16
1003 ; GFX6-NEXT: v_bfe_i32 v8, v8, 0, 16
1004 ; GFX6-NEXT: v_bfe_i32 v9, v9, 0, 16
1005 ; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 16
1006 ; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 16
1007 ; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16
1008 ; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16
1009 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
1010 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
1011 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
1012 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
1013 ; GFX6-NEXT: v_lshlrev_b32_e32 v27, 16, v27
1014 ; GFX6-NEXT: v_or_b32_e32 v26, v26, v27
1015 ; GFX6-NEXT: v_lshrrev_b32_e32 v27, 16, v26
1016 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1017 ; GFX6-NEXT: v_bfe_i32 v23, v31, 0, 16
1018 ; GFX6-NEXT: v_sub_i32_e32 v25, vcc, 0, v23
1019 ; GFX6-NEXT: v_max_i32_e32 v23, v23, v25
1020 ; GFX6-NEXT: v_lshlrev_b32_e32 v23, 16, v23
1021 ; GFX6-NEXT: v_or_b32_e32 v30, v30, v23
1022 ; GFX6-NEXT: v_sub_i32_e32 v23, vcc, 0, v21
1023 ; GFX6-NEXT: v_max_i32_e32 v21, v21, v23
1024 ; GFX6-NEXT: v_lshlrev_b32_e32 v21, 16, v21
1025 ; GFX6-NEXT: v_or_b32_e32 v20, v20, v21
1026 ; GFX6-NEXT: v_sub_i32_e32 v21, vcc, 0, v18
1027 ; GFX6-NEXT: v_max_i32_e32 v18, v18, v21
1028 ; GFX6-NEXT: v_sub_i32_e32 v21, vcc, 0, v19
1029 ; GFX6-NEXT: v_max_i32_e32 v19, v19, v21
1030 ; GFX6-NEXT: v_lshlrev_b32_e32 v19, 16, v19
1031 ; GFX6-NEXT: v_or_b32_e32 v18, v18, v19
1032 ; GFX6-NEXT: v_sub_i32_e32 v19, vcc, 0, v16
1033 ; GFX6-NEXT: v_max_i32_e32 v16, v16, v19
1034 ; GFX6-NEXT: v_sub_i32_e32 v19, vcc, 0, v17
1035 ; GFX6-NEXT: v_max_i32_e32 v17, v17, v19
1036 ; GFX6-NEXT: v_lshlrev_b32_e32 v17, 16, v17
1037 ; GFX6-NEXT: v_or_b32_e32 v16, v16, v17
1038 ; GFX6-NEXT: v_sub_i32_e32 v17, vcc, 0, v14
1039 ; GFX6-NEXT: v_max_i32_e32 v14, v14, v17
1040 ; GFX6-NEXT: v_sub_i32_e32 v17, vcc, 0, v15
1041 ; GFX6-NEXT: v_max_i32_e32 v15, v15, v17
1042 ; GFX6-NEXT: v_lshlrev_b32_e32 v15, 16, v15
1043 ; GFX6-NEXT: v_or_b32_e32 v14, v14, v15
1044 ; GFX6-NEXT: v_sub_i32_e32 v15, vcc, 0, v12
1045 ; GFX6-NEXT: v_max_i32_e32 v12, v12, v15
1046 ; GFX6-NEXT: v_sub_i32_e32 v15, vcc, 0, v13
1047 ; GFX6-NEXT: v_max_i32_e32 v13, v13, v15
1048 ; GFX6-NEXT: v_lshlrev_b32_e32 v13, 16, v13
1049 ; GFX6-NEXT: v_or_b32_e32 v12, v12, v13
1050 ; GFX6-NEXT: v_sub_i32_e32 v13, vcc, 0, v10
1051 ; GFX6-NEXT: v_max_i32_e32 v10, v10, v13
1052 ; GFX6-NEXT: v_sub_i32_e32 v13, vcc, 0, v11
1053 ; GFX6-NEXT: v_max_i32_e32 v11, v11, v13
1054 ; GFX6-NEXT: v_lshlrev_b32_e32 v11, 16, v11
1055 ; GFX6-NEXT: v_or_b32_e32 v10, v10, v11
1056 ; GFX6-NEXT: v_sub_i32_e32 v11, vcc, 0, v8
1057 ; GFX6-NEXT: v_max_i32_e32 v8, v8, v11
1058 ; GFX6-NEXT: v_sub_i32_e32 v11, vcc, 0, v9
1059 ; GFX6-NEXT: v_max_i32_e32 v9, v9, v11
1060 ; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v9
1061 ; GFX6-NEXT: v_or_b32_e32 v8, v8, v9
1062 ; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 0, v6
1063 ; GFX6-NEXT: v_max_i32_e32 v6, v6, v9
1064 ; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 0, v7
1065 ; GFX6-NEXT: v_max_i32_e32 v7, v7, v9
1066 ; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7
1067 ; GFX6-NEXT: v_or_b32_e32 v6, v6, v7
1068 ; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 0, v4
1069 ; GFX6-NEXT: v_max_i32_e32 v4, v4, v7
1070 ; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 0, v5
1071 ; GFX6-NEXT: v_max_i32_e32 v5, v5, v7
1072 ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5
1073 ; GFX6-NEXT: v_or_b32_e32 v4, v4, v5
1074 ; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
1075 ; GFX6-NEXT: v_max_i32_e32 v2, v2, v5
1076 ; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
1077 ; GFX6-NEXT: v_max_i32_e32 v3, v3, v5
1078 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1079 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
1080 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
1081 ; GFX6-NEXT: v_max_i32_e32 v0, v0, v3
1082 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
1083 ; GFX6-NEXT: v_max_i32_e32 v1, v1, v3
1084 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1085 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
1086 ; GFX6-NEXT: v_alignbit_b32 v1, v2, v0, 16
1087 ; GFX6-NEXT: v_alignbit_b32 v5, v6, v4, 16
1088 ; GFX6-NEXT: v_alignbit_b32 v9, v10, v8, 16
1089 ; GFX6-NEXT: v_alignbit_b32 v13, v14, v12, 16
1090 ; GFX6-NEXT: v_alignbit_b32 v17, v18, v16, 16
1091 ; GFX6-NEXT: v_alignbit_b32 v21, v22, v20, 16
1092 ; GFX6-NEXT: v_alignbit_b32 v25, v26, v24, 16
1093 ; GFX6-NEXT: v_alignbit_b32 v29, v30, v28, 16
1094 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
1095 ; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v6
1096 ; GFX6-NEXT: v_lshrrev_b32_e32 v11, 16, v10
1097 ; GFX6-NEXT: v_lshrrev_b32_e32 v15, 16, v14
1098 ; GFX6-NEXT: v_lshrrev_b32_e32 v19, 16, v18
1099 ; GFX6-NEXT: v_lshrrev_b32_e32 v23, 16, v22
1100 ; GFX6-NEXT: v_lshrrev_b32_e32 v31, 16, v30
1101 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1103 ; GFX7-LABEL: v_abs_v32i16:
1105 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1106 ; GFX7-NEXT: v_bfe_i32 v28, v28, 0, 16
1107 ; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v28
1108 ; GFX7-NEXT: v_bfe_i32 v29, v29, 0, 16
1109 ; GFX7-NEXT: v_max_i32_e32 v28, v28, v31
1110 ; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v29
1111 ; GFX7-NEXT: v_bfe_i32 v30, v30, 0, 16
1112 ; GFX7-NEXT: v_max_i32_e32 v29, v29, v31
1113 ; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v30
1114 ; GFX7-NEXT: v_bfe_i32 v26, v26, 0, 16
1115 ; GFX7-NEXT: v_max_i32_e32 v30, v30, v31
1116 ; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v26
1117 ; GFX7-NEXT: v_bfe_i32 v27, v27, 0, 16
1118 ; GFX7-NEXT: v_max_i32_e32 v26, v26, v31
1119 ; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v27
1120 ; GFX7-NEXT: v_bfe_i32 v24, v24, 0, 16
1121 ; GFX7-NEXT: v_max_i32_e32 v27, v27, v31
1122 ; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v24
1123 ; GFX7-NEXT: v_bfe_i32 v25, v25, 0, 16
1124 ; GFX7-NEXT: v_max_i32_e32 v24, v24, v31
1125 ; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v25
1126 ; GFX7-NEXT: v_bfe_i32 v22, v22, 0, 16
1127 ; GFX7-NEXT: v_max_i32_e32 v25, v25, v31
1128 ; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v22
1129 ; GFX7-NEXT: v_bfe_i32 v23, v23, 0, 16
1130 ; GFX7-NEXT: v_max_i32_e32 v22, v22, v31
1131 ; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v23
1132 ; GFX7-NEXT: v_max_i32_e32 v23, v23, v31
1133 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
1134 ; GFX7-NEXT: v_lshlrev_b32_e32 v23, 16, v23
1135 ; GFX7-NEXT: v_lshlrev_b32_e32 v25, 16, v25
1136 ; GFX7-NEXT: v_or_b32_e32 v22, v22, v23
1137 ; GFX7-NEXT: v_or_b32_e32 v24, v24, v25
1138 ; GFX7-NEXT: v_bfe_i32 v21, v21, 0, 16
1139 ; GFX7-NEXT: v_bfe_i32 v20, v20, 0, 16
1140 ; GFX7-NEXT: v_lshlrev_b32_e32 v29, 16, v29
1141 ; GFX7-NEXT: v_or_b32_e32 v28, v28, v29
1142 ; GFX7-NEXT: v_sub_i32_e32 v29, vcc, 0, v20
1143 ; GFX7-NEXT: v_max_i32_e32 v20, v20, v29
1144 ; GFX7-NEXT: v_bfe_i32 v18, v18, 0, 16
1145 ; GFX7-NEXT: v_bfe_i32 v19, v19, 0, 16
1146 ; GFX7-NEXT: v_bfe_i32 v16, v16, 0, 16
1147 ; GFX7-NEXT: v_bfe_i32 v17, v17, 0, 16
1148 ; GFX7-NEXT: v_bfe_i32 v14, v14, 0, 16
1149 ; GFX7-NEXT: v_bfe_i32 v15, v15, 0, 16
1150 ; GFX7-NEXT: v_bfe_i32 v12, v12, 0, 16
1151 ; GFX7-NEXT: v_bfe_i32 v13, v13, 0, 16
1152 ; GFX7-NEXT: v_bfe_i32 v10, v10, 0, 16
1153 ; GFX7-NEXT: v_bfe_i32 v11, v11, 0, 16
1154 ; GFX7-NEXT: v_bfe_i32 v8, v8, 0, 16
1155 ; GFX7-NEXT: v_bfe_i32 v9, v9, 0, 16
1156 ; GFX7-NEXT: v_bfe_i32 v6, v6, 0, 16
1157 ; GFX7-NEXT: v_bfe_i32 v7, v7, 0, 16
1158 ; GFX7-NEXT: v_bfe_i32 v4, v4, 0, 16
1159 ; GFX7-NEXT: v_bfe_i32 v5, v5, 0, 16
1160 ; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 16
1161 ; GFX7-NEXT: v_bfe_i32 v3, v3, 0, 16
1162 ; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16
1163 ; GFX7-NEXT: v_bfe_i32 v1, v1, 0, 16
1164 ; GFX7-NEXT: v_lshlrev_b32_e32 v27, 16, v27
1165 ; GFX7-NEXT: v_or_b32_e32 v26, v26, v27
1166 ; GFX7-NEXT: v_lshrrev_b32_e32 v27, 16, v26
1167 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1168 ; GFX7-NEXT: v_bfe_i32 v23, v31, 0, 16
1169 ; GFX7-NEXT: v_sub_i32_e32 v25, vcc, 0, v23
1170 ; GFX7-NEXT: v_max_i32_e32 v23, v23, v25
1171 ; GFX7-NEXT: v_lshlrev_b32_e32 v23, 16, v23
1172 ; GFX7-NEXT: v_or_b32_e32 v30, v30, v23
1173 ; GFX7-NEXT: v_sub_i32_e32 v23, vcc, 0, v21
1174 ; GFX7-NEXT: v_max_i32_e32 v21, v21, v23
1175 ; GFX7-NEXT: v_lshlrev_b32_e32 v21, 16, v21
1176 ; GFX7-NEXT: v_or_b32_e32 v20, v20, v21
1177 ; GFX7-NEXT: v_sub_i32_e32 v21, vcc, 0, v18
1178 ; GFX7-NEXT: v_max_i32_e32 v18, v18, v21
1179 ; GFX7-NEXT: v_sub_i32_e32 v21, vcc, 0, v19
1180 ; GFX7-NEXT: v_max_i32_e32 v19, v19, v21
1181 ; GFX7-NEXT: v_lshlrev_b32_e32 v19, 16, v19
1182 ; GFX7-NEXT: v_or_b32_e32 v18, v18, v19
1183 ; GFX7-NEXT: v_sub_i32_e32 v19, vcc, 0, v16
1184 ; GFX7-NEXT: v_max_i32_e32 v16, v16, v19
1185 ; GFX7-NEXT: v_sub_i32_e32 v19, vcc, 0, v17
1186 ; GFX7-NEXT: v_max_i32_e32 v17, v17, v19
1187 ; GFX7-NEXT: v_lshlrev_b32_e32 v17, 16, v17
1188 ; GFX7-NEXT: v_or_b32_e32 v16, v16, v17
1189 ; GFX7-NEXT: v_sub_i32_e32 v17, vcc, 0, v14
1190 ; GFX7-NEXT: v_max_i32_e32 v14, v14, v17
1191 ; GFX7-NEXT: v_sub_i32_e32 v17, vcc, 0, v15
1192 ; GFX7-NEXT: v_max_i32_e32 v15, v15, v17
1193 ; GFX7-NEXT: v_lshlrev_b32_e32 v15, 16, v15
1194 ; GFX7-NEXT: v_or_b32_e32 v14, v14, v15
1195 ; GFX7-NEXT: v_sub_i32_e32 v15, vcc, 0, v12
1196 ; GFX7-NEXT: v_max_i32_e32 v12, v12, v15
1197 ; GFX7-NEXT: v_sub_i32_e32 v15, vcc, 0, v13
1198 ; GFX7-NEXT: v_max_i32_e32 v13, v13, v15
1199 ; GFX7-NEXT: v_lshlrev_b32_e32 v13, 16, v13
1200 ; GFX7-NEXT: v_or_b32_e32 v12, v12, v13
1201 ; GFX7-NEXT: v_sub_i32_e32 v13, vcc, 0, v10
1202 ; GFX7-NEXT: v_max_i32_e32 v10, v10, v13
1203 ; GFX7-NEXT: v_sub_i32_e32 v13, vcc, 0, v11
1204 ; GFX7-NEXT: v_max_i32_e32 v11, v11, v13
1205 ; GFX7-NEXT: v_lshlrev_b32_e32 v11, 16, v11
1206 ; GFX7-NEXT: v_or_b32_e32 v10, v10, v11
1207 ; GFX7-NEXT: v_sub_i32_e32 v11, vcc, 0, v8
1208 ; GFX7-NEXT: v_max_i32_e32 v8, v8, v11
1209 ; GFX7-NEXT: v_sub_i32_e32 v11, vcc, 0, v9
1210 ; GFX7-NEXT: v_max_i32_e32 v9, v9, v11
1211 ; GFX7-NEXT: v_lshlrev_b32_e32 v9, 16, v9
1212 ; GFX7-NEXT: v_or_b32_e32 v8, v8, v9
1213 ; GFX7-NEXT: v_sub_i32_e32 v9, vcc, 0, v6
1214 ; GFX7-NEXT: v_max_i32_e32 v6, v6, v9
1215 ; GFX7-NEXT: v_sub_i32_e32 v9, vcc, 0, v7
1216 ; GFX7-NEXT: v_max_i32_e32 v7, v7, v9
1217 ; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7
1218 ; GFX7-NEXT: v_or_b32_e32 v6, v6, v7
1219 ; GFX7-NEXT: v_sub_i32_e32 v7, vcc, 0, v4
1220 ; GFX7-NEXT: v_max_i32_e32 v4, v4, v7
1221 ; GFX7-NEXT: v_sub_i32_e32 v7, vcc, 0, v5
1222 ; GFX7-NEXT: v_max_i32_e32 v5, v5, v7
1223 ; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5
1224 ; GFX7-NEXT: v_or_b32_e32 v4, v4, v5
1225 ; GFX7-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
1226 ; GFX7-NEXT: v_max_i32_e32 v2, v2, v5
1227 ; GFX7-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
1228 ; GFX7-NEXT: v_max_i32_e32 v3, v3, v5
1229 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1230 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
1231 ; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
1232 ; GFX7-NEXT: v_max_i32_e32 v0, v0, v3
1233 ; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
1234 ; GFX7-NEXT: v_max_i32_e32 v1, v1, v3
1235 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1236 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
1237 ; GFX7-NEXT: v_alignbit_b32 v1, v2, v0, 16
1238 ; GFX7-NEXT: v_alignbit_b32 v5, v6, v4, 16
1239 ; GFX7-NEXT: v_alignbit_b32 v9, v10, v8, 16
1240 ; GFX7-NEXT: v_alignbit_b32 v13, v14, v12, 16
1241 ; GFX7-NEXT: v_alignbit_b32 v17, v18, v16, 16
1242 ; GFX7-NEXT: v_alignbit_b32 v21, v22, v20, 16
1243 ; GFX7-NEXT: v_alignbit_b32 v25, v26, v24, 16
1244 ; GFX7-NEXT: v_alignbit_b32 v29, v30, v28, 16
1245 ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2
1246 ; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v6
1247 ; GFX7-NEXT: v_lshrrev_b32_e32 v11, 16, v10
1248 ; GFX7-NEXT: v_lshrrev_b32_e32 v15, 16, v14
1249 ; GFX7-NEXT: v_lshrrev_b32_e32 v19, 16, v18
1250 ; GFX7-NEXT: v_lshrrev_b32_e32 v23, 16, v22
1251 ; GFX7-NEXT: v_lshrrev_b32_e32 v31, 16, v30
1252 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1254 ; GFX8-LABEL: v_abs_v32i16:
1256 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1257 ; GFX8-NEXT: v_mov_b32_e32 v16, 0
1258 ; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1259 ; GFX8-NEXT: v_sub_u16_e32 v20, 0, v0
1260 ; GFX8-NEXT: v_max_i16_sdwa v19, v0, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1261 ; GFX8-NEXT: v_max_i16_e32 v0, v0, v20
1262 ; GFX8-NEXT: v_sub_u16_sdwa v20, v16, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1263 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v19
1264 ; GFX8-NEXT: v_sub_u16_e32 v19, 0, v1
1265 ; GFX8-NEXT: v_max_i16_sdwa v20, v1, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1266 ; GFX8-NEXT: v_max_i16_e32 v1, v1, v19
1267 ; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1268 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v20
1269 ; GFX8-NEXT: v_sub_u16_e32 v20, 0, v2
1270 ; GFX8-NEXT: v_max_i16_sdwa v19, v2, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1271 ; GFX8-NEXT: v_max_i16_e32 v2, v2, v20
1272 ; GFX8-NEXT: v_sub_u16_sdwa v20, v16, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1273 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v19
1274 ; GFX8-NEXT: v_sub_u16_e32 v19, 0, v3
1275 ; GFX8-NEXT: v_max_i16_sdwa v20, v3, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1276 ; GFX8-NEXT: v_max_i16_e32 v3, v3, v19
1277 ; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1278 ; GFX8-NEXT: v_or_b32_e32 v3, v3, v20
1279 ; GFX8-NEXT: v_sub_u16_e32 v20, 0, v4
1280 ; GFX8-NEXT: v_max_i16_sdwa v19, v4, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1281 ; GFX8-NEXT: v_max_i16_e32 v4, v4, v20
1282 ; GFX8-NEXT: v_sub_u16_sdwa v20, v16, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1283 ; GFX8-NEXT: v_or_b32_e32 v4, v4, v19
1284 ; GFX8-NEXT: v_sub_u16_e32 v19, 0, v5
1285 ; GFX8-NEXT: v_max_i16_sdwa v20, v5, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1286 ; GFX8-NEXT: v_max_i16_e32 v5, v5, v19
1287 ; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1288 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v20
1289 ; GFX8-NEXT: v_sub_u16_e32 v20, 0, v6
1290 ; GFX8-NEXT: v_max_i16_sdwa v19, v6, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1291 ; GFX8-NEXT: v_max_i16_e32 v6, v6, v20
1292 ; GFX8-NEXT: v_sub_u16_sdwa v20, v16, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1293 ; GFX8-NEXT: v_or_b32_e32 v6, v6, v19
1294 ; GFX8-NEXT: v_sub_u16_e32 v19, 0, v7
1295 ; GFX8-NEXT: v_max_i16_sdwa v20, v7, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1296 ; GFX8-NEXT: v_max_i16_e32 v7, v7, v19
1297 ; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1298 ; GFX8-NEXT: v_or_b32_e32 v7, v7, v20
1299 ; GFX8-NEXT: v_sub_u16_e32 v20, 0, v8
1300 ; GFX8-NEXT: v_max_i16_sdwa v19, v8, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1301 ; GFX8-NEXT: v_max_i16_e32 v8, v8, v20
1302 ; GFX8-NEXT: v_sub_u16_sdwa v20, v16, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1303 ; GFX8-NEXT: v_or_b32_e32 v8, v8, v19
1304 ; GFX8-NEXT: v_sub_u16_e32 v19, 0, v9
1305 ; GFX8-NEXT: v_max_i16_sdwa v20, v9, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1306 ; GFX8-NEXT: v_max_i16_e32 v9, v9, v19
1307 ; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1308 ; GFX8-NEXT: v_or_b32_e32 v9, v9, v20
1309 ; GFX8-NEXT: v_sub_u16_e32 v20, 0, v10
1310 ; GFX8-NEXT: v_max_i16_sdwa v19, v10, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1311 ; GFX8-NEXT: v_max_i16_e32 v10, v10, v20
1312 ; GFX8-NEXT: v_sub_u16_sdwa v20, v16, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1313 ; GFX8-NEXT: v_or_b32_e32 v10, v10, v19
1314 ; GFX8-NEXT: v_sub_u16_e32 v19, 0, v11
1315 ; GFX8-NEXT: v_max_i16_sdwa v20, v11, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1316 ; GFX8-NEXT: v_max_i16_e32 v11, v11, v19
1317 ; GFX8-NEXT: v_sub_u16_sdwa v17, v16, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1318 ; GFX8-NEXT: v_sub_u16_sdwa v18, v16, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1319 ; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1320 ; GFX8-NEXT: v_sub_u16_sdwa v16, v16, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1321 ; GFX8-NEXT: v_or_b32_e32 v11, v11, v20
1322 ; GFX8-NEXT: v_sub_u16_e32 v20, 0, v12
1323 ; GFX8-NEXT: v_max_i16_sdwa v16, v12, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1324 ; GFX8-NEXT: v_max_i16_e32 v12, v12, v20
1325 ; GFX8-NEXT: v_or_b32_e32 v12, v12, v16
1326 ; GFX8-NEXT: v_sub_u16_e32 v16, 0, v13
1327 ; GFX8-NEXT: v_max_i16_sdwa v19, v13, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1328 ; GFX8-NEXT: v_sub_u16_e32 v20, 0, v15
1329 ; GFX8-NEXT: v_max_i16_e32 v13, v13, v16
1330 ; GFX8-NEXT: v_sub_u16_e32 v16, 0, v14
1331 ; GFX8-NEXT: v_max_i16_sdwa v17, v15, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1332 ; GFX8-NEXT: v_max_i16_sdwa v18, v14, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1333 ; GFX8-NEXT: v_max_i16_e32 v14, v14, v16
1334 ; GFX8-NEXT: v_max_i16_e32 v15, v15, v20
1335 ; GFX8-NEXT: v_or_b32_e32 v13, v13, v19
1336 ; GFX8-NEXT: v_or_b32_e32 v14, v14, v18
1337 ; GFX8-NEXT: v_or_b32_e32 v15, v15, v17
1338 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1340 ; GFX9-LABEL: v_abs_v32i16:
1342 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1343 ; GFX9-NEXT: v_pk_sub_i16 v16, 0, v0
1344 ; GFX9-NEXT: v_pk_max_i16 v0, v0, v16
1345 ; GFX9-NEXT: v_pk_sub_i16 v16, 0, v1
1346 ; GFX9-NEXT: v_pk_max_i16 v1, v1, v16
1347 ; GFX9-NEXT: v_pk_sub_i16 v16, 0, v2
1348 ; GFX9-NEXT: v_pk_max_i16 v2, v2, v16
1349 ; GFX9-NEXT: v_pk_sub_i16 v16, 0, v3
1350 ; GFX9-NEXT: v_pk_max_i16 v3, v3, v16
1351 ; GFX9-NEXT: v_pk_sub_i16 v16, 0, v4
1352 ; GFX9-NEXT: v_pk_max_i16 v4, v4, v16
1353 ; GFX9-NEXT: v_pk_sub_i16 v16, 0, v5
1354 ; GFX9-NEXT: v_pk_max_i16 v5, v5, v16
1355 ; GFX9-NEXT: v_pk_sub_i16 v16, 0, v6
1356 ; GFX9-NEXT: v_pk_max_i16 v6, v6, v16
1357 ; GFX9-NEXT: v_pk_sub_i16 v16, 0, v7
1358 ; GFX9-NEXT: v_pk_max_i16 v7, v7, v16
1359 ; GFX9-NEXT: v_pk_sub_i16 v16, 0, v8
1360 ; GFX9-NEXT: v_pk_max_i16 v8, v8, v16
1361 ; GFX9-NEXT: v_pk_sub_i16 v16, 0, v9
1362 ; GFX9-NEXT: v_pk_max_i16 v9, v9, v16
1363 ; GFX9-NEXT: v_pk_sub_i16 v16, 0, v10
1364 ; GFX9-NEXT: v_pk_max_i16 v10, v10, v16
1365 ; GFX9-NEXT: v_pk_sub_i16 v16, 0, v11
1366 ; GFX9-NEXT: v_pk_max_i16 v11, v11, v16
1367 ; GFX9-NEXT: v_pk_sub_i16 v16, 0, v12
1368 ; GFX9-NEXT: v_pk_max_i16 v12, v12, v16
1369 ; GFX9-NEXT: v_pk_sub_i16 v16, 0, v13
1370 ; GFX9-NEXT: v_pk_max_i16 v13, v13, v16
1371 ; GFX9-NEXT: v_pk_sub_i16 v16, 0, v14
1372 ; GFX9-NEXT: v_pk_max_i16 v14, v14, v16
1373 ; GFX9-NEXT: v_pk_sub_i16 v16, 0, v15
1374 ; GFX9-NEXT: v_pk_max_i16 v15, v15, v16
1375 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1377 ; GFX10-LABEL: v_abs_v32i16:
1379 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1380 ; GFX10-NEXT: v_pk_sub_i16 v16, 0, v0
1381 ; GFX10-NEXT: v_pk_sub_i16 v17, 0, v2
1382 ; GFX10-NEXT: v_pk_sub_i16 v18, 0, v3
1383 ; GFX10-NEXT: v_pk_sub_i16 v19, 0, v4
1384 ; GFX10-NEXT: v_pk_sub_i16 v20, 0, v5
1385 ; GFX10-NEXT: v_pk_max_i16 v0, v0, v16
1386 ; GFX10-NEXT: v_pk_sub_i16 v16, 0, v1
1387 ; GFX10-NEXT: v_pk_max_i16 v2, v2, v17
1388 ; GFX10-NEXT: v_pk_max_i16 v3, v3, v18
1389 ; GFX10-NEXT: v_pk_max_i16 v4, v4, v19
1390 ; GFX10-NEXT: v_pk_max_i16 v5, v5, v20
1391 ; GFX10-NEXT: v_pk_max_i16 v1, v1, v16
1392 ; GFX10-NEXT: v_pk_sub_i16 v16, 0, v6
1393 ; GFX10-NEXT: v_pk_sub_i16 v17, 0, v7
1394 ; GFX10-NEXT: v_pk_sub_i16 v18, 0, v8
1395 ; GFX10-NEXT: v_pk_sub_i16 v19, 0, v9
1396 ; GFX10-NEXT: v_pk_sub_i16 v20, 0, v10
1397 ; GFX10-NEXT: v_pk_max_i16 v6, v6, v16
1398 ; GFX10-NEXT: v_pk_max_i16 v7, v7, v17
1399 ; GFX10-NEXT: v_pk_max_i16 v8, v8, v18
1400 ; GFX10-NEXT: v_pk_max_i16 v9, v9, v19
1401 ; GFX10-NEXT: v_pk_max_i16 v10, v10, v20
1402 ; GFX10-NEXT: v_pk_sub_i16 v16, 0, v11
1403 ; GFX10-NEXT: v_pk_sub_i16 v17, 0, v12
1404 ; GFX10-NEXT: v_pk_sub_i16 v18, 0, v13
1405 ; GFX10-NEXT: v_pk_sub_i16 v19, 0, v14
1406 ; GFX10-NEXT: v_pk_sub_i16 v20, 0, v15
1407 ; GFX10-NEXT: v_pk_max_i16 v11, v11, v16
1408 ; GFX10-NEXT: v_pk_max_i16 v12, v12, v17
1409 ; GFX10-NEXT: v_pk_max_i16 v13, v13, v18
1410 ; GFX10-NEXT: v_pk_max_i16 v14, v14, v19
1411 ; GFX10-NEXT: v_pk_max_i16 v15, v15, v20
1412 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1414 ; GFX11-LABEL: v_abs_v32i16:
1416 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1417 ; GFX11-NEXT: v_pk_sub_i16 v16, 0, v0
1418 ; GFX11-NEXT: v_pk_sub_i16 v17, 0, v2
1419 ; GFX11-NEXT: v_pk_sub_i16 v18, 0, v3
1420 ; GFX11-NEXT: v_pk_sub_i16 v19, 0, v4
1421 ; GFX11-NEXT: v_pk_sub_i16 v20, 0, v5
1422 ; GFX11-NEXT: v_pk_max_i16 v0, v0, v16
1423 ; GFX11-NEXT: v_pk_sub_i16 v16, 0, v1
1424 ; GFX11-NEXT: v_pk_max_i16 v2, v2, v17
1425 ; GFX11-NEXT: v_pk_max_i16 v3, v3, v18
1426 ; GFX11-NEXT: v_pk_max_i16 v4, v4, v19
1427 ; GFX11-NEXT: v_pk_max_i16 v5, v5, v20
1428 ; GFX11-NEXT: v_pk_max_i16 v1, v1, v16
1429 ; GFX11-NEXT: v_pk_sub_i16 v16, 0, v6
1430 ; GFX11-NEXT: v_pk_sub_i16 v17, 0, v7
1431 ; GFX11-NEXT: v_pk_sub_i16 v18, 0, v8
1432 ; GFX11-NEXT: v_pk_sub_i16 v19, 0, v9
1433 ; GFX11-NEXT: v_pk_sub_i16 v20, 0, v10
1434 ; GFX11-NEXT: v_pk_max_i16 v6, v6, v16
1435 ; GFX11-NEXT: v_pk_max_i16 v7, v7, v17
1436 ; GFX11-NEXT: v_pk_max_i16 v8, v8, v18
1437 ; GFX11-NEXT: v_pk_max_i16 v9, v9, v19
1438 ; GFX11-NEXT: v_pk_max_i16 v10, v10, v20
1439 ; GFX11-NEXT: v_pk_sub_i16 v16, 0, v11
1440 ; GFX11-NEXT: v_pk_sub_i16 v17, 0, v12
1441 ; GFX11-NEXT: v_pk_sub_i16 v18, 0, v13
1442 ; GFX11-NEXT: v_pk_sub_i16 v19, 0, v14
1443 ; GFX11-NEXT: v_pk_sub_i16 v20, 0, v15
1444 ; GFX11-NEXT: v_pk_max_i16 v11, v11, v16
1445 ; GFX11-NEXT: v_pk_max_i16 v12, v12, v17
1446 ; GFX11-NEXT: v_pk_max_i16 v13, v13, v18
1447 ; GFX11-NEXT: v_pk_max_i16 v14, v14, v19
1448 ; GFX11-NEXT: v_pk_max_i16 v15, v15, v20
1449 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1451 ; GFX12-LABEL: v_abs_v32i16:
1453 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1454 ; GFX12-NEXT: s_wait_expcnt 0x0
1455 ; GFX12-NEXT: s_wait_samplecnt 0x0
1456 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1457 ; GFX12-NEXT: s_wait_kmcnt 0x0
1458 ; GFX12-NEXT: v_pk_sub_i16 v16, 0, v0
1459 ; GFX12-NEXT: v_pk_sub_i16 v17, 0, v2
1460 ; GFX12-NEXT: v_pk_sub_i16 v18, 0, v3
1461 ; GFX12-NEXT: v_pk_sub_i16 v19, 0, v4
1462 ; GFX12-NEXT: v_pk_sub_i16 v20, 0, v5
1463 ; GFX12-NEXT: v_pk_max_i16 v0, v0, v16
1464 ; GFX12-NEXT: v_pk_sub_i16 v16, 0, v1
1465 ; GFX12-NEXT: v_pk_max_i16 v2, v2, v17
1466 ; GFX12-NEXT: v_pk_max_i16 v3, v3, v18
1467 ; GFX12-NEXT: v_pk_max_i16 v4, v4, v19
1468 ; GFX12-NEXT: v_pk_max_i16 v5, v5, v20
1469 ; GFX12-NEXT: v_pk_max_i16 v1, v1, v16
1470 ; GFX12-NEXT: v_pk_sub_i16 v16, 0, v6
1471 ; GFX12-NEXT: v_pk_sub_i16 v17, 0, v7
1472 ; GFX12-NEXT: v_pk_sub_i16 v18, 0, v8
1473 ; GFX12-NEXT: v_pk_sub_i16 v19, 0, v9
1474 ; GFX12-NEXT: v_pk_sub_i16 v20, 0, v10
1475 ; GFX12-NEXT: v_pk_max_i16 v6, v6, v16
1476 ; GFX12-NEXT: v_pk_max_i16 v7, v7, v17
1477 ; GFX12-NEXT: v_pk_max_i16 v8, v8, v18
1478 ; GFX12-NEXT: v_pk_max_i16 v9, v9, v19
1479 ; GFX12-NEXT: v_pk_max_i16 v10, v10, v20
1480 ; GFX12-NEXT: v_pk_sub_i16 v16, 0, v11
1481 ; GFX12-NEXT: v_pk_sub_i16 v17, 0, v12
1482 ; GFX12-NEXT: v_pk_sub_i16 v18, 0, v13
1483 ; GFX12-NEXT: v_pk_sub_i16 v19, 0, v14
1484 ; GFX12-NEXT: v_pk_sub_i16 v20, 0, v15
1485 ; GFX12-NEXT: v_pk_max_i16 v11, v11, v16
1486 ; GFX12-NEXT: v_pk_max_i16 v12, v12, v17
1487 ; GFX12-NEXT: v_pk_max_i16 v13, v13, v18
1488 ; GFX12-NEXT: v_pk_max_i16 v14, v14, v19
1489 ; GFX12-NEXT: v_pk_max_i16 v15, v15, v20
1490 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1491 %res = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %arg, i1 false)