1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-SAFE %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI,VI-SAFE %s
4 ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX9,GFX9-SAFE %s
5 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-SAFE %s
7 ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=CI,CI-NSZ %s
8 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=VI,VI-NSZ %s
9 ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX9,GFX9-NSZ %s
10 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX11,GFX11-NSZ %s
; Per lane: pick fabs(%x) where %c == 0, otherwise fabs(%y), then add %z.
; Both select operands are fabs results. In the checks below, hawaii applies
; the abs as a |vN| source modifier on the f16-to-f32 converts, while the other
; targets mask the packed inputs with 0x7fff7fff before selecting.
12 define <2 x half> @add_select_fabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
13 ; CI-LABEL: add_select_fabs_fabs_v2f16:
15 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
17 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
18 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
19 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
20 ; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
21 ; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
22 ; CI-NEXT: v_cvt_f32_f16_e64 v3, |v3|
23 ; CI-NEXT: v_cvt_f32_f16_e64 v5, |v5|
24 ; CI-NEXT: v_cvt_f32_f16_e64 v2, |v2|
25 ; CI-NEXT: v_cvt_f32_f16_e64 v4, |v4|
26 ; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
27 ; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
28 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
29 ; CI-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc
30 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
31 ; CI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
32 ; CI-NEXT: v_add_f32_e32 v0, v0, v6
33 ; CI-NEXT: v_add_f32_e32 v1, v1, v7
34 ; CI-NEXT: s_setpc_b64 s[30:31]
36 ; VI-LABEL: add_select_fabs_fabs_v2f16:
38 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39 ; VI-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
40 ; VI-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
41 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
42 ; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
43 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
44 ; VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
45 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
46 ; VI-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
47 ; VI-NEXT: v_add_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
48 ; VI-NEXT: v_add_f16_e32 v0, v0, v4
49 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
50 ; VI-NEXT: s_setpc_b64 s[30:31]
52 ; GFX9-LABEL: add_select_fabs_fabs_v2f16:
54 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55 ; GFX9-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
56 ; GFX9-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
57 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
58 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
59 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
60 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
61 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
62 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
63 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
64 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
65 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
66 ; GFX9-NEXT: s_setpc_b64 s[30:31]
68 ; GFX11-LABEL: add_select_fabs_fabs_v2f16:
70 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
71 ; GFX11-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
72 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
73 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
74 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v3
75 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
76 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
77 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
78 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
79 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
80 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
81 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
82 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
83 ; GFX11-NEXT: v_pk_add_f16 v0, v0, v4
84 ; GFX11-NEXT: s_setpc_b64 s[30:31]
85 %cmp = icmp eq <2 x i32> %c, zeroinitializer
86 %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
87 %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
88 %select = select <2 x i1> %cmp, <2 x half> %fabs.x, <2 x half> %fabs.y
89 %add = fadd <2 x half> %select, %z
; Select between fabs(%x) (where %c == 0) and fabs(%y), then add %z. The true
; operand fabs(%x) has a second use: it is also added to %w. Both sums are
; returned together as a two-element struct.
93 define { <2 x half>, <2 x half> } @add_select_multi_use_lhs_fabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %w, <2 x half> %z) {
94 ; CI-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
96 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
98 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
99 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
100 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
101 ; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
102 ; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
103 ; CI-NEXT: v_cvt_f16_f32_e32 v9, v9
104 ; CI-NEXT: v_cvt_f16_f32_e32 v8, v8
105 ; CI-NEXT: v_cvt_f32_f16_e64 v3, |v3|
106 ; CI-NEXT: v_cvt_f32_f16_e64 v5, |v5|
107 ; CI-NEXT: v_cvt_f32_f16_e64 v2, |v2|
108 ; CI-NEXT: v_cvt_f32_f16_e64 v4, |v4|
109 ; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
110 ; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
111 ; CI-NEXT: v_cvt_f32_f16_e32 v9, v9
112 ; CI-NEXT: v_cvt_f32_f16_e32 v8, v8
113 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
114 ; CI-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc
115 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
116 ; CI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
117 ; CI-NEXT: v_add_f32_e32 v0, v0, v8
118 ; CI-NEXT: v_add_f32_e32 v1, v1, v9
119 ; CI-NEXT: v_add_f32_e32 v2, v2, v6
120 ; CI-NEXT: v_add_f32_e32 v3, v3, v7
121 ; CI-NEXT: s_setpc_b64 s[30:31]
123 ; VI-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
125 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126 ; VI-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
127 ; VI-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
128 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
129 ; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
130 ; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v2
131 ; VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
132 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
133 ; VI-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
134 ; VI-NEXT: v_add_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
135 ; VI-NEXT: v_add_f16_e32 v0, v0, v5
136 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
137 ; VI-NEXT: v_add_f16_sdwa v1, v6, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
138 ; VI-NEXT: v_add_f16_e32 v2, v2, v4
139 ; VI-NEXT: v_or_b32_e32 v1, v2, v1
140 ; VI-NEXT: s_setpc_b64 s[30:31]
142 ; GFX9-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
144 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145 ; GFX9-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
146 ; GFX9-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
147 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
148 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
149 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v2
150 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
151 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
152 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
153 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
154 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
155 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v5
156 ; GFX9-NEXT: v_pk_add_f16 v1, v2, v4
157 ; GFX9-NEXT: s_setpc_b64 s[30:31]
159 ; GFX11-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
161 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
162 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
163 ; GFX11-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
164 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
165 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
166 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v2
167 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v3
168 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
169 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
170 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
171 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
172 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
173 ; GFX11-NEXT: v_pk_add_f16 v1, v2, v4
174 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
175 ; GFX11-NEXT: v_pk_add_f16 v0, v0, v5
176 ; GFX11-NEXT: s_setpc_b64 s[30:31]
177 %cmp = icmp eq <2 x i32> %c, zeroinitializer
178 %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
179 %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
180 %select = select <2 x i1> %cmp, <2 x half> %fabs.x, <2 x half> %fabs.y
181 %add0 = fadd <2 x half> %select, %z
182 %add1 = fadd <2 x half> %fabs.x, %w
183 %insert.0 = insertvalue { <2 x half>, <2 x half> } poison, <2 x half> %add0, 0
184 %insert.1 = insertvalue { <2 x half>, <2 x half> } %insert.0, <2 x half> %add1, 1
185 ret { <2 x half>, <2 x half> } %insert.1
; Select between fabs(%x) (where %c == 0) and fabs(%y), then add %z. Here the
; extra use of fabs(%x) is not an fadd: the value itself is returned unchanged
; as the second struct element, next to the sum in the first element.
188 define { <2 x half>, <2 x half> } @add_select_multi_store_use_lhs_fabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
189 ; CI-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
191 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
192 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
193 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
194 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
195 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
196 ; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
197 ; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
198 ; CI-NEXT: v_cvt_f32_f16_e64 v3, |v3|
199 ; CI-NEXT: v_cvt_f32_f16_e64 v5, |v5|
200 ; CI-NEXT: v_cvt_f32_f16_e64 v4, |v4|
201 ; CI-NEXT: v_cvt_f32_f16_e64 v2, |v2|
202 ; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
203 ; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
204 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
205 ; CI-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc
206 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
207 ; CI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
208 ; CI-NEXT: v_add_f32_e32 v0, v0, v6
209 ; CI-NEXT: v_add_f32_e32 v1, v1, v7
210 ; CI-NEXT: s_setpc_b64 s[30:31]
212 ; VI-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
214 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
215 ; VI-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
216 ; VI-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
217 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
218 ; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
219 ; VI-NEXT: v_lshrrev_b32_e32 v5, 16, v2
220 ; VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
221 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
222 ; VI-NEXT: v_cndmask_b32_e32 v1, v3, v5, vcc
223 ; VI-NEXT: v_add_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
224 ; VI-NEXT: v_add_f16_e32 v0, v0, v4
225 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
226 ; VI-NEXT: v_mov_b32_e32 v1, v2
227 ; VI-NEXT: s_setpc_b64 s[30:31]
229 ; GFX9-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
231 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
232 ; GFX9-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
233 ; GFX9-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
234 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
235 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
236 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v2
237 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
238 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
239 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v5, vcc
240 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
241 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
242 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
243 ; GFX9-NEXT: v_mov_b32_e32 v1, v2
244 ; GFX9-NEXT: s_setpc_b64 s[30:31]
246 ; GFX11-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
248 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
249 ; GFX11-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
250 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
251 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
252 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v3
253 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
254 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
255 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
256 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
257 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
258 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
259 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
260 ; GFX11-NEXT: v_mov_b32_e32 v1, v2
261 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
262 ; GFX11-NEXT: v_pk_add_f16 v0, v0, v4
263 ; GFX11-NEXT: s_setpc_b64 s[30:31]
264 %cmp = icmp eq <2 x i32> %c, zeroinitializer
265 %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
266 %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
267 %select = select <2 x i1> %cmp, <2 x half> %fabs.x, <2 x half> %fabs.y
268 %add0 = fadd <2 x half> %select, %z
269 %insert.0 = insertvalue { <2 x half>, <2 x half> } poison, <2 x half> %add0, 0
270 %insert.1 = insertvalue { <2 x half>, <2 x half> } %insert.0, <2 x half> %fabs.x, 1
271 ret { <2 x half>, <2 x half> } %insert.1
; Select between fabs(%x) (where %c == 0) and fabs(%y), then add %z. The false
; operand fabs(%y) has a second use: it is also added to %w. Both sums are
; returned together as a two-element struct.
274 define { <2 x half>, <2 x half> } @add_select_multi_use_rhs_fabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z, <2 x half> %w) {
275 ; CI-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
277 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
278 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
279 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
280 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
281 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
282 ; CI-NEXT: v_cvt_f16_f32_e32 v9, v9
283 ; CI-NEXT: v_cvt_f16_f32_e32 v8, v8
284 ; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
285 ; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
286 ; CI-NEXT: v_cvt_f32_f16_e64 v3, |v3|
287 ; CI-NEXT: v_cvt_f32_f16_e64 v5, |v5|
288 ; CI-NEXT: v_cvt_f32_f16_e64 v2, |v2|
289 ; CI-NEXT: v_cvt_f32_f16_e64 v4, |v4|
290 ; CI-NEXT: v_cvt_f32_f16_e32 v9, v9
291 ; CI-NEXT: v_cvt_f32_f16_e32 v8, v8
292 ; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
293 ; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
294 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
295 ; CI-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc
296 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
297 ; CI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
298 ; CI-NEXT: v_add_f32_e32 v0, v0, v6
299 ; CI-NEXT: v_add_f32_e32 v1, v1, v7
300 ; CI-NEXT: v_add_f32_e32 v2, v4, v8
301 ; CI-NEXT: v_add_f32_e32 v3, v5, v9
302 ; CI-NEXT: s_setpc_b64 s[30:31]
304 ; VI-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
306 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
307 ; VI-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
308 ; VI-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
309 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
310 ; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
311 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
312 ; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v3
313 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
314 ; VI-NEXT: v_cndmask_b32_e32 v1, v6, v2, vcc
315 ; VI-NEXT: v_add_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
316 ; VI-NEXT: v_add_f16_e32 v0, v0, v4
317 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
318 ; VI-NEXT: v_add_f16_sdwa v1, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
319 ; VI-NEXT: v_add_f16_e32 v2, v3, v5
320 ; VI-NEXT: v_or_b32_e32 v1, v2, v1
321 ; VI-NEXT: s_setpc_b64 s[30:31]
323 ; GFX9-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
325 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
326 ; GFX9-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
327 ; GFX9-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
328 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
329 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
330 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
331 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3
332 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
333 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v2, vcc
334 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
335 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
336 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
337 ; GFX9-NEXT: v_pk_add_f16 v1, v3, v5
338 ; GFX9-NEXT: s_setpc_b64 s[30:31]
340 ; GFX11-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
342 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
343 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
344 ; GFX11-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
345 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
346 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
347 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v2
348 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v3
349 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
350 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
351 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
352 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
353 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
354 ; GFX11-NEXT: v_pk_add_f16 v1, v3, v5
355 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
356 ; GFX11-NEXT: v_pk_add_f16 v0, v0, v4
357 ; GFX11-NEXT: s_setpc_b64 s[30:31]
358 %cmp = icmp eq <2 x i32> %c, zeroinitializer
359 %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
360 %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
361 %select = select <2 x i1> %cmp, <2 x half> %fabs.x, <2 x half> %fabs.y
362 %add0 = fadd <2 x half> %select, %z
363 %add1 = fadd <2 x half> %fabs.y, %w
364 %insert.0 = insertvalue { <2 x half>, <2 x half> } poison, <2 x half> %add0, 0
365 %insert.1 = insertvalue { <2 x half>, <2 x half> } %insert.0, <2 x half> %add1, 1
366 ret { <2 x half>, <2 x half> } %insert.1
; Only the true operand of the select is a fabs: per lane, pick fabs(%x) where
; %c == 0, otherwise the unmodified %y, then add %z. In the checks below only
; one of the two select inputs gets the abs treatment (|vN| modifier on hawaii,
; a single 0x7fff7fff mask on the other targets).
369 define <2 x half> @add_select_fabs_var_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
370 ; CI-LABEL: add_select_fabs_var_v2f16:
372 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
373 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
374 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
375 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
376 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
377 ; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
378 ; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
379 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
380 ; CI-NEXT: v_cvt_f32_f16_e64 v3, |v3|
381 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
382 ; CI-NEXT: v_cvt_f32_f16_e64 v2, |v2|
383 ; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
384 ; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
385 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
386 ; CI-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc
387 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
388 ; CI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
389 ; CI-NEXT: v_add_f32_e32 v0, v0, v6
390 ; CI-NEXT: v_add_f32_e32 v1, v1, v7
391 ; CI-NEXT: s_setpc_b64 s[30:31]
393 ; VI-LABEL: add_select_fabs_var_v2f16:
395 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
396 ; VI-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
397 ; VI-NEXT: v_lshrrev_b32_e32 v5, 16, v3
398 ; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v2
399 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
400 ; VI-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
401 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
402 ; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
403 ; VI-NEXT: v_add_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
404 ; VI-NEXT: v_add_f16_e32 v0, v0, v4
405 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
406 ; VI-NEXT: s_setpc_b64 s[30:31]
408 ; GFX9-LABEL: add_select_fabs_var_v2f16:
410 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
411 ; GFX9-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
412 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v3
413 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v2
414 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
415 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
416 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
417 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
418 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
419 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
420 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
421 ; GFX9-NEXT: s_setpc_b64 s[30:31]
423 ; GFX11-LABEL: add_select_fabs_var_v2f16:
425 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
426 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
427 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v3
428 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
429 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
430 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v2
431 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo
432 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
433 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
434 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
435 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
436 ; GFX11-NEXT: v_pk_add_f16 v0, v0, v4
437 ; GFX11-NEXT: s_setpc_b64 s[30:31]
438 %cmp = icmp eq <2 x i32> %c, zeroinitializer
439 %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
440 %select = select <2 x i1> %cmp, <2 x half> %fabs.x, <2 x half> %y
441 %add = fadd <2 x half> %select, %z
; Select between fabs(%x) (where %c == 0) and a constant splat of -1.0, then
; add %y. In the checks below hawaii uses -1.0 directly as a v_cndmask operand,
; while the other targets select against 0xbc00 (the f16 encoding of -1.0).
445 define <2 x half> @add_select_fabs_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
446 ; CI-LABEL: add_select_fabs_negk_v2f16:
448 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
449 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
450 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
451 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
452 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
453 ; CI-NEXT: v_cvt_f32_f16_e64 v3, |v3|
454 ; CI-NEXT: v_cvt_f32_f16_e64 v2, |v2|
455 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
456 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
457 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
458 ; CI-NEXT: v_cndmask_b32_e32 v1, -1.0, v3, vcc
459 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
460 ; CI-NEXT: v_cndmask_b32_e32 v0, -1.0, v2, vcc
461 ; CI-NEXT: v_add_f32_e32 v0, v0, v4
462 ; CI-NEXT: v_add_f32_e32 v1, v1, v5
463 ; CI-NEXT: s_setpc_b64 s[30:31]
465 ; VI-LABEL: add_select_fabs_negk_v2f16:
467 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
468 ; VI-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
469 ; VI-NEXT: v_mov_b32_e32 v4, 0xbc00
470 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
471 ; VI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
472 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
473 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
474 ; VI-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
475 ; VI-NEXT: v_add_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
476 ; VI-NEXT: v_add_f16_e32 v0, v0, v3
477 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
478 ; VI-NEXT: s_setpc_b64 s[30:31]
480 ; GFX9-LABEL: add_select_fabs_negk_v2f16:
482 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
483 ; GFX9-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
484 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xbc00
485 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
486 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
487 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
488 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
489 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
490 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
491 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
492 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v3
493 ; GFX9-NEXT: s_setpc_b64 s[30:31]
495 ; GFX11-LABEL: add_select_fabs_negk_v2f16:
497 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
498 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
499 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
500 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
501 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
502 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
503 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
504 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
505 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
506 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
507 ; GFX11-NEXT: v_pk_add_f16 v0, v0, v3
508 ; GFX11-NEXT: s_setpc_b64 s[30:31]
509 %cmp = icmp eq <2 x i32> %c, zeroinitializer
510 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
511 %select = select <2 x i1> %cmp, <2 x half> %fabs, <2 x half> <half -1.0, half -1.0>
512 %add = fadd <2 x half> %select, %y
516 ; FIXME: fabs should fold away
; Here the fabs is applied to the select result rather than to its operands:
; the select picks a splat of -2.0 where %c == 0 and a splat of -1.0 otherwise,
; the fabs of that is added to %x. Both select inputs are constants, yet the
; checks below still contain an abs (|vN| modifiers on hawaii and fiji, a
; 0x7fff7fff mask on gfx906 and gfx1100).
517 define <2 x half> @add_select_fabs_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x) {
518 ; CI-LABEL: add_select_fabs_negk_negk_v2f16:
520 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
522 ; CI-NEXT: v_cndmask_b32_e64 v0, -1.0, -2.0, vcc
523 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
524 ; CI-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc
525 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
526 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
527 ; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
528 ; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
529 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
530 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
531 ; CI-NEXT: v_cvt_f32_f16_e64 v0, |v0|
532 ; CI-NEXT: v_cvt_f32_f16_e64 v1, |v1|
533 ; CI-NEXT: v_add_f32_e32 v0, v0, v2
534 ; CI-NEXT: v_add_f32_e32 v1, v1, v3
535 ; CI-NEXT: s_setpc_b64 s[30:31]
537 ; VI-LABEL: add_select_fabs_negk_negk_v2f16:
539 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
540 ; VI-NEXT: v_mov_b32_e32 v3, 0xbc00
541 ; VI-NEXT: v_mov_b32_e32 v4, 0xc000
542 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
543 ; VI-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
544 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
545 ; VI-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
546 ; VI-NEXT: v_add_f16_sdwa v1, |v1|, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
547 ; VI-NEXT: v_add_f16_e64 v0, |v0|, v2
548 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
549 ; VI-NEXT: s_setpc_b64 s[30:31]
551 ; GFX9-LABEL: add_select_fabs_negk_negk_v2f16:
553 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
554 ; GFX9-NEXT: v_mov_b32_e32 v3, 0xbc00
555 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xc000
556 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
557 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
558 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
559 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
560 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
561 ; GFX9-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
562 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v2
563 ; GFX9-NEXT: s_setpc_b64 s[30:31]
565 ; GFX11-LABEL: add_select_fabs_negk_negk_v2f16:
567 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
568 ; GFX11-NEXT: v_mov_b32_e32 v3, 0xc000
569 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
570 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
571 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
572 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
573 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
574 ; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
575 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
576 ; GFX11-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
577 ; GFX11-NEXT: v_pk_add_f16 v0, v0, v2
578 ; GFX11-NEXT: s_setpc_b64 s[30:31]
579 %cmp = icmp eq <2 x i32> %c, zeroinitializer
580 %select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0>
581 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %select)
582 %add = fadd <2 x half> %fabs, %x
; No fabs is involved here: the select picks a splat of 2.0 where %c == 0 and
; a splat of 1.0 otherwise, and the result is added to %x. The checks below
; contain no abs modifier and no sign-bit mask.
586 define <2 x half> @add_select_posk_posk_v2f16(<2 x i32> %c, <2 x half> %x) {
587 ; CI-LABEL: add_select_posk_posk_v2f16:
589 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
590 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
591 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
592 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
593 ; CI-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc
594 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
595 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
596 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
597 ; CI-NEXT: v_cndmask_b32_e64 v0, 1.0, 2.0, vcc
598 ; CI-NEXT: v_add_f32_e32 v1, v1, v3
599 ; CI-NEXT: v_add_f32_e32 v0, v0, v2
600 ; CI-NEXT: s_setpc_b64 s[30:31]
602 ; VI-LABEL: add_select_posk_posk_v2f16:
604 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
605 ; VI-NEXT: v_mov_b32_e32 v3, 0x3c00
606 ; VI-NEXT: v_mov_b32_e32 v4, 0x4000
607 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
608 ; VI-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
609 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
610 ; VI-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
611 ; VI-NEXT: v_add_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
612 ; VI-NEXT: v_add_f16_e32 v0, v0, v2
613 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
614 ; VI-NEXT: s_setpc_b64 s[30:31]
616 ; GFX9-LABEL: add_select_posk_posk_v2f16:
618 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
619 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x3c00
620 ; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000
621 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
622 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
623 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
624 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
625 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
626 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v2
627 ; GFX9-NEXT: s_setpc_b64 s[30:31]
629 ; GFX11-LABEL: add_select_posk_posk_v2f16:
631 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
632 ; GFX11-NEXT: v_mov_b32_e32 v3, 0x4000
633 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
634 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
635 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
636 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
637 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo
638 ; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
639 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
640 ; GFX11-NEXT: v_pk_add_f16 v0, v0, v2
641 ; GFX11-NEXT: s_setpc_b64 s[30:31]
642 %cmp = icmp eq <2 x i32> %c, zeroinitializer
643 %select = select <2 x i1> %cmp, <2 x half> <half 2.0, half 2.0>, <2 x half> <half 1.0, half 1.0>
644 %add = fadd <2 x half> %select, %x
; Operand order swapped relative to add_select_fabs_negk_v2f16: the constant
; splat of -1.0 is the true operand (taken where %c == 0) and fabs(%x) is the
; false operand; the result is added to %y. The checks below compare with
; v_cmp_ne instead of v_cmp_eq.
648 define <2 x half> @add_select_negk_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
649 ; CI-LABEL: add_select_negk_fabs_v2f16:
651 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
652 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
653 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
654 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
655 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
656 ; CI-NEXT: v_cvt_f32_f16_e64 v3, |v3|
657 ; CI-NEXT: v_cvt_f32_f16_e64 v2, |v2|
658 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
659 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
660 ; CI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
661 ; CI-NEXT: v_cndmask_b32_e32 v1, -1.0, v3, vcc
662 ; CI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
663 ; CI-NEXT: v_cndmask_b32_e32 v0, -1.0, v2, vcc
664 ; CI-NEXT: v_add_f32_e32 v0, v0, v4
665 ; CI-NEXT: v_add_f32_e32 v1, v1, v5
666 ; CI-NEXT: s_setpc_b64 s[30:31]
668 ; VI-LABEL: add_select_negk_fabs_v2f16:
670 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
671 ; VI-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
672 ; VI-NEXT: v_mov_b32_e32 v4, 0xbc00
673 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
674 ; VI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
675 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
676 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
677 ; VI-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
678 ; VI-NEXT: v_add_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
679 ; VI-NEXT: v_add_f16_e32 v0, v0, v3
680 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
681 ; VI-NEXT: s_setpc_b64 s[30:31]
683 ; GFX9-LABEL: add_select_negk_fabs_v2f16:
685 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
686 ; GFX9-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
687 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xbc00
688 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
689 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
690 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
691 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
692 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
693 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
694 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
695 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v3
696 ; GFX9-NEXT: s_setpc_b64 s[30:31]
698 ; GFX11-LABEL: add_select_negk_fabs_v2f16:
700 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
701 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
702 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
703 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
704 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
705 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
706 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
707 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
708 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
709 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
710 ; GFX11-NEXT: v_pk_add_f16 v0, v0, v3
711 ; GFX11-NEXT: s_setpc_b64 s[30:31]
712 %cmp = icmp eq <2 x i32> %c, zeroinitializer
713 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
714 %select = select <2 x i1> %cmp, <2 x half> <half -1.0, half -1.0>, <2 x half> %fabs
715 %add = fadd <2 x half> %select, %y
; Per-lane select between the constant -1024.0 (chosen where %c == 0) and
; fabs(%x), followed by an fadd with %y. The checks expect the constant as a
; 32-bit literal: 0xc4800000 in the hawaii output (which works on f32-converted
; values) and 0xe400 in the fiji, gfx906 and gfx1100 output. The fabs shows up
; as a |src| modifier on hawaii and as an AND with 0x7fff7fff elsewhere.
719 define <2 x half> @add_select_negliteralk_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
720 ; CI-LABEL: add_select_negliteralk_fabs_v2f16:
722 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
723 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
724 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
725 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
726 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
727 ; CI-NEXT: v_cvt_f32_f16_e64 v3, |v3|
728 ; CI-NEXT: v_cvt_f32_f16_e64 v2, |v2|
729 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
730 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
731 ; CI-NEXT: v_mov_b32_e32 v6, 0xc4800000
732 ; CI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
733 ; CI-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc
734 ; CI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
735 ; CI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc
736 ; CI-NEXT: v_add_f32_e32 v0, v0, v4
737 ; CI-NEXT: v_add_f32_e32 v1, v1, v5
738 ; CI-NEXT: s_setpc_b64 s[30:31]
740 ; VI-LABEL: add_select_negliteralk_fabs_v2f16:
742 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
743 ; VI-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
744 ; VI-NEXT: v_mov_b32_e32 v4, 0xe400
745 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
746 ; VI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
747 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
748 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
749 ; VI-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
750 ; VI-NEXT: v_add_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
751 ; VI-NEXT: v_add_f16_e32 v0, v0, v3
752 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
753 ; VI-NEXT: s_setpc_b64 s[30:31]
755 ; GFX9-LABEL: add_select_negliteralk_fabs_v2f16:
757 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
758 ; GFX9-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
759 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xe400
760 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
761 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
762 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
763 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
764 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
765 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
766 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
767 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v3
768 ; GFX9-NEXT: s_setpc_b64 s[30:31]
770 ; GFX11-LABEL: add_select_negliteralk_fabs_v2f16:
772 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
773 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
774 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
775 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
776 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
777 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo
778 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
779 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo
780 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
781 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
782 ; GFX11-NEXT: v_pk_add_f16 v0, v0, v3
783 ; GFX11-NEXT: s_setpc_b64 s[30:31]
784 %cmp = icmp eq <2 x i32> %c, zeroinitializer
785 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
786 %select = select <2 x i1> %cmp, <2 x half> <half -1024.0, half -1024.0>, <2 x half> %fabs
787 %add = fadd <2 x half> %select, %y
; Per-lane select of fabs(%x) where %c == 0 and the constant 1.0 otherwise,
; followed by an fadd with %y. The checks expect 1.0 as an inline operand of
; v_cndmask on hawaii and as the f16 bit pattern 0x3c00 on fiji, gfx906 and
; gfx1100; the add itself stays a plain add on every target.
791 define <2 x half> @add_select_fabs_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
792 ; CI-LABEL: add_select_fabs_posk_v2f16:
794 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
795 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
796 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
797 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
798 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
799 ; CI-NEXT: v_cvt_f32_f16_e64 v3, |v3|
800 ; CI-NEXT: v_cvt_f32_f16_e64 v2, |v2|
801 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
802 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
803 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
804 ; CI-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
805 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
806 ; CI-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
807 ; CI-NEXT: v_add_f32_e32 v0, v0, v4
808 ; CI-NEXT: v_add_f32_e32 v1, v1, v5
809 ; CI-NEXT: s_setpc_b64 s[30:31]
811 ; VI-LABEL: add_select_fabs_posk_v2f16:
813 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
814 ; VI-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
815 ; VI-NEXT: v_mov_b32_e32 v4, 0x3c00
816 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
817 ; VI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
818 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
819 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
820 ; VI-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
821 ; VI-NEXT: v_add_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
822 ; VI-NEXT: v_add_f16_e32 v0, v0, v3
823 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
824 ; VI-NEXT: s_setpc_b64 s[30:31]
826 ; GFX9-LABEL: add_select_fabs_posk_v2f16:
828 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
829 ; GFX9-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
830 ; GFX9-NEXT: v_mov_b32_e32 v4, 0x3c00
831 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
832 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
833 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
834 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
835 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
836 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
837 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
838 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v3
839 ; GFX9-NEXT: s_setpc_b64 s[30:31]
841 ; GFX11-LABEL: add_select_fabs_posk_v2f16:
843 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
844 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
845 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
846 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
847 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
848 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
849 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
850 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
851 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
852 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
853 ; GFX11-NEXT: v_pk_add_f16 v0, v0, v3
854 ; GFX11-NEXT: s_setpc_b64 s[30:31]
855 %cmp = icmp eq <2 x i32> %c, zeroinitializer
856 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
857 %select = select <2 x i1> %cmp, <2 x half> %fabs, <2 x half> <half 1.0, half 1.0>
858 %add = fadd <2 x half> %select, %y
; Same shape as a select between fabs(%x) and 1.0 feeding an fadd with %y, but
; with the select operands swapped: the constant 1.0 is chosen where %c == 0
; and fabs(%x) otherwise. The checks expect the swap to be absorbed by the
; compare (v_cmp_ne instead of v_cmp_eq) while the constant stays in the first
; source operand of v_cndmask.
862 define <2 x half> @add_select_posk_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
863 ; CI-LABEL: add_select_posk_fabs_v2f16:
865 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
866 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
867 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
868 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
869 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
870 ; CI-NEXT: v_cvt_f32_f16_e64 v3, |v3|
871 ; CI-NEXT: v_cvt_f32_f16_e64 v2, |v2|
872 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
873 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
874 ; CI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
875 ; CI-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
876 ; CI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
877 ; CI-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
878 ; CI-NEXT: v_add_f32_e32 v0, v0, v4
879 ; CI-NEXT: v_add_f32_e32 v1, v1, v5
880 ; CI-NEXT: s_setpc_b64 s[30:31]
882 ; VI-LABEL: add_select_posk_fabs_v2f16:
884 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
885 ; VI-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
886 ; VI-NEXT: v_mov_b32_e32 v4, 0x3c00
887 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
888 ; VI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
889 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
890 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
891 ; VI-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
892 ; VI-NEXT: v_add_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
893 ; VI-NEXT: v_add_f16_e32 v0, v0, v3
894 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
895 ; VI-NEXT: s_setpc_b64 s[30:31]
897 ; GFX9-LABEL: add_select_posk_fabs_v2f16:
899 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
900 ; GFX9-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
901 ; GFX9-NEXT: v_mov_b32_e32 v4, 0x3c00
902 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
903 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
904 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
905 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
906 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
907 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
908 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
909 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v3
910 ; GFX9-NEXT: s_setpc_b64 s[30:31]
912 ; GFX11-LABEL: add_select_posk_fabs_v2f16:
914 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
915 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
916 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
917 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
918 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
919 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
920 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
921 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
922 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
923 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
924 ; GFX11-NEXT: v_pk_add_f16 v0, v0, v3
925 ; GFX11-NEXT: s_setpc_b64 s[30:31]
926 %cmp = icmp eq <2 x i32> %c, zeroinitializer
927 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
928 %select = select <2 x i1> %cmp, <2 x half> <half 1.0, half 1.0>, <2 x half> %fabs
929 %add = fadd <2 x half> %select, %y
; Both select operands are negated (fneg %x where %c == 0, fneg %y otherwise)
; and the selected value is added to %z. The checks expect no standalone negate
; instruction: the select is done on the un-negated %x / %y and the add becomes
; a subtraction from %z (v_sub_f32 on hawaii, v_sub_f16 on fiji, v_pk_add_f16
; with neg_lo/neg_hi set on the second source on gfx906 and gfx1100).
933 define <2 x half> @add_select_fneg_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
934 ; CI-LABEL: add_select_fneg_fneg_v2f16:
936 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
937 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
938 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
939 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
940 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
941 ; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
942 ; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
943 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
944 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
945 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
946 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
947 ; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
948 ; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
949 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
950 ; CI-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc
951 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
952 ; CI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
953 ; CI-NEXT: v_sub_f32_e32 v0, v6, v0
954 ; CI-NEXT: v_sub_f32_e32 v1, v7, v1
955 ; CI-NEXT: s_setpc_b64 s[30:31]
957 ; VI-LABEL: add_select_fneg_fneg_v2f16:
959 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
960 ; VI-NEXT: v_lshrrev_b32_e32 v5, 16, v2
961 ; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v3
962 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
963 ; VI-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc
964 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
965 ; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
966 ; VI-NEXT: v_sub_f16_sdwa v1, v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
967 ; VI-NEXT: v_sub_f16_e32 v0, v4, v0
968 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
969 ; VI-NEXT: s_setpc_b64 s[30:31]
971 ; GFX9-LABEL: add_select_fneg_fneg_v2f16:
973 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
974 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v2
975 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3
976 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
977 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc
978 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
979 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
980 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
981 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
982 ; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
983 ; GFX9-NEXT: s_setpc_b64 s[30:31]
985 ; GFX11-LABEL: add_select_fneg_fneg_v2f16:
987 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
988 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
989 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v3
990 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
991 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
992 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
993 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
994 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
995 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
996 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
997 ; GFX11-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
998 ; GFX11-NEXT: s_setpc_b64 s[30:31]
999 %cmp = icmp eq <2 x i32> %c, zeroinitializer
1000 %fneg.x = fneg <2 x half> %x
1001 %fneg.y = fneg <2 x half> %y
1002 %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> %fneg.y
1003 %add = fadd <2 x half> %select, %z
; Select between fneg %x and fneg %y feeding an fadd with %z, where the
; true-side operand (fneg %x) has a second user: an fadd with %w. Both sums are
; returned as a two-element struct. The checks expect both adds to be emitted
; as subtractions of the un-negated values (select result from %z, %x from %w)
; with no standalone negate instruction on any target.
1007 define { <2 x half>, <2 x half> } @add_select_multi_use_lhs_fneg_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z, <2 x half> %w) {
1008 ; CI-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
1010 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1011 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1012 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
1013 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1014 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
1015 ; CI-NEXT: v_cvt_f16_f32_e32 v9, v9
1016 ; CI-NEXT: v_cvt_f16_f32_e32 v8, v8
1017 ; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
1018 ; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
1019 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1020 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
1021 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1022 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1023 ; CI-NEXT: v_cvt_f32_f16_e32 v9, v9
1024 ; CI-NEXT: v_cvt_f32_f16_e32 v8, v8
1025 ; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
1026 ; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
1027 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1028 ; CI-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc
1029 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1030 ; CI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
1031 ; CI-NEXT: v_sub_f32_e32 v0, v6, v0
1032 ; CI-NEXT: v_sub_f32_e32 v1, v7, v1
1033 ; CI-NEXT: v_sub_f32_e32 v2, v8, v2
1034 ; CI-NEXT: v_sub_f32_e32 v3, v9, v3
1035 ; CI-NEXT: s_setpc_b64 s[30:31]
1037 ; VI-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
1039 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1040 ; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1041 ; VI-NEXT: v_lshrrev_b32_e32 v7, 16, v3
1042 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1043 ; VI-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
1044 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1045 ; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
1046 ; VI-NEXT: v_sub_f16_sdwa v1, v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1047 ; VI-NEXT: v_sub_f16_e32 v0, v4, v0
1048 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
1049 ; VI-NEXT: v_sub_f16_sdwa v1, v5, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1050 ; VI-NEXT: v_sub_f16_e32 v2, v5, v2
1051 ; VI-NEXT: v_or_b32_e32 v1, v2, v1
1052 ; VI-NEXT: s_setpc_b64 s[30:31]
1054 ; GFX9-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
1056 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1057 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1058 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v3
1059 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1060 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
1061 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1062 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
1063 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1064 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
1065 ; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
1066 ; GFX9-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
1067 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1069 ; GFX11-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
1071 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1072 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
1073 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1074 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v3
1075 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
1076 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
1077 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
1078 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
1079 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
1080 ; GFX11-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
1081 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
1082 ; GFX11-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
1083 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1084 %cmp = icmp eq <2 x i32> %c, zeroinitializer
1085 %fneg.x = fneg <2 x half> %x
1086 %fneg.y = fneg <2 x half> %y
1087 %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> %fneg.y
1088 %add0 = fadd <2 x half> %select, %z
1089 %add1 = fadd <2 x half> %fneg.x, %w
1090 %insert.0 = insertvalue { <2 x half>, <2 x half> } poison, <2 x half> %add0, 0
1091 %insert.1 = insertvalue { <2 x half>, <2 x half> } %insert.0, <2 x half> %add1, 1
1092 ret { <2 x half>, <2 x half> } %insert.1
; Select between fneg %x and fneg %y feeding an fadd with %z, where fneg %x is
; additionally returned unchanged as the second struct member, so the negated
; value itself must be materialized. Despite "store" in the name, no store
; instruction appears in this function; the extra use is the return value.
; The checks expect the select + add to still become a subtraction from %z,
; plus one explicit negate of %x for the returned copy (XOR with 0x80008000 on
; fiji, gfx906 and gfx1100; a negated-source convert on hawaii).
1095 define { <2 x half>, <2 x half> } @add_select_multi_store_use_lhs_fneg_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
1096 ; CI-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
1098 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1099 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1100 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
1101 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1102 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
1103 ; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
1104 ; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
1105 ; CI-NEXT: v_cvt_f32_f16_e32 v8, v3
1106 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
1107 ; CI-NEXT: v_cvt_f32_f16_e32 v9, v2
1108 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1109 ; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
1110 ; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
1111 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1112 ; CI-NEXT: v_cvt_f32_f16_e64 v2, -v2
1113 ; CI-NEXT: v_cvt_f32_f16_e64 v3, -v3
1114 ; CI-NEXT: v_cndmask_b32_e32 v1, v5, v8, vcc
1115 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1116 ; CI-NEXT: v_cndmask_b32_e32 v0, v4, v9, vcc
1117 ; CI-NEXT: v_sub_f32_e32 v0, v6, v0
1118 ; CI-NEXT: v_sub_f32_e32 v1, v7, v1
1119 ; CI-NEXT: s_setpc_b64 s[30:31]
1121 ; VI-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
1123 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1124 ; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1125 ; VI-NEXT: v_lshrrev_b32_e32 v7, 16, v3
1126 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1127 ; VI-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
1128 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1129 ; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
1130 ; VI-NEXT: v_xor_b32_e32 v5, 0x80008000, v2
1131 ; VI-NEXT: v_sub_f16_sdwa v1, v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1132 ; VI-NEXT: v_sub_f16_e32 v0, v4, v0
1133 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
1134 ; VI-NEXT: v_mov_b32_e32 v1, v5
1135 ; VI-NEXT: s_setpc_b64 s[30:31]
1137 ; GFX9-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
1139 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1140 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1141 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v3
1142 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1143 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
1144 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1145 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
1146 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1147 ; GFX9-NEXT: v_xor_b32_e32 v5, 0x80008000, v2
1148 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
1149 ; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
1150 ; GFX9-NEXT: v_mov_b32_e32 v1, v5
1151 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1153 ; GFX11-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
1155 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1156 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
1157 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
1158 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v3
1159 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
1160 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
1161 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
1162 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
1163 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
1164 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
1165 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
1166 ; GFX11-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
1167 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1168 %cmp = icmp eq <2 x i32> %c, zeroinitializer
1169 %fneg.x = fneg <2 x half> %x
1170 %fneg.y = fneg <2 x half> %y
1171 %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> %fneg.y
1172 %add0 = fadd <2 x half> %select, %z
1173 %insert.0 = insertvalue { <2 x half>, <2 x half> } poison, <2 x half> %add0, 0
1174 %insert.1 = insertvalue { <2 x half>, <2 x half> } %insert.0, <2 x half> %fneg.x, 1
1175 ret { <2 x half>, <2 x half> } %insert.1
; Select between fneg %x and fneg %y feeding an fadd with %z, where the
; false-side operand (fneg %y) has a second user: an fadd with %w. Both sums
; are returned as a two-element struct. The checks expect both adds to be
; emitted as subtractions of the un-negated values (select result from %z,
; %y from %w) with no standalone negate instruction on any target.
1178 define { <2 x half>, <2 x half> } @add_select_multi_use_rhs_fneg_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z, <2 x half> %w) {
1179 ; CI-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
1181 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1182 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1183 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
1184 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1185 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
1186 ; CI-NEXT: v_cvt_f16_f32_e32 v9, v9
1187 ; CI-NEXT: v_cvt_f16_f32_e32 v8, v8
1188 ; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
1189 ; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
1190 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1191 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
1192 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1193 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1194 ; CI-NEXT: v_cvt_f32_f16_e32 v9, v9
1195 ; CI-NEXT: v_cvt_f32_f16_e32 v8, v8
1196 ; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
1197 ; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
1198 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1199 ; CI-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc
1200 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1201 ; CI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
1202 ; CI-NEXT: v_sub_f32_e32 v0, v6, v0
1203 ; CI-NEXT: v_sub_f32_e32 v1, v7, v1
1204 ; CI-NEXT: v_sub_f32_e32 v2, v8, v4
1205 ; CI-NEXT: v_sub_f32_e32 v3, v9, v5
1206 ; CI-NEXT: s_setpc_b64 s[30:31]
1208 ; VI-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
1210 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1211 ; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1212 ; VI-NEXT: v_lshrrev_b32_e32 v7, 16, v3
1213 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1214 ; VI-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
1215 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1216 ; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
1217 ; VI-NEXT: v_sub_f16_sdwa v1, v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1218 ; VI-NEXT: v_sub_f16_e32 v0, v4, v0
1219 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
1220 ; VI-NEXT: v_sub_f16_sdwa v1, v5, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1221 ; VI-NEXT: v_sub_f16_e32 v2, v5, v3
1222 ; VI-NEXT: v_or_b32_e32 v1, v2, v1
1223 ; VI-NEXT: s_setpc_b64 s[30:31]
1225 ; GFX9-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
1227 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1228 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1229 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v3
1230 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1231 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
1232 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1233 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
1234 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1235 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
1236 ; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
1237 ; GFX9-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
1238 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1240 ; GFX11-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
1242 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1243 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
1244 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1245 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v3
1246 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
1247 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
1248 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
1249 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
1250 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
1251 ; GFX11-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
1252 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
1253 ; GFX11-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
1254 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1255 %cmp = icmp eq <2 x i32> %c, zeroinitializer
1256 %fneg.x = fneg <2 x half> %x
1257 %fneg.y = fneg <2 x half> %y
1258 %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> %fneg.y
1259 %add0 = fadd <2 x half> %select, %z
1260 %add1 = fadd <2 x half> %fneg.y, %w
1261 %insert.0 = insertvalue { <2 x half>, <2 x half> } poison, <2 x half> %add0, 0
1262 %insert.1 = insertvalue { <2 x half>, <2 x half> } %insert.0, <2 x half> %add1, 1
1263 ret { <2 x half>, <2 x half> } %insert.1
; Only the true-side select operand is negated (fneg %x where %c == 0); the
; false side is the plain variable %y. The selected value is added to %z.
; The checks expect the negate to stay explicit here: %x is XORed with
; 0x80008000 before the select and the final operation remains an ordinary add
; on every target.
1266 define <2 x half> @add_select_fneg_var_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
1267 ; CI-LABEL: add_select_fneg_var_v2f16:
1269 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1270 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1271 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1272 ; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
1273 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
1274 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1275 ; CI-NEXT: v_or_b32_e32 v2, v2, v3
1276 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v5
1277 ; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
1278 ; CI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
1279 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v7
1280 ; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v2
1281 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1282 ; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
1283 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1284 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1285 ; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
1286 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1287 ; CI-NEXT: v_cndmask_b32_e32 v1, v3, v7, vcc
1288 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1289 ; CI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
1290 ; CI-NEXT: v_add_f32_e32 v0, v0, v6
1291 ; CI-NEXT: v_add_f32_e32 v1, v1, v5
1292 ; CI-NEXT: s_setpc_b64 s[30:31]
1294 ; VI-LABEL: add_select_fneg_var_v2f16:
1296 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1297 ; VI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
1298 ; VI-NEXT: v_lshrrev_b32_e32 v5, 16, v3
1299 ; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1300 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1301 ; VI-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
1302 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1303 ; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
1304 ; VI-NEXT: v_add_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1305 ; VI-NEXT: v_add_f16_e32 v0, v0, v4
1306 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
1307 ; VI-NEXT: s_setpc_b64 s[30:31]
1309 ; GFX9-LABEL: add_select_fneg_var_v2f16:
1311 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1312 ; GFX9-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
1313 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v3
1314 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1315 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1316 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
1317 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1318 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
1319 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1320 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
1321 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
1322 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1324 ; GFX11-LABEL: add_select_fneg_var_v2f16:
1326 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1327 ; GFX11-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
1328 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v3
1329 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
1330 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
1331 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1332 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo
1333 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
1334 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
1335 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1336 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
1337 ; GFX11-NEXT: v_pk_add_f16 v0, v0, v4
1338 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1339 %cmp = icmp eq <2 x i32> %c, zeroinitializer
1340 %fneg.x = fneg <2 x half> %x
1341 %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> %y
1342 %add = fadd <2 x half> %select, %z
; Select between fneg %x (where %c == 0) and the constant -1.0, followed by an
; fadd with %y. The checks expect the negation to be pulled out of both select
; operands: the select is done between the un-negated %x and +1.0 (inline 1.0
; on hawaii, 0x3c00 on fiji, gfx906 and gfx1100) and the add becomes a
; subtraction from %y.
1346 define <2 x half> @add_select_fneg_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
1347 ; CI-LABEL: add_select_fneg_negk_v2f16:
1349 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1350 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1351 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1352 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
1353 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
1354 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1355 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1356 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
1357 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1358 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1359 ; CI-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
1360 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1361 ; CI-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
1362 ; CI-NEXT: v_sub_f32_e32 v0, v4, v0
1363 ; CI-NEXT: v_sub_f32_e32 v1, v5, v1
1364 ; CI-NEXT: s_setpc_b64 s[30:31]
1366 ; VI-LABEL: add_select_fneg_negk_v2f16:
1368 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1369 ; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1370 ; VI-NEXT: v_mov_b32_e32 v5, 0x3c00
1371 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1372 ; VI-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1373 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1374 ; VI-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
1375 ; VI-NEXT: v_sub_f16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1376 ; VI-NEXT: v_sub_f16_e32 v0, v3, v0
1377 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
1378 ; VI-NEXT: s_setpc_b64 s[30:31]
1380 ; GFX9-LABEL: add_select_fneg_negk_v2f16:
1382 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1383 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1384 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x3c00
1385 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1386 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1387 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1388 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
1389 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1390 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
1391 ; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
1392 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1394 ; GFX11-LABEL: add_select_fneg_negk_v2f16:
1396 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1397 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1398 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
1399 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1400 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
1401 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
1402 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
1403 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
1404 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1405 ; GFX11-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
1406 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1407 %cmp = icmp eq <2 x i32> %c, zeroinitializer
1408 %fneg.x = fneg <2 x half> %x
1409 %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half -1.0, half -1.0>
1410 %add = fadd <2 x half> %select, %y
; Select between fneg %x (where %c == 0) and the positive constant 0xH3118
; (roughly 0.159, i.e. about 1/(2*pi) as the function name suggests), followed
; by an fadd with %y. The checks expect the negation to be pulled out of the
; select: the constant appears sign-flipped (0xb118 on fiji, gfx906 and
; gfx1100; 0xbe230000 as f32 on hawaii), %x is used un-negated, and the add
; becomes a subtraction from %y.
1414 define <2 x half> @add_select_fneg_inv2pi_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
1415 ; CI-LABEL: add_select_fneg_inv2pi_v2f16:
1417 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1418 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1419 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1420 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
1421 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
1422 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1423 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1424 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
1425 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1426 ; CI-NEXT: v_mov_b32_e32 v6, 0xbe230000
1427 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1428 ; CI-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc
1429 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1430 ; CI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc
1431 ; CI-NEXT: v_sub_f32_e32 v0, v4, v0
1432 ; CI-NEXT: v_sub_f32_e32 v1, v5, v1
1433 ; CI-NEXT: s_setpc_b64 s[30:31]
1435 ; VI-LABEL: add_select_fneg_inv2pi_v2f16:
1437 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1438 ; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1439 ; VI-NEXT: v_mov_b32_e32 v5, 0xb118
1440 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1441 ; VI-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1442 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1443 ; VI-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
1444 ; VI-NEXT: v_sub_f16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1445 ; VI-NEXT: v_sub_f16_e32 v0, v3, v0
1446 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
1447 ; VI-NEXT: s_setpc_b64 s[30:31]
1449 ; GFX9-LABEL: add_select_fneg_inv2pi_v2f16:
1451 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1452 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1453 ; GFX9-NEXT: v_mov_b32_e32 v5, 0xb118
1454 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1455 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1456 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1457 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
1458 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1459 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
1460 ; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
1461 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1463 ; GFX11-LABEL: add_select_fneg_inv2pi_v2f16:
1465 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1466 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1467 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
1468 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1469 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo
1470 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
1471 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo
1472 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
1473 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1474 ; GFX11-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
1475 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1476 %cmp = icmp eq <2 x i32> %c, zeroinitializer
1477 %fneg.x = fneg <2 x half> %x
1478 %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half 0xH3118, half 0xH3118>
1479 %add = fadd <2 x half> %select, %y
; Test: fadd of %y with select(%c == 0, fneg %x, K), where K is the half
; constant 0xHB118 in both lanes (0xH3118 with the sign bit set; the function
; name calls it neginv2pi).
; In the expected code the constant appears with the sign bit cleared
; (0x3118; 0x3e230000 in the f32 code generated for hawaii) and the final
; operation is a subtraction of the selected value on every target.
1483 define <2 x half> @add_select_fneg_neginv2pi_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
1484 ; CI-LABEL: add_select_fneg_neginv2pi_v2f16:
1486 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1487 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1488 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1489 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
1490 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
1491 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1492 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1493 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
1494 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1495 ; CI-NEXT: v_mov_b32_e32 v6, 0x3e230000
1496 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1497 ; CI-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc
1498 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1499 ; CI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc
1500 ; CI-NEXT: v_sub_f32_e32 v0, v4, v0
1501 ; CI-NEXT: v_sub_f32_e32 v1, v5, v1
1502 ; CI-NEXT: s_setpc_b64 s[30:31]
1504 ; VI-LABEL: add_select_fneg_neginv2pi_v2f16:
1506 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1507 ; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1508 ; VI-NEXT: v_mov_b32_e32 v5, 0x3118
1509 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1510 ; VI-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1511 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1512 ; VI-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
1513 ; VI-NEXT: v_sub_f16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1514 ; VI-NEXT: v_sub_f16_e32 v0, v3, v0
1515 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
1516 ; VI-NEXT: s_setpc_b64 s[30:31]
1518 ; GFX9-LABEL: add_select_fneg_neginv2pi_v2f16:
1520 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1521 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1522 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x3118
1523 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1524 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1525 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1526 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
1527 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1528 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
1529 ; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
1530 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1532 ; GFX11-LABEL: add_select_fneg_neginv2pi_v2f16:
1534 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1535 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1536 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
1537 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1538 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo
1539 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
1540 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo
1541 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
1542 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1543 ; GFX11-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
1544 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1545 %cmp = icmp eq <2 x i32> %c, zeroinitializer
1546 %fneg.x = fneg <2 x half> %x
; Lanes where %c is zero take -%x; the other lanes take the negative constant.
1547 %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half 0xHB118, half 0xHB118>
1548 %add = fadd <2 x half> %select, %y
; Test: fadd of %x with a select between two negative constant vectors,
; splat(-2.0) when %c == 0 and splat(-1.0) otherwise. The IR contains no fneg.
; In the expected code the constants keep their negative sign (inline -1.0 and
; -2.0 in the f32 code for hawaii; 0xbc00 and 0xc000 on the other targets) and
; the final operation is a plain add (v_add_f32 / v_add_f16 / v_pk_add_f16
; without neg modifiers).
1552 define <2 x half> @add_select_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x) {
1553 ; CI-LABEL: add_select_negk_negk_v2f16:
1555 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1556 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1557 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1558 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1559 ; CI-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc
1560 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1561 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1562 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1563 ; CI-NEXT: v_cndmask_b32_e64 v0, -1.0, -2.0, vcc
1564 ; CI-NEXT: v_add_f32_e32 v1, v1, v3
1565 ; CI-NEXT: v_add_f32_e32 v0, v0, v2
1566 ; CI-NEXT: s_setpc_b64 s[30:31]
1568 ; VI-LABEL: add_select_negk_negk_v2f16:
1570 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1571 ; VI-NEXT: v_mov_b32_e32 v3, 0xbc00
1572 ; VI-NEXT: v_mov_b32_e32 v4, 0xc000
1573 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1574 ; VI-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
1575 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1576 ; VI-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
1577 ; VI-NEXT: v_add_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1578 ; VI-NEXT: v_add_f16_e32 v0, v0, v2
1579 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
1580 ; VI-NEXT: s_setpc_b64 s[30:31]
1582 ; GFX9-LABEL: add_select_negk_negk_v2f16:
1584 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1585 ; GFX9-NEXT: v_mov_b32_e32 v3, 0xbc00
1586 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xc000
1587 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1588 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
1589 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1590 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
1591 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
1592 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v2
1593 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1595 ; GFX11-LABEL: add_select_negk_negk_v2f16:
1597 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1598 ; GFX11-NEXT: v_mov_b32_e32 v3, 0xc000
1599 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
1600 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1601 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
1602 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
1603 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
1604 ; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
1605 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1606 ; GFX11-NEXT: v_pk_add_f16 v0, v0, v2
1607 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1608 %cmp = icmp eq <2 x i32> %c, zeroinitializer
; Both select arms are constants; lanes where %c is zero take -2.0.
1609 %select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0>
1610 %add = fadd <2 x half> %select, %x
; Test: same shape as a select between two negative constants followed by an
; fadd with %x, but using splat(-2048.0) when %c == 0 and splat(-4096.0)
; otherwise.
; In the expected code these values appear as hexadecimal literals
; (0xc5000000 and 0xc5800000 in the f32 code for hawaii; 0xe800 and 0xec00 on
; the other targets), their sign is unchanged, and the final operation is a
; plain add without neg modifiers.
1614 define <2 x half> @add_select_negliteralk_negliteralk_v2f16(<2 x i32> %c, <2 x half> %x) {
1615 ; CI-LABEL: add_select_negliteralk_negliteralk_v2f16:
1617 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1618 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1619 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1620 ; CI-NEXT: v_mov_b32_e32 v4, 0xc5800000
1621 ; CI-NEXT: v_mov_b32_e32 v5, 0xc5000000
1622 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1623 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1624 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1625 ; CI-NEXT: v_cndmask_b32_e32 v1, v4, v5, vcc
1626 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1627 ; CI-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
1628 ; CI-NEXT: v_add_f32_e32 v0, v0, v2
1629 ; CI-NEXT: v_add_f32_e32 v1, v1, v3
1630 ; CI-NEXT: s_setpc_b64 s[30:31]
1632 ; VI-LABEL: add_select_negliteralk_negliteralk_v2f16:
1634 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1635 ; VI-NEXT: v_mov_b32_e32 v3, 0xec00
1636 ; VI-NEXT: v_mov_b32_e32 v4, 0xe800
1637 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1638 ; VI-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
1639 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1640 ; VI-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
1641 ; VI-NEXT: v_add_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1642 ; VI-NEXT: v_add_f16_e32 v0, v0, v2
1643 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
1644 ; VI-NEXT: s_setpc_b64 s[30:31]
1646 ; GFX9-LABEL: add_select_negliteralk_negliteralk_v2f16:
1648 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1649 ; GFX9-NEXT: v_mov_b32_e32 v3, 0xec00
1650 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xe800
1651 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1652 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
1653 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1654 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
1655 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
1656 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v2
1657 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1659 ; GFX11-LABEL: add_select_negliteralk_negliteralk_v2f16:
1661 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1662 ; GFX11-NEXT: v_mov_b32_e32 v3, 0xe800
1663 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
1664 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1665 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo
1666 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
1667 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo
1668 ; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
1669 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1670 ; GFX11-NEXT: v_pk_add_f16 v0, v0, v2
1671 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1672 %cmp = icmp eq <2 x i32> %c, zeroinitializer
; Both select arms are constants; lanes where %c is zero take -2048.0.
1673 %select = select <2 x i1> %cmp, <2 x half> <half -2048.0, half -2048.0>, <2 x half> <half -4096.0, half -4096.0>
1674 %add = fadd <2 x half> %select, %x
; Test: a select between splat(-2.0) (when %c == 0) and splat(-1.0), followed
; by an fneg of the select result, followed by an fadd with %x.
; In the expected code the selected constants are still the negative values
; (inline -1.0 and -2.0 in the f32 code for hawaii; 0xbc00 and 0xc000 on the
; other targets). The negation is expressed in the final operation instead,
; which is a subtraction of the selected value (v_sub_f32 / v_sub_f16 /
; v_pk_add_f16 with neg_lo and neg_hi set on the second source).
1678 define <2 x half> @add_select_fneg_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x) {
1679 ; CI-LABEL: add_select_fneg_negk_negk_v2f16:
1681 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1682 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1683 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1684 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1685 ; CI-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc
1686 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1687 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1688 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1689 ; CI-NEXT: v_cndmask_b32_e64 v0, -1.0, -2.0, vcc
1690 ; CI-NEXT: v_sub_f32_e32 v1, v3, v1
1691 ; CI-NEXT: v_sub_f32_e32 v0, v2, v0
1692 ; CI-NEXT: s_setpc_b64 s[30:31]
1694 ; VI-LABEL: add_select_fneg_negk_negk_v2f16:
1696 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1697 ; VI-NEXT: v_mov_b32_e32 v3, 0xbc00
1698 ; VI-NEXT: v_mov_b32_e32 v4, 0xc000
1699 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1700 ; VI-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
1701 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1702 ; VI-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
1703 ; VI-NEXT: v_sub_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1704 ; VI-NEXT: v_sub_f16_e32 v0, v2, v0
1705 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
1706 ; VI-NEXT: s_setpc_b64 s[30:31]
1708 ; GFX9-LABEL: add_select_fneg_negk_negk_v2f16:
1710 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1711 ; GFX9-NEXT: v_mov_b32_e32 v3, 0xbc00
1712 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xc000
1713 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1714 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
1715 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1716 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
1717 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
1718 ; GFX9-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
1719 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1721 ; GFX11-LABEL: add_select_fneg_negk_negk_v2f16:
1723 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1724 ; GFX11-NEXT: v_mov_b32_e32 v3, 0xc000
1725 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
1726 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1727 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
1728 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
1729 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
1730 ; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
1731 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1732 ; GFX11-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
1733 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1734 %cmp = icmp eq <2 x i32> %c, zeroinitializer
1735 %select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0>
; Despite its name, %fneg.x is the negation of %select, not of %x.
1736 %fneg.x = fneg <2 x half> %select
1737 %add = fadd <2 x half> %fneg.x, %x
; Test: fadd of %y with select(%c == 0, splat(-1.0), fneg %x), i.e. the
; constant is the true arm and the negated value is the false arm.
; In the expected code the comparison is emitted as not-equal (v_cmp_ne_u32),
; the constant appears as positive 1.0 (inline 1.0 in the f32 code for hawaii;
; 0x3c00 on the other targets), and the final operation is a subtraction of
; the selected value on every target.
1741 define <2 x half> @add_select_negk_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
1742 ; CI-LABEL: add_select_negk_fneg_v2f16:
1744 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1745 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1746 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1747 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
1748 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
1749 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1750 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1751 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
1752 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1753 ; CI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
1754 ; CI-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
1755 ; CI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
1756 ; CI-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
1757 ; CI-NEXT: v_sub_f32_e32 v0, v4, v0
1758 ; CI-NEXT: v_sub_f32_e32 v1, v5, v1
1759 ; CI-NEXT: s_setpc_b64 s[30:31]
1761 ; VI-LABEL: add_select_negk_fneg_v2f16:
1763 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1764 ; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1765 ; VI-NEXT: v_mov_b32_e32 v5, 0x3c00
1766 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
1767 ; VI-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1768 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
1769 ; VI-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
1770 ; VI-NEXT: v_sub_f16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1771 ; VI-NEXT: v_sub_f16_e32 v0, v3, v0
1772 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
1773 ; VI-NEXT: s_setpc_b64 s[30:31]
1775 ; GFX9-LABEL: add_select_negk_fneg_v2f16:
1777 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1778 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1779 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x3c00
1780 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
1781 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1782 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
1783 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
1784 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1785 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
1786 ; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
1787 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1789 ; GFX11-LABEL: add_select_negk_fneg_v2f16:
1791 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1792 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1793 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
1794 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1795 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
1796 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
1797 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
1798 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
1799 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1800 ; GFX11-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
1801 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1802 %cmp = icmp eq <2 x i32> %c, zeroinitializer
1803 %fneg.x = fneg <2 x half> %x
; Lanes where %c is zero take the constant -1.0; the other lanes take -%x.
1804 %select = select <2 x i1> %cmp, <2 x half> <half -1.0, half -1.0>, <2 x half> %fneg.x
1805 %add = fadd <2 x half> %select, %y
; Test: fadd of %y with select(%c == 0, fneg %x, splat(1.0)), i.e. the negated
; value is the true arm and a positive constant is the false arm.
; In the expected code the constant appears as -1.0 (inline -1.0 in the f32
; code for hawaii; 0xbc00 on the other targets), the comparison stays
; equal-to-zero (v_cmp_eq_u32), and the final operation is a subtraction of
; the selected value on every target.
1809 define <2 x half> @add_select_fneg_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
1810 ; CI-LABEL: add_select_fneg_posk_v2f16:
1812 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1813 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1814 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1815 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
1816 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
1817 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1818 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1819 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
1820 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1821 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1822 ; CI-NEXT: v_cndmask_b32_e32 v1, -1.0, v3, vcc
1823 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1824 ; CI-NEXT: v_cndmask_b32_e32 v0, -1.0, v2, vcc
1825 ; CI-NEXT: v_sub_f32_e32 v0, v4, v0
1826 ; CI-NEXT: v_sub_f32_e32 v1, v5, v1
1827 ; CI-NEXT: s_setpc_b64 s[30:31]
1829 ; VI-LABEL: add_select_fneg_posk_v2f16:
1831 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1832 ; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1833 ; VI-NEXT: v_mov_b32_e32 v5, 0xbc00
1834 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1835 ; VI-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1836 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1837 ; VI-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
1838 ; VI-NEXT: v_sub_f16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1839 ; VI-NEXT: v_sub_f16_e32 v0, v3, v0
1840 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
1841 ; VI-NEXT: s_setpc_b64 s[30:31]
1843 ; GFX9-LABEL: add_select_fneg_posk_v2f16:
1845 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1846 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1847 ; GFX9-NEXT: v_mov_b32_e32 v5, 0xbc00
1848 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1849 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1850 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1851 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
1852 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1853 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
1854 ; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
1855 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1857 ; GFX11-LABEL: add_select_fneg_posk_v2f16:
1859 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1860 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1861 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
1862 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1863 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
1864 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
1865 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
1866 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
1867 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1868 ; GFX11-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
1869 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1870 %cmp = icmp eq <2 x i32> %c, zeroinitializer
1871 %fneg.x = fneg <2 x half> %x
; Lanes where %c is zero take -%x; the other lanes take the constant 1.0.
1872 %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half 1.0, half 1.0>
1873 %add = fadd <2 x half> %select, %y
; Test: fadd of %y with select(%c == 0, splat(1.0), fneg %x), i.e. a positive
; constant is the true arm and the negated value is the false arm.
; In the expected code the comparison is emitted as not-equal (v_cmp_ne_u32),
; the constant appears as -1.0 (inline -1.0 in the f32 code for hawaii; 0xbc00
; on the other targets), and the final operation is a subtraction of the
; selected value on every target.
1877 define <2 x half> @add_select_posk_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
1878 ; CI-LABEL: add_select_posk_fneg_v2f16:
1880 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1881 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1882 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1883 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
1884 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
1885 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
1886 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1887 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
1888 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
1889 ; CI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
1890 ; CI-NEXT: v_cndmask_b32_e32 v1, -1.0, v3, vcc
1891 ; CI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
1892 ; CI-NEXT: v_cndmask_b32_e32 v0, -1.0, v2, vcc
1893 ; CI-NEXT: v_sub_f32_e32 v0, v4, v0
1894 ; CI-NEXT: v_sub_f32_e32 v1, v5, v1
1895 ; CI-NEXT: s_setpc_b64 s[30:31]
1897 ; VI-LABEL: add_select_posk_fneg_v2f16:
1899 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1900 ; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1901 ; VI-NEXT: v_mov_b32_e32 v5, 0xbc00
1902 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
1903 ; VI-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1904 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
1905 ; VI-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
1906 ; VI-NEXT: v_sub_f16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1907 ; VI-NEXT: v_sub_f16_e32 v0, v3, v0
1908 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
1909 ; VI-NEXT: s_setpc_b64 s[30:31]
1911 ; GFX9-LABEL: add_select_posk_fneg_v2f16:
1913 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1914 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1915 ; GFX9-NEXT: v_mov_b32_e32 v5, 0xbc00
1916 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
1917 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1918 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
1919 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
1920 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1921 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
1922 ; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
1923 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1925 ; GFX11-LABEL: add_select_posk_fneg_v2f16:
1927 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1928 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1929 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
1930 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1931 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
1932 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
1933 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
1934 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
1935 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1936 ; GFX11-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
1937 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1938 %cmp = icmp eq <2 x i32> %c, zeroinitializer
1939 %fneg.x = fneg <2 x half> %x
; Lanes where %c is zero take the constant 1.0; the other lanes take -%x.
1940 %select = select <2 x i1> %cmp, <2 x half> <half 1.0, half 1.0>, <2 x half> %fneg.x
1941 %add = fadd <2 x half> %select, %y
; Test: fadd of %z with select(%c == 0, fneg(fabs %x), fabs %y).
; In the expected code for fiji, gfx906 and gfx1100 the two source modifiers
; are applied with explicit bit operations on the packed value before the
; select: an OR with 0x80008000 (sets both sign bits) for one arm and an AND
; with 0x7fff7fff (clears both sign bits) for the other. The final operation
; is a plain add without neg modifiers. The f32 code for hawaii uses the same
; OR on a repacked value and |v| source modifiers on the conversions of the
; other arm.
1945 define <2 x half> @add_select_negfabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
1946 ; CI-LABEL: add_select_negfabs_fabs_v2f16:
1948 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1949 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
1950 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
1951 ; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
1952 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
1953 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1954 ; CI-NEXT: v_or_b32_e32 v2, v2, v3
1955 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
1956 ; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
1957 ; CI-NEXT: v_or_b32_e32 v2, 0x80008000, v2
1958 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v7
1959 ; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v2
1960 ; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
1961 ; CI-NEXT: v_cvt_f32_f16_e64 v5, |v5|
1962 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
1963 ; CI-NEXT: v_cvt_f32_f16_e64 v4, |v4|
1964 ; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
1965 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1966 ; CI-NEXT: v_cndmask_b32_e32 v1, v5, v7, vcc
1967 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1968 ; CI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
1969 ; CI-NEXT: v_add_f32_e32 v0, v0, v6
1970 ; CI-NEXT: v_add_f32_e32 v1, v1, v3
1971 ; CI-NEXT: s_setpc_b64 s[30:31]
1973 ; VI-LABEL: add_select_negfabs_fabs_v2f16:
1975 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1976 ; VI-NEXT: v_or_b32_e32 v2, 0x80008000, v2
1977 ; VI-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
1978 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1979 ; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
1980 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
1981 ; VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
1982 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1983 ; VI-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
1984 ; VI-NEXT: v_add_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1985 ; VI-NEXT: v_add_f16_e32 v0, v0, v4
1986 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
1987 ; VI-NEXT: s_setpc_b64 s[30:31]
1989 ; GFX9-LABEL: add_select_negfabs_fabs_v2f16:
1991 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1992 ; GFX9-NEXT: v_or_b32_e32 v2, 0x80008000, v2
1993 ; GFX9-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
1994 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1995 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
1996 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
1997 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
1998 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1999 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
2000 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
2001 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
2002 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
2003 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2005 ; GFX11-LABEL: add_select_negfabs_fabs_v2f16:
2007 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2008 ; GFX11-NEXT: v_or_b32_e32 v2, 0x80008000, v2
2009 ; GFX11-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
2010 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
2011 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
2012 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
2013 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v3
2014 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
2015 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
2016 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
2017 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
2018 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
2019 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2020 ; GFX11-NEXT: v_pk_add_f16 v0, v0, v4
2021 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2022 %cmp = icmp eq <2 x i32> %c, zeroinitializer
2023 %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
2024 %fneg.fabs.x = fneg <2 x half> %fabs.x
2025 %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
; Lanes where %c is zero take -|%x|; the other lanes take |%y|.
2026 %select = select <2 x i1> %cmp, <2 x half> %fneg.fabs.x, <2 x half> %fabs.y
2027 %add = fadd <2 x half> %select, %z
; Test: fadd of %z with select(%c == 0, fabs %x, fneg(fabs %y)); the same
; operations as a negfabs/fabs select but with the two arms swapped.
; In the expected code for fiji, gfx906 and gfx1100 one arm is AND'ed with
; 0x7fff7fff (clears both sign bits) and the other is OR'ed with 0x80008000
; (sets both sign bits) before the per-lane select, and the final operation is
; a plain add without neg modifiers. The f32 code for hawaii uses the same OR
; on a repacked value and |v| source modifiers on the conversions of the other
; arm.
2031 define <2 x half> @add_select_fabs_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
2032 ; CI-LABEL: add_select_fabs_negfabs_v2f16:
2034 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2035 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
2036 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
2037 ; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
2038 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
2039 ; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5
2040 ; CI-NEXT: v_or_b32_e32 v4, v4, v5
2041 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
2042 ; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
2043 ; CI-NEXT: v_or_b32_e32 v4, 0x80008000, v4
2044 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v7
2045 ; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v4
2046 ; CI-NEXT: v_cvt_f32_f16_e64 v3, |v3|
2047 ; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
2048 ; CI-NEXT: v_cvt_f32_f16_e64 v2, |v2|
2049 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
2050 ; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
2051 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2052 ; CI-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
2053 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2054 ; CI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
2055 ; CI-NEXT: v_add_f32_e32 v0, v0, v6
2056 ; CI-NEXT: v_add_f32_e32 v1, v1, v5
2057 ; CI-NEXT: s_setpc_b64 s[30:31]
2059 ; VI-LABEL: add_select_fabs_negfabs_v2f16:
2061 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2062 ; VI-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
2063 ; VI-NEXT: v_or_b32_e32 v3, 0x80008000, v3
2064 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2065 ; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
2066 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2067 ; VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
2068 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2069 ; VI-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
2070 ; VI-NEXT: v_add_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
2071 ; VI-NEXT: v_add_f16_e32 v0, v0, v4
2072 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
2073 ; VI-NEXT: s_setpc_b64 s[30:31]
2075 ; GFX9-LABEL: add_select_fabs_negfabs_v2f16:
2077 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2078 ; GFX9-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
2079 ; GFX9-NEXT: v_or_b32_e32 v3, 0x80008000, v3
2080 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2081 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
2082 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2083 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
2084 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2085 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
2086 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
2087 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
2088 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
2089 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2091 ; GFX11-LABEL: add_select_fabs_negfabs_v2f16:
2093 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2094 ; GFX11-NEXT: v_or_b32_e32 v3, 0x80008000, v3
2095 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
2096 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
2097 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v3
2098 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
2099 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
2100 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
2101 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
2102 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
2103 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
2104 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
2105 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2106 ; GFX11-NEXT: v_pk_add_f16 v0, v0, v4
2107 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2108 %cmp = icmp eq <2 x i32> %c, zeroinitializer
2109 %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
2110 %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
2111 %fneg.fabs.y = fneg <2 x half> %fabs.y
; Lanes where %c is zero take |%x|; the other lanes take -|%y|.
2112 %select = select <2 x i1> %cmp, <2 x half> %fabs.x, <2 x half> %fneg.fabs.y
2113 %add = fadd <2 x half> %select, %z
; Test: fadd of %z with select(%c == 0, fneg %x, fabs %y), i.e. the two select
; arms carry different source modifiers (a plain negate and an absolute
; value).
; In the expected code for fiji, gfx906 and gfx1100 the negate is an XOR with
; 0x80008000 (flips both sign bits) and the absolute value is an AND with
; 0x7fff7fff (clears both sign bits), both applied to the packed value before
; the per-lane select; the final operation is a plain add without neg
; modifiers. The f32 code for hawaii uses the same XOR on a repacked value and
; |v| source modifiers on the conversions of the other arm.
2117 define <2 x half> @add_select_neg_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
2118 ; CI-LABEL: add_select_neg_fabs_v2f16:
2120 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2121 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
2122 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
2123 ; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
2124 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
2125 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
2126 ; CI-NEXT: v_or_b32_e32 v2, v2, v3
2127 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
2128 ; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
2129 ; CI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
2130 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v7
2131 ; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v2
2132 ; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
2133 ; CI-NEXT: v_cvt_f32_f16_e64 v5, |v5|
2134 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
2135 ; CI-NEXT: v_cvt_f32_f16_e64 v4, |v4|
2136 ; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
2137 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2138 ; CI-NEXT: v_cndmask_b32_e32 v1, v5, v7, vcc
2139 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2140 ; CI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
2141 ; CI-NEXT: v_add_f32_e32 v0, v0, v6
2142 ; CI-NEXT: v_add_f32_e32 v1, v1, v3
2143 ; CI-NEXT: s_setpc_b64 s[30:31]
2145 ; VI-LABEL: add_select_neg_fabs_v2f16:
2147 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2148 ; VI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
2149 ; VI-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
2150 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2151 ; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
2152 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2153 ; VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
2154 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2155 ; VI-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
2156 ; VI-NEXT: v_add_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
2157 ; VI-NEXT: v_add_f16_e32 v0, v0, v4
2158 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
2159 ; VI-NEXT: s_setpc_b64 s[30:31]
2161 ; GFX9-LABEL: add_select_neg_fabs_v2f16:
2163 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2164 ; GFX9-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
2165 ; GFX9-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
2166 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2167 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
2168 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2169 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
2170 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2171 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
2172 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
2173 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
2174 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
2175 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2177 ; GFX11-LABEL: add_select_neg_fabs_v2f16:
2179 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2180 ; GFX11-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
2181 ; GFX11-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
2182 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
2183 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
2184 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
2185 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v3
2186 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
2187 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
2188 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
2189 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
2190 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
2191 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2192 ; GFX11-NEXT: v_pk_add_f16 v0, v0, v4
2193 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2194 %cmp = icmp eq <2 x i32> %c, zeroinitializer
2195 %fneg.x = fneg <2 x half> %x
2196 %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
; Lanes where %c is zero take -%x; the other lanes take |%y|.
2197 %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> %fabs.y
2198 %add = fadd <2 x half> %select, %z
; Computes select(c == 0, fabs(x), fneg(y)) + z on <2 x half>.
; The two select operands carry different modifiers (abs on one, negate on the
; other). In the expected output below each modifier is applied to its own
; operand before the per-lane select, and the final operation is a plain add
; with no source modifiers.
2202 define <2 x half> @add_select_fabs_neg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
2203 ; CI-LABEL: add_select_fabs_neg_v2f16:
2205 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2206 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
2207 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
2208 ; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
2209 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
2210 ; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5
2211 ; CI-NEXT: v_or_b32_e32 v4, v4, v5
2212 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
2213 ; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
2214 ; CI-NEXT: v_xor_b32_e32 v4, 0x80008000, v4
2215 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v7
2216 ; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v4
2217 ; CI-NEXT: v_cvt_f32_f16_e64 v3, |v3|
2218 ; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
2219 ; CI-NEXT: v_cvt_f32_f16_e64 v2, |v2|
2220 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
2221 ; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
2222 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2223 ; CI-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
2224 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2225 ; CI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
2226 ; CI-NEXT: v_add_f32_e32 v0, v0, v6
2227 ; CI-NEXT: v_add_f32_e32 v1, v1, v5
2228 ; CI-NEXT: s_setpc_b64 s[30:31]
2230 ; VI-LABEL: add_select_fabs_neg_v2f16:
2232 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2233 ; VI-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
2234 ; VI-NEXT: v_xor_b32_e32 v3, 0x80008000, v3
2235 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2236 ; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
2237 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2238 ; VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
2239 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2240 ; VI-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
2241 ; VI-NEXT: v_add_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
2242 ; VI-NEXT: v_add_f16_e32 v0, v0, v4
2243 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
2244 ; VI-NEXT: s_setpc_b64 s[30:31]
2246 ; GFX9-LABEL: add_select_fabs_neg_v2f16:
2248 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2249 ; GFX9-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
2250 ; GFX9-NEXT: v_xor_b32_e32 v3, 0x80008000, v3
2251 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2252 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
2253 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2254 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
2255 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2256 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
2257 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
2258 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
2259 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
2260 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2262 ; GFX11-LABEL: add_select_fabs_neg_v2f16:
2264 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2265 ; GFX11-NEXT: v_xor_b32_e32 v3, 0x80008000, v3
2266 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
2267 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
2268 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v3
2269 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
2270 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
2271 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
2272 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
2273 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
2274 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
2275 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
2276 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2277 ; GFX11-NEXT: v_pk_add_f16 v0, v0, v4
2278 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2279 %cmp = icmp eq <2 x i32> %c, zeroinitializer
2280 %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
2281 %fneg.y = fneg <2 x half> %y
; True lanes (c == 0) take fabs(x); false lanes take fneg(y).
2282 %select = select <2 x i1> %cmp, <2 x half> %fabs.x, <2 x half> %fneg.y
2283 %add = fadd <2 x half> %select, %z
; Computes select(c == 0, fneg(x), fneg(fabs(y))) + z on <2 x half>.
; Both select operands are negated. In the expected output below the shared
; negate is taken out of the select: the select chooses between x and |y|, and
; the result is subtracted from z (v_sub_*, or v_pk_add_f16 with neg_lo/neg_hi
; set on the selected operand).
2287 define <2 x half> @add_select_neg_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
2288 ; CI-LABEL: add_select_neg_negfabs_v2f16:
2290 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2291 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
2292 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
2293 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
2294 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
2295 ; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
2296 ; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
2297 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
2298 ; CI-NEXT: v_cvt_f32_f16_e64 v5, |v5|
2299 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
2300 ; CI-NEXT: v_cvt_f32_f16_e64 v4, |v4|
2301 ; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
2302 ; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
2303 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2304 ; CI-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc
2305 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2306 ; CI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
2307 ; CI-NEXT: v_sub_f32_e32 v0, v6, v0
2308 ; CI-NEXT: v_sub_f32_e32 v1, v7, v1
2309 ; CI-NEXT: s_setpc_b64 s[30:31]
2311 ; VI-LABEL: add_select_neg_negfabs_v2f16:
2313 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2314 ; VI-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
2315 ; VI-NEXT: v_lshrrev_b32_e32 v5, 16, v2
2316 ; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v3
2317 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2318 ; VI-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc
2319 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2320 ; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
2321 ; VI-NEXT: v_sub_f16_sdwa v1, v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2322 ; VI-NEXT: v_sub_f16_e32 v0, v4, v0
2323 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
2324 ; VI-NEXT: s_setpc_b64 s[30:31]
2326 ; GFX9-LABEL: add_select_neg_negfabs_v2f16:
2328 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2329 ; GFX9-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
2330 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v2
2331 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3
2332 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2333 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc
2334 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2335 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
2336 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
2337 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
2338 ; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
2339 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2341 ; GFX11-LABEL: add_select_neg_negfabs_v2f16:
2343 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2344 ; GFX11-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
2345 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
2346 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
2347 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
2348 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v3
2349 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
2350 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
2351 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
2352 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2353 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
2354 ; GFX11-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
2355 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2356 %cmp = icmp eq <2 x i32> %c, zeroinitializer
2357 %fneg.x = fneg <2 x half> %x
2358 %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
2359 %fneg.fabs.y = fneg <2 x half> %fabs.y
; True lanes (c == 0) take -x; false lanes take -|y|.
2360 %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> %fneg.fabs.y
2361 %add = fadd <2 x half> %select, %z
; Computes select(c == 0, fneg(y), fneg(fabs(x))) + z on <2 x half>.
; Both select operands are negated. In the expected output below the shared
; negate is taken out of the select: the select chooses between y and |x|, and
; the result is subtracted from z (v_sub_*, or v_pk_add_f16 with neg_lo/neg_hi
; set on the selected operand).
2365 define <2 x half> @add_select_negfabs_neg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
2366 ; CI-LABEL: add_select_negfabs_neg_v2f16:
2368 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2369 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
2370 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
2371 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
2372 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
2373 ; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
2374 ; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
2375 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
2376 ; CI-NEXT: v_cvt_f32_f16_e64 v3, |v3|
2377 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
2378 ; CI-NEXT: v_cvt_f32_f16_e64 v2, |v2|
2379 ; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
2380 ; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
2381 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2382 ; CI-NEXT: v_cndmask_b32_e32 v1, v3, v5, vcc
2383 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2384 ; CI-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc
2385 ; CI-NEXT: v_sub_f32_e32 v0, v6, v0
2386 ; CI-NEXT: v_sub_f32_e32 v1, v7, v1
2387 ; CI-NEXT: s_setpc_b64 s[30:31]
2389 ; VI-LABEL: add_select_negfabs_neg_v2f16:
2391 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2392 ; VI-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
2393 ; VI-NEXT: v_lshrrev_b32_e32 v5, 16, v3
2394 ; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v2
2395 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2396 ; VI-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc
2397 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2398 ; VI-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
2399 ; VI-NEXT: v_sub_f16_sdwa v1, v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2400 ; VI-NEXT: v_sub_f16_e32 v0, v4, v0
2401 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
2402 ; VI-NEXT: s_setpc_b64 s[30:31]
2404 ; GFX9-LABEL: add_select_negfabs_neg_v2f16:
2406 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2407 ; GFX9-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
2408 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v3
2409 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v2
2410 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2411 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc
2412 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2413 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
2414 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
2415 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
2416 ; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
2417 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2419 ; GFX11-LABEL: add_select_negfabs_neg_v2f16:
2421 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2422 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v3
2423 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
2424 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
2425 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2426 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v2
2427 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
2428 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
2429 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
2430 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2431 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
2432 ; GFX11-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
2433 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2434 %cmp = icmp eq <2 x i32> %c, zeroinitializer
2435 %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
2436 %fneg.fabs.x = fneg <2 x half> %fabs.x
2437 %fneg.y = fneg <2 x half> %y
; NOTE(review): the function name reads "negfabs, neg", but the true operand
; here is %fneg.y and the false operand is %fneg.fabs.x. The assertions above
; match the IR as written; confirm the operand order is intended.
2438 %select = select <2 x i1> %cmp, <2 x half> %fneg.y, <2 x half> %fneg.fabs.x
2439 %add = fadd <2 x half> %select, %z
; Computes select(c == 0, fneg(fabs(x)), 4.0) * y on <2 x half>.
; One select operand is -|x| and the other is the positive constant 4.0.
; In the expected output below the sign bits of x are set with an or of
; 0x80008000 (giving -|x|) before the select, the constant stays 4.0
; (0x4400 as a half), and the multiply has no source modifiers.
2443 define <2 x half> @mul_select_negfabs_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
2444 ; CI-LABEL: mul_select_negfabs_posk_v2f16:
2446 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2447 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
2448 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
2449 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
2450 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
2451 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
2452 ; CI-NEXT: v_or_b32_e32 v2, v2, v3
2453 ; CI-NEXT: v_or_b32_e32 v2, 0x80008000, v2
2454 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v5
2455 ; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v2
2456 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
2457 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
2458 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
2459 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2460 ; CI-NEXT: v_cndmask_b32_e32 v1, 4.0, v5, vcc
2461 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2462 ; CI-NEXT: v_cndmask_b32_e32 v0, 4.0, v2, vcc
2463 ; CI-NEXT: v_mul_f32_e32 v0, v0, v4
2464 ; CI-NEXT: v_mul_f32_e32 v1, v1, v3
2465 ; CI-NEXT: s_setpc_b64 s[30:31]
2467 ; VI-LABEL: mul_select_negfabs_posk_v2f16:
2469 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2470 ; VI-NEXT: v_or_b32_e32 v2, 0x80008000, v2
2471 ; VI-NEXT: v_mov_b32_e32 v4, 0x4400
2472 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2473 ; VI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
2474 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2475 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2476 ; VI-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
2477 ; VI-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
2478 ; VI-NEXT: v_mul_f16_e32 v0, v0, v3
2479 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
2480 ; VI-NEXT: s_setpc_b64 s[30:31]
2482 ; GFX9-LABEL: mul_select_negfabs_posk_v2f16:
2484 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2485 ; GFX9-NEXT: v_or_b32_e32 v2, 0x80008000, v2
2486 ; GFX9-NEXT: v_mov_b32_e32 v4, 0x4400
2487 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2488 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
2489 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2490 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2491 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
2492 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
2493 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
2494 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3
2495 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2497 ; GFX11-LABEL: mul_select_negfabs_posk_v2f16:
2499 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2500 ; GFX11-NEXT: v_or_b32_e32 v2, 0x80008000, v2
2501 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
2502 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
2503 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
2504 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
2505 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
2506 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
2507 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2508 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
2509 ; GFX11-NEXT: v_pk_mul_f16 v0, v0, v3
2510 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2511 %cmp = icmp eq <2 x i32> %c, zeroinitializer
2512 %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
2513 %fneg.fabs.x = fneg <2 x half> %fabs.x
; True lanes (c == 0) take -|x|; false lanes take the constant 4.0.
2514 %select = select <2 x i1> %cmp, <2 x half> %fneg.fabs.x, <2 x half> <half 4.0, half 4.0>
; Despite its name, %add holds the product of the select result and %y.
2515 %add = fmul <2 x half> %select, %y
; Computes select(c == 0, 4.0, fneg(fabs(x))) * y on <2 x half>.
; Same operands as a select of -|x| against 4.0, but with the constant as the
; true value. In the expected output below the compare is emitted as
; "not equal" (v_cmp_ne_u32) while the v_cndmask operand order keeps the
; constant first; -|x| is formed with an or of 0x80008000 before the select.
2519 define <2 x half> @mul_select_posk_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
2520 ; CI-LABEL: mul_select_posk_negfabs_v2f16:
2522 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2523 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
2524 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
2525 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
2526 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
2527 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
2528 ; CI-NEXT: v_or_b32_e32 v2, v2, v3
2529 ; CI-NEXT: v_or_b32_e32 v2, 0x80008000, v2
2530 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v5
2531 ; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v2
2532 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
2533 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
2534 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
2535 ; CI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
2536 ; CI-NEXT: v_cndmask_b32_e32 v1, 4.0, v5, vcc
2537 ; CI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
2538 ; CI-NEXT: v_cndmask_b32_e32 v0, 4.0, v2, vcc
2539 ; CI-NEXT: v_mul_f32_e32 v0, v0, v4
2540 ; CI-NEXT: v_mul_f32_e32 v1, v1, v3
2541 ; CI-NEXT: s_setpc_b64 s[30:31]
2543 ; VI-LABEL: mul_select_posk_negfabs_v2f16:
2545 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2546 ; VI-NEXT: v_or_b32_e32 v2, 0x80008000, v2
2547 ; VI-NEXT: v_mov_b32_e32 v4, 0x4400
2548 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
2549 ; VI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
2550 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2551 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
2552 ; VI-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
2553 ; VI-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
2554 ; VI-NEXT: v_mul_f16_e32 v0, v0, v3
2555 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
2556 ; VI-NEXT: s_setpc_b64 s[30:31]
2558 ; GFX9-LABEL: mul_select_posk_negfabs_v2f16:
2560 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2561 ; GFX9-NEXT: v_or_b32_e32 v2, 0x80008000, v2
2562 ; GFX9-NEXT: v_mov_b32_e32 v4, 0x4400
2563 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
2564 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
2565 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2566 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
2567 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
2568 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
2569 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
2570 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3
2571 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2573 ; GFX11-LABEL: mul_select_posk_negfabs_v2f16:
2575 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2576 ; GFX11-NEXT: v_or_b32_e32 v2, 0x80008000, v2
2577 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
2578 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
2579 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
2580 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
2581 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
2582 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
2583 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2584 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
2585 ; GFX11-NEXT: v_pk_mul_f16 v0, v0, v3
2586 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2587 %cmp = icmp eq <2 x i32> %c, zeroinitializer
2588 %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
2589 %fneg.fabs.x = fneg <2 x half> %fabs.x
; True lanes (c == 0) take the constant 4.0; false lanes take -|x|.
2590 %select = select <2 x i1> %cmp, <2 x half> <half 4.0, half 4.0>, <2 x half> %fneg.fabs.x
; Despite its name, %add holds the product of the select result and %y.
2591 %add = fmul <2 x half> %select, %y
; Computes select(c == 0, fneg(fabs(x)), -4.0) * y on <2 x half>.
; One select operand is -|x| and the other is the negative constant -4.0.
; In the expected output below -|x| is formed with an or of 0x80008000
; before the select, the constant is materialized as -4.0 (0xc400 as a half),
; and the multiply has no source modifiers.
2595 define <2 x half> @mul_select_negfabs_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
2596 ; CI-LABEL: mul_select_negfabs_negk_v2f16:
2598 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2599 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
2600 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
2601 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
2602 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
2603 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
2604 ; CI-NEXT: v_or_b32_e32 v2, v2, v3
2605 ; CI-NEXT: v_or_b32_e32 v2, 0x80008000, v2
2606 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v5
2607 ; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v2
2608 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
2609 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
2610 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
2611 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2612 ; CI-NEXT: v_cndmask_b32_e32 v1, -4.0, v5, vcc
2613 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2614 ; CI-NEXT: v_cndmask_b32_e32 v0, -4.0, v2, vcc
2615 ; CI-NEXT: v_mul_f32_e32 v0, v0, v4
2616 ; CI-NEXT: v_mul_f32_e32 v1, v1, v3
2617 ; CI-NEXT: s_setpc_b64 s[30:31]
2619 ; VI-LABEL: mul_select_negfabs_negk_v2f16:
2621 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2622 ; VI-NEXT: v_or_b32_e32 v2, 0x80008000, v2
2623 ; VI-NEXT: v_mov_b32_e32 v4, 0xc400
2624 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2625 ; VI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
2626 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2627 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2628 ; VI-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
2629 ; VI-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
2630 ; VI-NEXT: v_mul_f16_e32 v0, v0, v3
2631 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
2632 ; VI-NEXT: s_setpc_b64 s[30:31]
2634 ; GFX9-LABEL: mul_select_negfabs_negk_v2f16:
2636 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2637 ; GFX9-NEXT: v_or_b32_e32 v2, 0x80008000, v2
2638 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xc400
2639 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2640 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
2641 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2642 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2643 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
2644 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
2645 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
2646 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3
2647 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2649 ; GFX11-LABEL: mul_select_negfabs_negk_v2f16:
2651 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2652 ; GFX11-NEXT: v_or_b32_e32 v2, 0x80008000, v2
2653 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
2654 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
2655 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
2656 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
2657 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
2658 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
2659 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2660 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
2661 ; GFX11-NEXT: v_pk_mul_f16 v0, v0, v3
2662 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2663 %cmp = icmp eq <2 x i32> %c, zeroinitializer
2664 %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
2665 %fneg.fabs.x = fneg <2 x half> %fabs.x
; True lanes (c == 0) take -|x|; false lanes take the constant -4.0.
2666 %select = select <2 x i1> %cmp, <2 x half> %fneg.fabs.x, <2 x half> <half -4.0, half -4.0>
; Despite its name, %add holds the product of the select result and %y.
2667 %add = fmul <2 x half> %select, %y
; Computes select(c == 0, -4.0, fneg(fabs(x))) * y on <2 x half>.
; Same operands as a select of -|x| against -4.0, but with the constant as the
; true value. In the expected output below the compare is emitted as
; "not equal" (v_cmp_ne_u32) while the v_cndmask operand order keeps the
; constant first; -|x| is formed with an or of 0x80008000 before the select.
2671 define <2 x half> @mul_select_negk_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
2672 ; CI-LABEL: mul_select_negk_negfabs_v2f16:
2674 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2675 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
2676 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
2677 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
2678 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
2679 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
2680 ; CI-NEXT: v_or_b32_e32 v2, v2, v3
2681 ; CI-NEXT: v_or_b32_e32 v2, 0x80008000, v2
2682 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v5
2683 ; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v2
2684 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
2685 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
2686 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
2687 ; CI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
2688 ; CI-NEXT: v_cndmask_b32_e32 v1, -4.0, v5, vcc
2689 ; CI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
2690 ; CI-NEXT: v_cndmask_b32_e32 v0, -4.0, v2, vcc
2691 ; CI-NEXT: v_mul_f32_e32 v0, v0, v4
2692 ; CI-NEXT: v_mul_f32_e32 v1, v1, v3
2693 ; CI-NEXT: s_setpc_b64 s[30:31]
2695 ; VI-LABEL: mul_select_negk_negfabs_v2f16:
2697 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2698 ; VI-NEXT: v_or_b32_e32 v2, 0x80008000, v2
2699 ; VI-NEXT: v_mov_b32_e32 v4, 0xc400
2700 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
2701 ; VI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
2702 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2703 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
2704 ; VI-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
2705 ; VI-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
2706 ; VI-NEXT: v_mul_f16_e32 v0, v0, v3
2707 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
2708 ; VI-NEXT: s_setpc_b64 s[30:31]
2710 ; GFX9-LABEL: mul_select_negk_negfabs_v2f16:
2712 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2713 ; GFX9-NEXT: v_or_b32_e32 v2, 0x80008000, v2
2714 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xc400
2715 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
2716 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
2717 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2718 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
2719 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
2720 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
2721 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
2722 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3
2723 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2725 ; GFX11-LABEL: mul_select_negk_negfabs_v2f16:
2727 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2728 ; GFX11-NEXT: v_or_b32_e32 v2, 0x80008000, v2
2729 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
2730 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
2731 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
2732 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
2733 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
2734 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
2735 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2736 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
2737 ; GFX11-NEXT: v_pk_mul_f16 v0, v0, v3
2738 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2739 %cmp = icmp eq <2 x i32> %c, zeroinitializer
2740 %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
2741 %fneg.fabs.x = fneg <2 x half> %fabs.x
; True lanes (c == 0) take the constant -4.0; false lanes take -|x|.
2742 %select = select <2 x i1> %cmp, <2 x half> <half -4.0, half -4.0>, <2 x half> %fneg.fabs.x
; Despite its name, %add holds the product of the select result and %y.
2743 %add = fmul <2 x half> %select, %y
2747 ; --------------------------------------------------------------------------------
2748 ; Don't fold if fneg can fold into the source
2749 ; --------------------------------------------------------------------------------
; Returns select(c == 0, fneg(x + 4.0), 2.0) on <2 x half>.
; Here the negated select operand is itself the result of an fadd, and the
; expected output differs between the two option sets used by this file.
; - With default options (the *-SAFE prefixes) the add of 4.0 is kept and the
;   sign bits are flipped afterwards with a separate xor of 0x80008000.
; - With -enable-no-signed-zeros-fp-math (the *-NSZ prefixes) the negate is
;   absorbed into the arithmetic, giving (-4.0 - x) with no xor.
; In both cases the constant 2.0 (0x4000 as a half) is selected unchanged.
; NOTE(review): %y is declared in the signature but is not used in the body.
2751 define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
2752 ; CI-SAFE-LABEL: select_fneg_posk_src_add_v2f16:
2754 ; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2755 ; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
2756 ; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
2757 ; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2758 ; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
2759 ; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
2760 ; CI-SAFE-NEXT: v_add_f32_e32 v3, 4.0, v3
2761 ; CI-SAFE-NEXT: v_add_f32_e32 v2, 4.0, v2
2762 ; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
2763 ; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
2764 ; CI-SAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3
2765 ; CI-SAFE-NEXT: v_or_b32_e32 v2, v2, v3
2766 ; CI-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
2767 ; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v2
2768 ; CI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2769 ; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
2770 ; CI-SAFE-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
2771 ; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2772 ; CI-SAFE-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
2773 ; CI-SAFE-NEXT: s_setpc_b64 s[30:31]
2775 ; VI-SAFE-LABEL: select_fneg_posk_src_add_v2f16:
2777 ; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2778 ; VI-SAFE-NEXT: v_mov_b32_e32 v3, 0x4400
2779 ; VI-SAFE-NEXT: v_add_f16_sdwa v3, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2780 ; VI-SAFE-NEXT: v_add_f16_e32 v2, 4.0, v2
2781 ; VI-SAFE-NEXT: v_or_b32_e32 v2, v2, v3
2782 ; VI-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
2783 ; VI-SAFE-NEXT: v_mov_b32_e32 v3, 0x4000
2784 ; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2785 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
2786 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2787 ; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2788 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
2789 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2790 ; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2791 ; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
2793 ; GFX9-SAFE-LABEL: select_fneg_posk_src_add_v2f16:
2794 ; GFX9-SAFE: ; %bb.0:
2795 ; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2796 ; GFX9-SAFE-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
2797 ; GFX9-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
2798 ; GFX9-SAFE-NEXT: v_mov_b32_e32 v3, 0x4000
2799 ; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2800 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
2801 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2802 ; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2803 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
2804 ; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
2805 ; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4
2806 ; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
2808 ; GFX11-SAFE-LABEL: select_fneg_posk_src_add_v2f16:
2809 ; GFX11-SAFE: ; %bb.0:
2810 ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2811 ; GFX11-SAFE-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
2812 ; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
2813 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
2814 ; GFX11-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
2815 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v2
2816 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
2817 ; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
2818 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
2819 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
2820 ; GFX11-SAFE-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
2821 ; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
2823 ; CI-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
2825 ; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2826 ; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2
2827 ; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v3, v3
2828 ; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2829 ; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2
2830 ; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v3, v3
2831 ; CI-NSZ-NEXT: v_sub_f32_e32 v2, -4.0, v2
2832 ; CI-NSZ-NEXT: v_sub_f32_e32 v3, -4.0, v3
2833 ; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
2834 ; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2835 ; CI-NSZ-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
2836 ; CI-NSZ-NEXT: s_setpc_b64 s[30:31]
2838 ; VI-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
2840 ; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2841 ; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0xc400
2842 ; VI-NSZ-NEXT: v_sub_f16_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
2843 ; VI-NSZ-NEXT: v_sub_f16_e32 v2, -4.0, v2
2844 ; VI-NSZ-NEXT: v_mov_b32_e32 v4, 0x4000
2845 ; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2846 ; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
2847 ; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2848 ; VI-NSZ-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
2849 ; VI-NSZ-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2850 ; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2851 ; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
2853 ; GFX9-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
2854 ; GFX9-NSZ: ; %bb.0:
2855 ; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2856 ; GFX9-NSZ-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
2857 ; GFX9-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000
2858 ; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2859 ; GFX9-NSZ-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
2860 ; GFX9-NSZ-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2861 ; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2862 ; GFX9-NSZ-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
2863 ; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100
2864 ; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4
2865 ; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31]
2867 ; GFX11-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
2868 ; GFX11-NSZ: ; %bb.0:
2869 ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2870 ; GFX11-NSZ-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
2871 ; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
2872 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
2873 ; GFX11-NSZ-NEXT: v_lshrrev_b32_e32 v3, 16, v2
2874 ; GFX11-NSZ-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
2875 ; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
2876 ; GFX11-NSZ-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
2877 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
2878 ; GFX11-NSZ-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
2879 ; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
2880 %cmp = icmp eq <2 x i32> %c, zeroinitializer
2881 %add = fadd <2 x half> %x, <half 4.0, half 4.0>
2882 %fneg = fneg <2 x half> %add
; True lanes (c == 0) take -(x + 4.0); false lanes take the constant 2.0.
2883 %select = select <2 x i1> %cmp, <2 x half> %fneg, <2 x half> <half 2.0, half 2.0>
2884 ret <2 x half> %select
; Per-lane select between fneg(x - 4.0) and the constant 2.0, keyed on %c == 0.
; The check lines for the default ("safe") runs keep the negate as an explicit
; sign-bit flip (v_xor_b32 with 0x80008000) after adding -4.0. The check lines
; for the -enable-no-signed-zeros-fp-math runs show the negate folded into the
; arithmetic instead: 4.0 - x via v_sub_f32 / v_sub_f16, or v_pk_add_f16 of 4.0
; with neg_lo/neg_hi set on x.
2887 define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) {
2888 ; CI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
2890 ; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2891 ; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
2892 ; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
2893 ; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2894 ; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
2895 ; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
2896 ; CI-SAFE-NEXT: v_add_f32_e32 v3, -4.0, v3
2897 ; CI-SAFE-NEXT: v_add_f32_e32 v2, -4.0, v2
2898 ; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
2899 ; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
2900 ; CI-SAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3
2901 ; CI-SAFE-NEXT: v_or_b32_e32 v2, v2, v3
2902 ; CI-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
2903 ; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v2
2904 ; CI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2905 ; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
2906 ; CI-SAFE-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
2907 ; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2908 ; CI-SAFE-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
2909 ; CI-SAFE-NEXT: s_setpc_b64 s[30:31]
2911 ; VI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
2913 ; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2914 ; VI-SAFE-NEXT: v_mov_b32_e32 v3, 0xc400
2915 ; VI-SAFE-NEXT: v_add_f16_sdwa v3, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2916 ; VI-SAFE-NEXT: v_add_f16_e32 v2, -4.0, v2
2917 ; VI-SAFE-NEXT: v_or_b32_e32 v2, v2, v3
2918 ; VI-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
2919 ; VI-SAFE-NEXT: v_mov_b32_e32 v3, 0x4000
2920 ; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2921 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
2922 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2923 ; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2924 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
2925 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2926 ; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2927 ; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
2929 ; GFX9-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
2930 ; GFX9-SAFE: ; %bb.0:
2931 ; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2932 ; GFX9-SAFE-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
2933 ; GFX9-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
2934 ; GFX9-SAFE-NEXT: v_mov_b32_e32 v3, 0x4000
2935 ; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2936 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
2937 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2938 ; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2939 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
2940 ; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
2941 ; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4
2942 ; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
2944 ; GFX11-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
2945 ; GFX11-SAFE: ; %bb.0:
2946 ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2947 ; GFX11-SAFE-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
2948 ; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
2949 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
2950 ; GFX11-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
2951 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v2
2952 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
2953 ; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
2954 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
2955 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
2956 ; GFX11-SAFE-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
2957 ; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
2959 ; CI-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
2961 ; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2962 ; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2
2963 ; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v3, v3
2964 ; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2965 ; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2
2966 ; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v3, v3
2967 ; CI-NSZ-NEXT: v_sub_f32_e32 v2, 4.0, v2
2968 ; CI-NSZ-NEXT: v_sub_f32_e32 v3, 4.0, v3
2969 ; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
2970 ; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2971 ; CI-NSZ-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
2972 ; CI-NSZ-NEXT: s_setpc_b64 s[30:31]
2974 ; VI-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
2976 ; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2977 ; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4400
2978 ; VI-NSZ-NEXT: v_sub_f16_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
2979 ; VI-NSZ-NEXT: v_sub_f16_e32 v2, 4.0, v2
2980 ; VI-NSZ-NEXT: v_mov_b32_e32 v4, 0x4000
2981 ; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2982 ; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
2983 ; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2984 ; VI-NSZ-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
2985 ; VI-NSZ-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2986 ; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2987 ; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
2989 ; GFX9-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
2990 ; GFX9-NSZ: ; %bb.0:
2991 ; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2992 ; GFX9-NSZ-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
2993 ; GFX9-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000
2994 ; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2995 ; GFX9-NSZ-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
2996 ; GFX9-NSZ-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2997 ; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2998 ; GFX9-NSZ-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
2999 ; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100
3000 ; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4
3001 ; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31]
3003 ; GFX11-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
3004 ; GFX11-NSZ: ; %bb.0:
3005 ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3006 ; GFX11-NSZ-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
3007 ; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3008 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
3009 ; GFX11-NSZ-NEXT: v_lshrrev_b32_e32 v3, 16, v2
3010 ; GFX11-NSZ-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
3011 ; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
3012 ; GFX11-NSZ-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
3013 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
3014 ; GFX11-NSZ-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
3015 ; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
3016 %cmp = icmp eq <2 x i32> %c, zeroinitializer
3017 %sub = fsub <2 x half> %x, <half 4.0, half 4.0>
3018 %fneg = fneg <2 x half> %sub
3019 %select = select <2 x i1> %cmp, <2 x half> %fneg, <2 x half> <half 2.0, half 2.0>
3020 ret <2 x half> %select
; Per-lane select between fneg(x * 4.0) and the constant 2.0, keyed on %c == 0.
; Each target uses a single check prefix shared by both run modes, and the
; expected code multiplies by -4.0 directly with no separate sign-bit flip.
3023 define <2 x half> @select_fneg_posk_src_mul_v2f16(<2 x i32> %c, <2 x half> %x) {
3024 ; CI-LABEL: select_fneg_posk_src_mul_v2f16:
3026 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3027 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
3028 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
3029 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3030 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
3031 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
3032 ; CI-NEXT: v_mul_f32_e32 v2, -4.0, v2
3033 ; CI-NEXT: v_mul_f32_e32 v3, -4.0, v3
3034 ; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
3035 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
3036 ; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
3037 ; CI-NEXT: s_setpc_b64 s[30:31]
3039 ; VI-LABEL: select_fneg_posk_src_mul_v2f16:
3041 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3042 ; VI-NEXT: v_mov_b32_e32 v3, 0xc400
3043 ; VI-NEXT: v_mul_f16_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
3044 ; VI-NEXT: v_mul_f16_e32 v2, -4.0, v2
3045 ; VI-NEXT: v_mov_b32_e32 v4, 0x4000
3046 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3047 ; VI-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
3048 ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
3049 ; VI-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
3050 ; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
3051 ; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3052 ; VI-NEXT: s_setpc_b64 s[30:31]
3054 ; GFX9-LABEL: select_fneg_posk_src_mul_v2f16:
3056 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3057 ; GFX9-NEXT: v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0]
3058 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x4000
3059 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3060 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
3061 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
3062 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
3063 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
3064 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
3065 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
3066 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3068 ; GFX11-LABEL: select_fneg_posk_src_mul_v2f16:
3070 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3071 ; GFX11-NEXT: v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0]
3072 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3073 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
3074 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v2
3075 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
3076 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
3077 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
3078 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
3079 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
3080 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3081 %cmp = icmp eq <2 x i32> %c, zeroinitializer
3082 %mul = fmul <2 x half> %x, <half 4.0, half 4.0>
3083 %fneg = fneg <2 x half> %mul
3084 %select = select <2 x i1> %cmp, <2 x half> %fneg, <2 x half> <half 2.0, half 2.0>
3085 ret <2 x half> %select
; Per-lane select between fneg(fma(x, 4.0, z)) and the constant 2.0, keyed on
; %c == 0. The hawaii checks use one prefix for both run modes and keep the
; explicit v_xor_b32 sign flip after v_fma_f32. On the other targets the
; default runs also keep the xor, while the no-signed-zeros runs expect the
; negate folded into the fma operands (-4.0 multiplier and a negated addend).
3088 define <2 x half> @select_fneg_posk_src_fma_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %z) {
3089 ; CI-LABEL: select_fneg_posk_src_fma_v2f16:
3091 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3092 ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
3093 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
3094 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
3095 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
3096 ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
3097 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
3098 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
3099 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
3100 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3101 ; CI-NEXT: v_fma_f32 v3, v3, 4.0, v5
3102 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
3103 ; CI-NEXT: v_fma_f32 v2, v2, 4.0, v4
3104 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
3105 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
3106 ; CI-NEXT: v_or_b32_e32 v2, v2, v3
3107 ; CI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
3108 ; CI-NEXT: v_cvt_f32_f16_e32 v3, v2
3109 ; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
3110 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
3111 ; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
3112 ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
3113 ; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
3114 ; CI-NEXT: s_setpc_b64 s[30:31]
3116 ; VI-SAFE-LABEL: select_fneg_posk_src_fma_v2f16:
3118 ; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3119 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v3
3120 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v2
3121 ; VI-SAFE-NEXT: v_fma_f16 v4, v5, 4.0, v4
3122 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v4
3123 ; VI-SAFE-NEXT: v_fma_f16 v2, v2, 4.0, v3
3124 ; VI-SAFE-NEXT: v_or_b32_e32 v2, v2, v4
3125 ; VI-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
3126 ; VI-SAFE-NEXT: v_mov_b32_e32 v3, 0x4000
3127 ; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3128 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
3129 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2
3130 ; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
3131 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
3132 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v1, 16, v1
3133 ; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3134 ; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
3136 ; GFX9-SAFE-LABEL: select_fneg_posk_src_fma_v2f16:
3137 ; GFX9-SAFE: ; %bb.0:
3138 ; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3139 ; GFX9-SAFE-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
3140 ; GFX9-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
3141 ; GFX9-SAFE-NEXT: v_mov_b32_e32 v3, 0x4000
3142 ; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3143 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
3144 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2
3145 ; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
3146 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
3147 ; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
3148 ; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4
3149 ; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
3151 ; GFX11-SAFE-LABEL: select_fneg_posk_src_fma_v2f16:
3152 ; GFX11-SAFE: ; %bb.0:
3153 ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3154 ; GFX11-SAFE-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
3155 ; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3156 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3157 ; GFX11-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
3158 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v2
3159 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
3160 ; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
3161 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
3162 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
3163 ; GFX11-SAFE-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
3164 ; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
3166 ; VI-NSZ-LABEL: select_fneg_posk_src_fma_v2f16:
3168 ; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3169 ; VI-NSZ-NEXT: v_lshrrev_b32_e32 v4, 16, v3
3170 ; VI-NSZ-NEXT: v_lshrrev_b32_e32 v5, 16, v2
3171 ; VI-NSZ-NEXT: v_fma_f16 v2, v2, -4.0, -v3
3172 ; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000
3173 ; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3174 ; VI-NSZ-NEXT: v_fma_f16 v4, v5, -4.0, -v4
3175 ; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
3176 ; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
3177 ; VI-NSZ-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
3178 ; VI-NSZ-NEXT: v_lshlrev_b32_e32 v1, 16, v1
3179 ; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3180 ; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
3182 ; GFX9-NSZ-LABEL: select_fneg_posk_src_fma_v2f16:
3183 ; GFX9-NSZ: ; %bb.0:
3184 ; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3185 ; GFX9-NSZ-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
3186 ; GFX9-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000
3187 ; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3188 ; GFX9-NSZ-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
3189 ; GFX9-NSZ-NEXT: v_lshrrev_b32_e32 v2, 16, v2
3190 ; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
3191 ; GFX9-NSZ-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
3192 ; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100
3193 ; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4
3194 ; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31]
3196 ; GFX11-NSZ-LABEL: select_fneg_posk_src_fma_v2f16:
3197 ; GFX11-NSZ: ; %bb.0:
3198 ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3199 ; GFX11-NSZ-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
3200 ; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3201 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
3202 ; GFX11-NSZ-NEXT: v_lshrrev_b32_e32 v3, 16, v2
3203 ; GFX11-NSZ-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
3204 ; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
3205 ; GFX11-NSZ-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
3206 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
3207 ; GFX11-NSZ-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
3208 ; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
3209 %cmp = icmp eq <2 x i32> %c, zeroinitializer
3210 %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> <half 4.0, half 4.0>, <2 x half> %z)
3211 %fneg = fneg <2 x half> %fma
3212 %select = select <2 x i1> %cmp, <2 x half> %fneg, <2 x half> <half 2.0, half 2.0>
3213 ret <2 x half> %select
; Per-lane select between fneg(fmuladd(x, 4.0, z)) and the constant 2.0, keyed
; on %c == 0. Default runs expect the multiply-add (separate mul + add on
; hawaii, a single fma on the other targets) followed by an explicit v_xor_b32
; sign flip. No-signed-zeros runs expect the negate folded in: hawaii multiplies
; by -4.0 and then subtracts z, and the other targets use an fma with a -4.0
; multiplier and a negated addend.
3216 define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %z) {
3217 ; CI-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16:
3219 ; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3220 ; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
3221 ; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
3222 ; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
3223 ; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
3224 ; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
3225 ; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
3226 ; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
3227 ; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
3228 ; CI-SAFE-NEXT: v_mul_f32_e32 v3, 4.0, v3
3229 ; CI-SAFE-NEXT: v_add_f32_e32 v3, v3, v5
3230 ; CI-SAFE-NEXT: v_mul_f32_e32 v2, 4.0, v2
3231 ; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
3232 ; CI-SAFE-NEXT: v_add_f32_e32 v2, v2, v4
3233 ; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
3234 ; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3235 ; CI-SAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3
3236 ; CI-SAFE-NEXT: v_or_b32_e32 v2, v2, v3
3237 ; CI-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
3238 ; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v2
3239 ; CI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2
3240 ; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
3241 ; CI-SAFE-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
3242 ; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
3243 ; CI-SAFE-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
3244 ; CI-SAFE-NEXT: s_setpc_b64 s[30:31]
3246 ; VI-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16:
3248 ; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3249 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v3
3250 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v2
3251 ; VI-SAFE-NEXT: v_fma_f16 v4, v5, 4.0, v4
3252 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v4
3253 ; VI-SAFE-NEXT: v_fma_f16 v2, v2, 4.0, v3
3254 ; VI-SAFE-NEXT: v_or_b32_e32 v2, v2, v4
3255 ; VI-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
3256 ; VI-SAFE-NEXT: v_mov_b32_e32 v3, 0x4000
3257 ; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3258 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
3259 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2
3260 ; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
3261 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
3262 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v1, 16, v1
3263 ; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3264 ; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
3266 ; GFX9-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16:
3267 ; GFX9-SAFE: ; %bb.0:
3268 ; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3269 ; GFX9-SAFE-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
3270 ; GFX9-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
3271 ; GFX9-SAFE-NEXT: v_mov_b32_e32 v3, 0x4000
3272 ; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3273 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
3274 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2
3275 ; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
3276 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
3277 ; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
3278 ; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4
3279 ; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
3281 ; GFX11-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16:
3282 ; GFX11-SAFE: ; %bb.0:
3283 ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3284 ; GFX11-SAFE-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
3285 ; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3286 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3287 ; GFX11-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
3288 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v2
3289 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
3290 ; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
3291 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
3292 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
3293 ; GFX11-SAFE-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
3294 ; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
3296 ; CI-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
3298 ; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3299 ; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2
3300 ; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v3, v3
3301 ; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v4, v4
3302 ; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v5, v5
3303 ; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2
3304 ; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v3, v3
3305 ; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v4, v4
3306 ; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v5, v5
3307 ; CI-NSZ-NEXT: v_mul_f32_e32 v2, -4.0, v2
3308 ; CI-NSZ-NEXT: v_mul_f32_e32 v3, -4.0, v3
3309 ; CI-NSZ-NEXT: v_sub_f32_e32 v2, v2, v4
3310 ; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3311 ; CI-NSZ-NEXT: v_sub_f32_e32 v3, v3, v5
3312 ; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
3313 ; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
3314 ; CI-NSZ-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
3315 ; CI-NSZ-NEXT: s_setpc_b64 s[30:31]
3317 ; VI-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
3319 ; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3320 ; VI-NSZ-NEXT: v_lshrrev_b32_e32 v4, 16, v3
3321 ; VI-NSZ-NEXT: v_lshrrev_b32_e32 v5, 16, v2
3322 ; VI-NSZ-NEXT: v_fma_f16 v2, v2, -4.0, -v3
3323 ; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000
3324 ; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3325 ; VI-NSZ-NEXT: v_fma_f16 v4, v5, -4.0, -v4
3326 ; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
3327 ; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
3328 ; VI-NSZ-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
3329 ; VI-NSZ-NEXT: v_lshlrev_b32_e32 v1, 16, v1
3330 ; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3331 ; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
3333 ; GFX9-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
3334 ; GFX9-NSZ: ; %bb.0:
3335 ; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3336 ; GFX9-NSZ-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
3337 ; GFX9-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000
3338 ; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
3339 ; GFX9-NSZ-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
3340 ; GFX9-NSZ-NEXT: v_lshrrev_b32_e32 v2, 16, v2
3341 ; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
3342 ; GFX9-NSZ-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
3343 ; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100
3344 ; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4
3345 ; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31]
3347 ; GFX11-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
3348 ; GFX11-NSZ: ; %bb.0:
3349 ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3350 ; GFX11-NSZ-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
3351 ; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
3352 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
3353 ; GFX11-NSZ-NEXT: v_lshrrev_b32_e32 v3, 16, v2
3354 ; GFX11-NSZ-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
3355 ; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
3356 ; GFX11-NSZ-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
3357 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
3358 ; GFX11-NSZ-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
3359 ; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
3360 %cmp = icmp eq <2 x i32> %c, zeroinitializer
3361 %fmad = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> %x, <2 x half> <half 4.0, half 4.0>, <2 x half> %z)
3362 %fneg = fneg <2 x half> %fmad
3363 %select = select <2 x i1> %cmp, <2 x half> %fneg, <2 x half> <half 2.0, half 2.0>
3364 ret <2 x half> %select
; Intrinsic declarations. llvm.fma and llvm.fmuladd are called by the fma/fmad
; tests above; llvm.fabs is presumably used by tests earlier in the file (not
; visible from here). All three share attribute group #0.
3367 declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
3368 declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
3369 declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
3371 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }