1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
; Flips the sign bit of a scalar i32 select result (xor with -2147483648,
; i.e. 0x80000000). The expected output for every run line has no separate
; v_xor: both v_cndmask_b32 inputs carry a neg source modifier instead.
6 define i32 @fneg_xor_select_i32(i1 %cond, i32 %arg0, i32 %arg1) {
7 ; GCN-LABEL: fneg_xor_select_i32:
9 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10 ; GCN-NEXT: v_and_b32_e32 v0, 1, v0
11 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
12 ; GCN-NEXT: v_cndmask_b32_e64 v0, -v2, -v1, vcc
13 ; GCN-NEXT: s_setpc_b64 s[30:31]
15 ; GFX11-LABEL: fneg_xor_select_i32:
17 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
19 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
20 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
21 ; GFX11-NEXT: v_cndmask_b32_e64 v0, -v2, -v1, vcc_lo
22 ; GFX11-NEXT: s_setpc_b64 s[30:31]
23 %select = select i1 %cond, i32 %arg0, i32 %arg1
24 %fneg = xor i32 %select, -2147483648
; <2 x i32> variant: each lane of the select result is xor'ed with the i32
; sign-bit mask. The expected output uses one v_cndmask_b32_e64 per lane with
; negated inputs and no explicit v_xor.
28 define <2 x i32> @fneg_xor_select_v2i32(<2 x i1> %cond, <2 x i32> %arg0, <2 x i32> %arg1) {
29 ; GCN-LABEL: fneg_xor_select_v2i32:
31 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
32 ; GCN-NEXT: v_and_b32_e32 v0, 1, v0
33 ; GCN-NEXT: v_and_b32_e32 v1, 1, v1
34 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
35 ; GCN-NEXT: v_cndmask_b32_e64 v0, -v4, -v2, vcc
36 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
37 ; GCN-NEXT: v_cndmask_b32_e64 v1, -v5, -v3, vcc
38 ; GCN-NEXT: s_setpc_b64 s[30:31]
40 ; GFX11-LABEL: fneg_xor_select_v2i32:
42 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
44 ; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
45 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
46 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
47 ; GFX11-NEXT: v_cndmask_b32_e64 v0, -v4, -v2, vcc_lo
48 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
49 ; GFX11-NEXT: v_cndmask_b32_e64 v1, -v5, -v3, vcc_lo
50 ; GFX11-NEXT: s_setpc_b64 s[30:31]
51 %select = select <2 x i1> %cond, <2 x i32> %arg0, <2 x i32> %arg1
52 %fneg = xor <2 x i32> %select, <i32 -2147483648, i32 -2147483648>
; The i32 select result has two users: it is stored to %ptr unmodified and it
; is sign-flipped by the xor. In the expected output the v_cndmask_b32 takes
; plain (un-negated) inputs and the sign flip stays a separate
; v_xor_b32 with 0x80000000.
56 define i32 @fneg_xor_select_i32_multi_use(i1 %cond, i32 %arg0, i32 %arg1, ptr addrspace(1) %ptr) {
57 ; GFX7-LABEL: fneg_xor_select_i32_multi_use:
59 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60 ; GFX7-NEXT: v_and_b32_e32 v0, 1, v0
61 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
62 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
63 ; GFX7-NEXT: flat_store_dword v[3:4], v0
64 ; GFX7-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
65 ; GFX7-NEXT: s_waitcnt vmcnt(0)
66 ; GFX7-NEXT: s_setpc_b64 s[30:31]
68 ; GFX9-LABEL: fneg_xor_select_i32_multi_use:
70 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
71 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
72 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
73 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
74 ; GFX9-NEXT: global_store_dword v[3:4], v0, off
75 ; GFX9-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
76 ; GFX9-NEXT: s_waitcnt vmcnt(0)
77 ; GFX9-NEXT: s_setpc_b64 s[30:31]
79 ; GFX11-LABEL: fneg_xor_select_i32_multi_use:
81 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
83 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
84 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
85 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo
86 ; GFX11-NEXT: v_xor_b32_e32 v0, 0x80000000, v1
87 ; GFX11-NEXT: global_store_b32 v[3:4], v1, off
88 ; GFX11-NEXT: s_setpc_b64 s[30:31]
89 %select = select i1 %cond, i32 %arg0, i32 %arg1
90 store i32 %select, ptr addrspace(1) %ptr
91 %fneg = xor i32 %select, -2147483648
; i64 variant: the select result is xor'ed with 9223372036854775808 (only the
; top bit of the 64-bit value set). The expected output selects the low half
; with a plain v_cndmask_b32 and the high half with a v_cndmask_b32_e64 whose
; inputs are negated; there is no explicit v_xor.
95 define i64 @fneg_xor_select_i64(i1 %cond, i64 %arg0, i64 %arg1) {
96 ; GCN-LABEL: fneg_xor_select_i64:
98 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
99 ; GCN-NEXT: v_and_b32_e32 v0, 1, v0
100 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
101 ; GCN-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
102 ; GCN-NEXT: v_cndmask_b32_e64 v1, -v4, -v2, vcc
103 ; GCN-NEXT: s_setpc_b64 s[30:31]
105 ; GFX11-LABEL: fneg_xor_select_i64:
107 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
109 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
110 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
111 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc_lo
112 ; GFX11-NEXT: v_cndmask_b32_e64 v1, -v4, -v2, vcc_lo
113 ; GFX11-NEXT: s_setpc_b64 s[30:31]
114 %select = select i1 %cond, i64 %arg0, i64 %arg1
115 %fneg = xor i64 %select, 9223372036854775808
; <2 x i64> variant: each element of the select result is xor'ed with the
; 64-bit sign-bit mask. Per element, the expected output selects the low half
; with plain inputs and the high half with negated inputs; no explicit v_xor
; appears.
119 define <2 x i64> @fneg_xor_select_v2i64(<2 x i1> %cond, <2 x i64> %arg0, <2 x i64> %arg1) {
120 ; GCN-LABEL: fneg_xor_select_v2i64:
122 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
123 ; GCN-NEXT: v_and_b32_e32 v1, 1, v1
124 ; GCN-NEXT: v_and_b32_e32 v0, 1, v0
125 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
126 ; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v1
127 ; GCN-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc
128 ; GCN-NEXT: v_cndmask_b32_e64 v2, v8, v4, s[4:5]
129 ; GCN-NEXT: v_cndmask_b32_e64 v1, -v7, -v3, vcc
130 ; GCN-NEXT: v_cndmask_b32_e64 v3, -v9, -v5, s[4:5]
131 ; GCN-NEXT: s_setpc_b64 s[30:31]
133 ; GFX11-LABEL: fneg_xor_select_v2i64:
135 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
137 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
138 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
139 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v6, v2 :: v_dual_and_b32 v1, 1, v1
140 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v1
141 ; GFX11-NEXT: v_cndmask_b32_e64 v1, -v7, -v3, vcc_lo
142 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
143 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, v4, s0
144 ; GFX11-NEXT: v_cndmask_b32_e64 v3, -v9, -v5, s0
145 ; GFX11-NEXT: s_setpc_b64 s[30:31]
146 %select = select <2 x i1> %cond, <2 x i64> %arg0, <2 x i64> %arg1
147 %fneg = xor <2 x i64> %select, <i64 9223372036854775808, i64 9223372036854775808>
; i16 variant: the select result is xor'ed with -32768 (the i16 sign bit).
; Unlike the i32/i64 cases, the expected output keeps a plain v_cndmask_b32
; followed by an explicit v_xor_b32 with 0xffff8000.
151 define i16 @fneg_xor_select_i16(i1 %cond, i16 %arg0, i16 %arg1) {
152 ; GCN-LABEL: fneg_xor_select_i16:
154 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155 ; GCN-NEXT: v_and_b32_e32 v0, 1, v0
156 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
157 ; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
158 ; GCN-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
159 ; GCN-NEXT: s_setpc_b64 s[30:31]
161 ; GFX11-LABEL: fneg_xor_select_i16:
163 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
165 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
166 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
167 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
168 ; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
169 ; GFX11-NEXT: s_setpc_b64 s[30:31]
170 %select = select i1 %cond, i16 %arg0, i16 %arg1
171 %fneg = xor i16 %select, -32768
; <2 x i16> variant: each element of the select result is xor'ed with the i16
; sign bit. The expected output keeps explicit xors in every run:
;  - gfx700 xors each element with 0x8000 and then repacks with shift/and/or;
;  - gfx900 and gfx1100 pack the two selected halves with v_perm_b32 and apply
;    a single v_xor_b32 with 0x80008000.
175 define <2 x i16> @fneg_xor_select_v2i16(<2 x i1> %cond, <2 x i16> %arg0, <2 x i16> %arg1) {
176 ; GFX7-LABEL: fneg_xor_select_v2i16:
178 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
179 ; GFX7-NEXT: v_and_b32_e32 v0, 1, v0
180 ; GFX7-NEXT: v_and_b32_e32 v1, 1, v1
181 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
182 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
183 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
184 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc
185 ; GFX7-NEXT: v_xor_b32_e32 v1, 0x8000, v1
186 ; GFX7-NEXT: v_xor_b32_e32 v0, 0x8000, v0
187 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v1
188 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
189 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v2
190 ; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1
191 ; GFX7-NEXT: s_setpc_b64 s[30:31]
193 ; GFX9-LABEL: fneg_xor_select_v2i16:
195 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
197 ; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
198 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
199 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
200 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
201 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
202 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
203 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
204 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
205 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
206 ; GFX9-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
207 ; GFX9-NEXT: s_setpc_b64 s[30:31]
209 ; GFX11-LABEL: fneg_xor_select_v2i16:
211 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
213 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
214 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v3
215 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
216 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
217 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v3, v2 :: v_dual_and_b32 v1, 1, v1
218 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
219 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
220 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc_lo
221 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
222 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
223 ; GFX11-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
224 ; GFX11-NEXT: s_setpc_b64 s[30:31]
225 %select = select <2 x i1> %cond, <2 x i16> %arg0, <2 x i16> %arg1
226 %fneg = xor <2 x i16> %select, <i16 -32768, i16 -32768>
; The i16 select result has two users: it is stored to %ptr unmodified and it
; is sign-flipped by xor with -32768. The expected output uses a plain
; v_cndmask_b32, a 16-bit store of that value, and an explicit v_xor_b32 with
; 0xffff8000 for the returned value.
230 define i16 @fneg_xor_select_i16_multi_use(i1 %cond, i16 %arg0, i16 %arg1, ptr addrspace(1) %ptr) {
231 ; GFX7-LABEL: fneg_xor_select_i16_multi_use:
233 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
234 ; GFX7-NEXT: v_and_b32_e32 v0, 1, v0
235 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
236 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
237 ; GFX7-NEXT: flat_store_short v[3:4], v0
238 ; GFX7-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
239 ; GFX7-NEXT: s_waitcnt vmcnt(0)
240 ; GFX7-NEXT: s_setpc_b64 s[30:31]
242 ; GFX9-LABEL: fneg_xor_select_i16_multi_use:
244 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
246 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
247 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
248 ; GFX9-NEXT: global_store_short v[3:4], v0, off
249 ; GFX9-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
250 ; GFX9-NEXT: s_waitcnt vmcnt(0)
251 ; GFX9-NEXT: s_setpc_b64 s[30:31]
253 ; GFX11-LABEL: fneg_xor_select_i16_multi_use:
255 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
256 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
257 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
258 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
259 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo
260 ; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff8000, v1
261 ; GFX11-NEXT: global_store_b16 v[3:4], v1, off
262 ; GFX11-NEXT: s_setpc_b64 s[30:31]
263 %select = select i1 %cond, i16 %arg0, i16 %arg1
264 store i16 %select, ptr addrspace(1) %ptr
265 %fneg = xor i16 %select, -32768
; The i64 select result has two users: it is stored to %ptr unmodified and it
; is sign-flipped by the xor. In the expected output the stored value comes
; from a pair of plain v_cndmask_b32, while the returned high half is produced
; by an additional v_cndmask_b32_e64 with negated inputs (no explicit v_xor).
269 define i64 @fneg_xor_select_i64_multi_user(i1 %cond, i64 %arg0, i64 %arg1, ptr addrspace(1) %ptr) {
270 ; GFX7-LABEL: fneg_xor_select_i64_multi_user:
272 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273 ; GFX7-NEXT: v_and_b32_e32 v0, 1, v0
274 ; GFX7-NEXT: v_mov_b32_e32 v7, v1
275 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
276 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
277 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v7, vcc
278 ; GFX7-NEXT: flat_store_dwordx2 v[5:6], v[0:1]
279 ; GFX7-NEXT: v_cndmask_b32_e64 v1, -v4, -v2, vcc
280 ; GFX7-NEXT: s_waitcnt vmcnt(0)
281 ; GFX7-NEXT: s_setpc_b64 s[30:31]
283 ; GFX9-LABEL: fneg_xor_select_i64_multi_user:
285 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
286 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
287 ; GFX9-NEXT: v_mov_b32_e32 v7, v1
288 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
289 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
290 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v7, vcc
291 ; GFX9-NEXT: global_store_dwordx2 v[5:6], v[0:1], off
292 ; GFX9-NEXT: v_cndmask_b32_e64 v1, -v4, -v2, vcc
293 ; GFX9-NEXT: s_waitcnt vmcnt(0)
294 ; GFX9-NEXT: s_setpc_b64 s[30:31]
296 ; GFX11-LABEL: fneg_xor_select_i64_multi_user:
298 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
299 ; GFX11-NEXT: v_dual_mov_b32 v7, v1 :: v_dual_and_b32 v0, 1, v0
300 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
301 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
302 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v4, v2 :: v_dual_cndmask_b32 v0, v3, v7
303 ; GFX11-NEXT: v_cndmask_b32_e64 v2, -v4, -v2, vcc_lo
304 ; GFX11-NEXT: global_store_b64 v[5:6], v[0:1], off
305 ; GFX11-NEXT: v_mov_b32_e32 v1, v2
306 ; GFX11-NEXT: s_setpc_b64 s[30:31]
307 %select = select i1 %cond, i64 %arg0, i64 %arg1
308 store i64 %select, ptr addrspace(1) %ptr
309 %fneg = xor i64 %select, 9223372036854775808
; Chained integer pattern: %arg0 is sign-flipped, %cond0 selects between %arg1
; and that flipped value, and %cond1 then selects between the sign-flipped and
; the unflipped form of the first select. The expected output keeps both
; sign flips as explicit v_xor_b32 with 0x80000000 and uses plain
; v_cndmask_b32 for both selects.
313 define i32 @select_fneg_xor_select_i32(i1 %cond0, i1 %cond1, i32 %arg0, i32 %arg1) {
314 ; GCN-LABEL: select_fneg_xor_select_i32:
316 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
317 ; GCN-NEXT: v_and_b32_e32 v0, 1, v0
318 ; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
319 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
320 ; GCN-NEXT: v_and_b32_e32 v1, 1, v1
321 ; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
322 ; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v0
323 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
324 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
325 ; GCN-NEXT: s_setpc_b64 s[30:31]
327 ; GFX11-LABEL: select_fneg_xor_select_i32:
329 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
330 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
331 ; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
332 ; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
333 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
334 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
335 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
336 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
337 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
338 ; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v0
339 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
340 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
341 ; GFX11-NEXT: s_setpc_b64 s[30:31]
342 %fneg0 = xor i32 %arg0, -2147483648
343 %select0 = select i1 %cond0, i32 %arg1, i32 %fneg0
344 %fneg1 = xor i32 %select0, -2147483648
345 %select1 = select i1 %cond1, i32 %fneg1, i32 %select0
; Chained floating-point pattern using real fneg instructions on float:
; fneg of %arg0 feeds the first select, and fneg of that select feeds the
; second. The expected output has no v_xor: each negation appears as a neg
; source modifier on one input of a v_cndmask_b32_e64.
349 define float @select_fneg_select_f32(i1 %cond0, i1 %cond1, float %arg0, float %arg1) {
350 ; GCN-LABEL: select_fneg_select_f32:
352 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
353 ; GCN-NEXT: v_and_b32_e32 v0, 1, v0
354 ; GCN-NEXT: v_and_b32_e32 v1, 1, v1
355 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
356 ; GCN-NEXT: v_cndmask_b32_e64 v0, -v2, v3, vcc
357 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
358 ; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -v0, vcc
359 ; GCN-NEXT: s_setpc_b64 s[30:31]
361 ; GFX11-LABEL: select_fneg_select_f32:
363 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
365 ; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
366 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
367 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
368 ; GFX11-NEXT: v_cndmask_b32_e64 v0, -v2, v3, vcc_lo
369 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
370 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
371 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, -v0, vcc_lo
372 ; GFX11-NEXT: s_setpc_b64 s[30:31]
373 %fneg0 = fneg float %arg0
374 %select0 = select i1 %cond0, float %arg1, float %fneg0
375 %fneg1 = fneg float %select0
376 %select1 = select i1 %cond1, float %fneg1, float %select0
; Double variant. Note that despite "xor" in the name, the IR here applies a
; real fneg to the select result. The expected output selects the low half
; with plain inputs and the high half with a v_cndmask_b32_e64 whose inputs
; are negated.
380 define double @fneg_xor_select_f64(i1 %cond, double %arg0, double %arg1) {
381 ; GCN-LABEL: fneg_xor_select_f64:
383 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384 ; GCN-NEXT: v_and_b32_e32 v0, 1, v0
385 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
386 ; GCN-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
387 ; GCN-NEXT: v_cndmask_b32_e64 v1, -v4, -v2, vcc
388 ; GCN-NEXT: s_setpc_b64 s[30:31]
390 ; GFX11-LABEL: fneg_xor_select_f64:
392 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
393 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
394 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
395 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
396 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc_lo
397 ; GFX11-NEXT: v_cndmask_b32_e64 v1, -v4, -v2, vcc_lo
398 ; GFX11-NEXT: s_setpc_b64 s[30:31]
399 %select = select i1 %cond, double %arg0, double %arg1
400 %fneg = fneg double %select
; The double select result has two users: it is stored to %ptr unmodified and
; it is negated with fneg. In the expected output both halves are selected
; with plain inputs, and the negation of the returned value is an explicit
; v_xor_b32 with 0x80000000 on the high half.
404 define double @fneg_xor_select_f64_multi_user(i1 %cond, double %arg0, double %arg1, ptr addrspace(1) %ptr) {
405 ; GFX7-LABEL: fneg_xor_select_f64_multi_user:
407 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
408 ; GFX7-NEXT: v_and_b32_e32 v0, 1, v0
409 ; GFX7-NEXT: v_mov_b32_e32 v7, v1
410 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
411 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
412 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v7, vcc
413 ; GFX7-NEXT: flat_store_dwordx2 v[5:6], v[0:1]
414 ; GFX7-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
415 ; GFX7-NEXT: s_waitcnt vmcnt(0)
416 ; GFX7-NEXT: s_setpc_b64 s[30:31]
418 ; GFX9-LABEL: fneg_xor_select_f64_multi_user:
420 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
421 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
422 ; GFX9-NEXT: v_mov_b32_e32 v7, v1
423 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
424 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
425 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v7, vcc
426 ; GFX9-NEXT: global_store_dwordx2 v[5:6], v[0:1], off
427 ; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
428 ; GFX9-NEXT: s_waitcnt vmcnt(0)
429 ; GFX9-NEXT: s_setpc_b64 s[30:31]
431 ; GFX11-LABEL: fneg_xor_select_f64_multi_user:
433 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
434 ; GFX11-NEXT: v_dual_mov_b32 v7, v1 :: v_dual_and_b32 v0, 1, v0
435 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
436 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
437 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v4, v2 :: v_dual_cndmask_b32 v0, v3, v7
438 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
439 ; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v1
440 ; GFX11-NEXT: global_store_b64 v[5:6], v[0:1], off
441 ; GFX11-NEXT: v_mov_b32_e32 v1, v2
442 ; GFX11-NEXT: s_setpc_b64 s[30:31]
443 %select = select i1 %cond, double %arg0, double %arg1
444 store double %select, ptr addrspace(1) %ptr
445 %fneg = fneg double %select
; The sign-flipped i64 select is bitcast to double and consumed by an fadd
; with 2.0. In the expected output both halves are selected with plain
; v_cndmask inputs and the negation appears as a neg source modifier on the
; v_add_f64 operand (-v[1:2]); there is no explicit v_xor.
449 define double @fneg_xor_select_i64_user_with_srcmods(i1 %cond, i64 %arg0, i64 %arg1) {
450 ; GCN-LABEL: fneg_xor_select_i64_user_with_srcmods:
452 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
453 ; GCN-NEXT: v_and_b32_e32 v0, 1, v0
454 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
455 ; GCN-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
456 ; GCN-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
457 ; GCN-NEXT: v_add_f64 v[0:1], -v[1:2], 2.0
458 ; GCN-NEXT: s_setpc_b64 s[30:31]
460 ; GFX11-LABEL: fneg_xor_select_i64_user_with_srcmods:
462 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
463 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
464 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
465 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
466 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v3, v1 :: v_dual_cndmask_b32 v2, v4, v2
467 ; GFX11-NEXT: v_add_f64 v[0:1], -v[1:2], 2.0
468 ; GFX11-NEXT: s_setpc_b64 s[30:31]
469 %select = select i1 %cond, i64 %arg0, i64 %arg1
470 %fneg = xor i64 %select, 9223372036854775808
471 %cast = bitcast i64 %fneg to double
472 %add = fadd double %cast, 2.0
; Chained double pattern: fneg of %arg0 feeds the first select, and fneg of
; that select feeds the second. The expected output keeps both negations as
; explicit v_xor_b32 with 0x80000000 on the high half; the low half is
; selected once and only the high half goes through the second v_cndmask.
476 define double @select_fneg_select_fneg_f64(i1 %cond0, i1 %cond1, double %arg0, double %arg1) {
477 ; GCN-LABEL: select_fneg_select_fneg_f64:
479 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
480 ; GCN-NEXT: v_and_b32_e32 v0, 1, v0
481 ; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
482 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
483 ; GCN-NEXT: v_and_b32_e32 v1, 1, v1
484 ; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc
485 ; GCN-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc
486 ; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v2
487 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
488 ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
489 ; GCN-NEXT: s_setpc_b64 s[30:31]
491 ; GFX11-LABEL: select_fneg_select_fneg_f64:
493 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
494 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
495 ; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
496 ; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
497 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
498 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
499 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc_lo
500 ; GFX11-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc_lo
501 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
502 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
503 ; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v2
504 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
505 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo
506 ; GFX11-NEXT: s_setpc_b64 s[30:31]
507 %fneg0 = fneg double %arg0
508 %select0 = select i1 %cond0, double %arg1, double %fneg0
509 %fneg1 = fneg double %select0
510 %select1 = select i1 %cond1, double %fneg1, double %select0
; Chained i64 pattern: %arg0 is sign-flipped by xor, %cond0 selects between
; %arg1 and that value, and %cond1 selects between the sign-flipped and the
; unflipped form of the first select. The expected output keeps both sign
; flips as explicit v_xor_b32 with 0x80000000 on the high half; the low half
; is selected once.
514 define i64 @select_fneg_xor_select_i64(i1 %cond0, i1 %cond1, i64 %arg0, i64 %arg1) {
515 ; GCN-LABEL: select_fneg_xor_select_i64:
517 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
518 ; GCN-NEXT: v_and_b32_e32 v0, 1, v0
519 ; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
520 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
521 ; GCN-NEXT: v_and_b32_e32 v1, 1, v1
522 ; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc
523 ; GCN-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc
524 ; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v2
525 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
526 ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
527 ; GCN-NEXT: s_setpc_b64 s[30:31]
529 ; GFX11-LABEL: select_fneg_xor_select_i64:
531 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
532 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
533 ; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
534 ; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
535 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
536 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
537 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc_lo
538 ; GFX11-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc_lo
539 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
540 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
541 ; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v2
542 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
543 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo
544 ; GFX11-NEXT: s_setpc_b64 s[30:31]
545 %fneg0 = xor i64 %arg0, 9223372036854775808
546 %select0 = select i1 %cond0, i64 %arg1, i64 %fneg0
547 %fneg1 = xor i64 %select0, 9223372036854775808
548 %select1 = select i1 %cond1, i64 %fneg1, i64 %select0
; Chained half pattern: fneg of %arg0 feeds the first select, and fneg of that
; select feeds the second. Expected output differs per run:
;  - gfx700 round-trips the arguments through v_cvt_f16_f32 / v_cvt_f32_f16,
;    with the first negation as a neg modifier on the convert and the second
;    as a neg modifier on the final v_cndmask_b32_e64;
;  - gfx900 and gfx1100 keep both negations as explicit v_xor_b32 with 0x8000.
552 define half @select_fneg_select_f16(i1 %cond0, i1 %cond1, half %arg0, half %arg1) {
553 ; GFX7-LABEL: select_fneg_select_f16:
555 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
556 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
557 ; GFX7-NEXT: v_cvt_f16_f32_e64 v2, -v2
558 ; GFX7-NEXT: v_and_b32_e32 v0, 1, v0
559 ; GFX7-NEXT: v_and_b32_e32 v1, 1, v1
560 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
561 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
562 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
563 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
564 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
565 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, -v0, vcc
566 ; GFX7-NEXT: s_setpc_b64 s[30:31]
568 ; GFX9-LABEL: select_fneg_select_f16:
570 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
571 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
572 ; GFX9-NEXT: v_xor_b32_e32 v2, 0x8000, v2
573 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
574 ; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
575 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
576 ; GFX9-NEXT: v_xor_b32_e32 v2, 0x8000, v0
577 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
578 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
579 ; GFX9-NEXT: s_setpc_b64 s[30:31]
581 ; GFX11-LABEL: select_fneg_select_f16:
583 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
584 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
585 ; GFX11-NEXT: v_xor_b32_e32 v2, 0x8000, v2
586 ; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
587 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
588 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
589 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
590 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
591 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
592 ; GFX11-NEXT: v_xor_b32_e32 v2, 0x8000, v0
593 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
594 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
595 ; GFX11-NEXT: s_setpc_b64 s[30:31]
596 %fneg0 = fneg half %arg0
597 %select0 = select i1 %cond0, half %arg1, half %fneg0
598 %fneg1 = fneg half %select0
599 %select1 = select i1 %cond1, half %fneg1, half %select0
; Chained i16 pattern: %arg0 is sign-flipped by xor with -32768, %cond0
; selects between %arg1 and that value, and %cond1 selects between the
; sign-flipped and the unflipped form of the first select. The expected
; output keeps both sign flips as explicit v_xor_b32 with 0xffff8000 and uses
; plain v_cndmask_b32 for both selects.
603 define i16 @select_fneg_xor_select_i16(i1 %cond0, i1 %cond1, i16 %arg0, i16 %arg1) {
604 ; GCN-LABEL: select_fneg_xor_select_i16:
606 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
607 ; GCN-NEXT: v_and_b32_e32 v0, 1, v0
608 ; GCN-NEXT: v_xor_b32_e32 v2, 0xffff8000, v2
609 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
610 ; GCN-NEXT: v_and_b32_e32 v1, 1, v1
611 ; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
612 ; GCN-NEXT: v_xor_b32_e32 v2, 0xffff8000, v0
613 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
614 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
615 ; GCN-NEXT: s_setpc_b64 s[30:31]
617 ; GFX11-LABEL: select_fneg_xor_select_i16:
619 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
620 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
621 ; GFX11-NEXT: v_xor_b32_e32 v2, 0xffff8000, v2
622 ; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
623 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
624 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
625 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
626 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
627 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
628 ; GFX11-NEXT: v_xor_b32_e32 v2, 0xffff8000, v0
629 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
630 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
631 ; GFX11-NEXT: s_setpc_b64 s[30:31]
632 %fneg0 = xor i16 %arg0, -32768
633 %select0 = select i1 %cond0, i16 %arg1, i16 %fneg0
634 %fneg1 = xor i16 %select0, -32768
635 %select1 = select i1 %cond1, i16 %fneg1, i16 %select0
; Chained <2 x half> pattern: fneg of %arg0 feeds the first vector select, and
; fneg of that select feeds the second; the second select is returned.
; In every run the expected output keeps both negations as explicit
; v_xor_b32 with 0x80008000 on a packed 32-bit value:
;  - gfx700 converts the inputs with v_cvt_f16_f32, packs them with
;    shift/or before each xor, and converts back with v_cvt_f32_f16 for the
;    per-element v_cndmask_b32;
;  - gfx900 and gfx1100 split halves with v_lshrrev_b32 and repack with
;    v_perm_b32 (selector 0x5040100).
639 define <2 x half> @select_fneg_select_v2f16(<2 x i1> %cond0, <2 x i1> %cond1, <2 x half> %arg0, <2 x half> %arg1) {
640 ; GFX7-LABEL: select_fneg_select_v2f16:
642 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
643 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
644 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
645 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
646 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
647 ; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5
648 ; GFX7-NEXT: v_or_b32_e32 v4, v4, v5
649 ; GFX7-NEXT: v_xor_b32_e32 v4, 0x80008000, v4
650 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v6
651 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v7
652 ; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v4
653 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
654 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
655 ; GFX7-NEXT: v_and_b32_e32 v1, 1, v1
656 ; GFX7-NEXT: v_and_b32_e32 v0, 1, v0
657 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
658 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
659 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
660 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v1
661 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
662 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v0
663 ; GFX7-NEXT: v_and_b32_e32 v2, 1, v2
664 ; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v6
665 ; GFX7-NEXT: v_and_b32_e32 v3, 1, v3
666 ; GFX7-NEXT: v_or_b32_e32 v4, v4, v5
667 ; GFX7-NEXT: v_xor_b32_e32 v4, 0x80008000, v4
668 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v4
669 ; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v4
670 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
671 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
672 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
673 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
674 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
675 ; GFX7-NEXT: s_setpc_b64 s[30:31]
677 ; GFX9-LABEL: select_fneg_select_v2f16:
679 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
680 ; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
681 ; GFX9-NEXT: v_xor_b32_e32 v4, 0x80008000, v4
682 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
683 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v5
684 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v4
685 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
686 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
687 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
688 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
689 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
690 ; GFX9-NEXT: v_perm_b32 v4, v1, v0, s4
691 ; GFX9-NEXT: v_and_b32_e32 v3, 1, v3
692 ; GFX9-NEXT: v_xor_b32_e32 v4, 0x80008000, v4
693 ; GFX9-NEXT: v_and_b32_e32 v2, 1, v2
694 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v4
695 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
696 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
697 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
698 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
699 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
700 ; GFX9-NEXT: s_setpc_b64 s[30:31]
702 ; GFX11-LABEL: select_fneg_select_v2f16:
704 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
705 ; GFX11-NEXT: v_xor_b32_e32 v4, 0x80008000, v4
706 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v5
707 ; GFX11-NEXT: v_and_b32_e32 v3, 1, v3
708 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
709 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v4
710 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
711 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
712 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc_lo
713 ; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
714 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
715 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
716 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
717 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
718 ; GFX11-NEXT: v_perm_b32 v4, v1, v0, 0x5040100
719 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
720 ; GFX11-NEXT: v_xor_b32_e32 v4, 0x80008000, v4
721 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v4
722 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
723 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v5 :: v_dual_and_b32 v2, 1, v2
724 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
725 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
726 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
727 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
728 ; GFX11-NEXT: s_setpc_b64 s[30:31]
729 %fneg0 = fneg <2 x half> %arg0
730 %select0 = select <2 x i1> %cond0, <2 x half> %arg1, <2 x half> %fneg0
731 %fneg1 = fneg <2 x half> %select0
732 %select1 = select <2 x i1> %cond1, <2 x half> %fneg1, <2 x half> %select0
733 ret <2 x half> %select1
; Same two-level select structure as the preceding <2 x half> fneg test, but
; written with integers: the sign bit of each i16 lane is flipped with
; xor -32768 before the first select and again after it. The expected output
; keeps explicit v_xor_b32 instructions on every target.
736 define <2 x i16> @select_fneg_xor_select_v2i16(<2 x i1> %cond0, <2 x i1> %cond1, <2 x i16> %arg0, <2 x i16> %arg1) {
737 ; GFX7-LABEL: select_fneg_xor_select_v2i16:
739 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
740 ; GFX7-NEXT: v_and_b32_e32 v1, 1, v1
741 ; GFX7-NEXT: v_and_b32_e32 v0, 1, v0
742 ; GFX7-NEXT: v_xor_b32_e32 v5, 0xffff8000, v5
743 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
744 ; GFX7-NEXT: v_and_b32_e32 v3, 1, v3
745 ; GFX7-NEXT: v_xor_b32_e32 v4, 0xffff8000, v4
746 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v7, vcc
747 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
748 ; GFX7-NEXT: v_and_b32_e32 v2, 1, v2
749 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc
750 ; GFX7-NEXT: v_xor_b32_e32 v5, 0x8000, v1
751 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
752 ; GFX7-NEXT: v_xor_b32_e32 v4, 0x8000, v0
753 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
754 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
755 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
756 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v1
757 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
758 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v3
759 ; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1
760 ; GFX7-NEXT: s_setpc_b64 s[30:31]
762 ; GFX9-LABEL: select_fneg_xor_select_v2i16:
764 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
765 ; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
766 ; GFX9-NEXT: v_xor_b32_e32 v4, 0x80008000, v4
767 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
768 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v5
769 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v4
770 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
771 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
772 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
773 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
774 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
775 ; GFX9-NEXT: v_perm_b32 v4, v1, v0, s4
776 ; GFX9-NEXT: v_and_b32_e32 v3, 1, v3
777 ; GFX9-NEXT: v_xor_b32_e32 v4, 0x80008000, v4
778 ; GFX9-NEXT: v_and_b32_e32 v2, 1, v2
779 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v4
780 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
781 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
782 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
783 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
784 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
785 ; GFX9-NEXT: s_setpc_b64 s[30:31]
787 ; GFX11-LABEL: select_fneg_xor_select_v2i16:
789 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
790 ; GFX11-NEXT: v_xor_b32_e32 v4, 0x80008000, v4
791 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v5
792 ; GFX11-NEXT: v_and_b32_e32 v3, 1, v3
793 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
794 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v4
795 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
796 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
797 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc_lo
798 ; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
799 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
800 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
801 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
802 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
803 ; GFX11-NEXT: v_perm_b32 v4, v1, v0, 0x5040100
804 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
805 ; GFX11-NEXT: v_xor_b32_e32 v4, 0x80008000, v4
806 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v4
807 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
808 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v5 :: v_dual_and_b32 v2, 1, v2
809 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
810 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
811 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
812 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
813 ; GFX11-NEXT: s_setpc_b64 s[30:31]
814 %fneg0 = xor <2 x i16> %arg0, <i16 -32768, i16 -32768>
815 %select0 = select <2 x i1> %cond0, <2 x i16> %arg1, <2 x i16> %fneg0
816 %fneg1 = xor <2 x i16> %select0, <i16 -32768, i16 -32768>
817 %select1 = select <2 x i1> %cond1, <2 x i16> %fneg1, <2 x i16> %select0
818 ret <2 x i16> %select1
821 ; Pattern that appeared in rocm-device-libs to manually operate on the
822 ; sign bit of the high half of a double.
; Integer form: the high i32 of the selected double is extracted, xored with
; either the sign-bit mask or 0 (chosen by %arg > 1), and inserted back.
; NOTE(review): the definition of %i is not visible in this excerpt; the
; v_and_b32 with 1 in the expected output suggests %arg & 1 - confirm.
823 define double @cospiD_pattern0(i32 %arg, double %arg1, double %arg2) {
824 ; GCN-LABEL: cospiD_pattern0:
826 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
827 ; GCN-NEXT: v_and_b32_e32 v5, 1, v0
828 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
829 ; GCN-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
830 ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc
831 ; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 1, v0
832 ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
833 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 31, v0
834 ; GCN-NEXT: v_xor_b32_e32 v1, v1, v0
835 ; GCN-NEXT: v_mov_b32_e32 v0, v3
836 ; GCN-NEXT: s_setpc_b64 s[30:31]
838 ; GFX11-LABEL: cospiD_pattern0:
840 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
841 ; GFX11-NEXT: v_and_b32_e32 v5, 1, v0
842 ; GFX11-NEXT: v_cmp_lt_i32_e64 s0, 1, v0
843 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
844 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5
845 ; GFX11-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0
846 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v1, v3 :: v_dual_cndmask_b32 v1, v2, v4
847 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
848 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 31, v5
849 ; GFX11-NEXT: v_xor_b32_e32 v1, v1, v2
850 ; GFX11-NEXT: s_setpc_b64 s[30:31]
852 %i3 = icmp eq i32 %i, 0
853 %i4 = select i1 %i3, double %arg2, double %arg1
854 %i5 = bitcast double %i4 to <2 x i32>
855 %i6 = icmp sgt i32 %arg, 1
856 %i7 = select i1 %i6, i32 -2147483648, i32 0
857 %i8 = extractelement <2 x i32> %i5, i64 1
858 %i9 = xor i32 %i8, %i7
859 %i10 = insertelement <2 x i32> %i5, i32 %i9, i64 1
860 %i11 = bitcast <2 x i32> %i10 to double
; Floating-point spelling of cospiD_pattern0: the selected double is negated
; with fneg, and a second select on %arg > 1 picks the negated or the original
; value. The expected output xors the high register with 0x80000000.
; NOTE(review): the definition of %i is not visible in this excerpt.
864 define double @cospiD_pattern1(i32 %arg, double %arg1, double %arg2) {
865 ; GCN-LABEL: cospiD_pattern1:
867 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
868 ; GCN-NEXT: v_and_b32_e32 v5, 1, v0
869 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
870 ; GCN-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
871 ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc
872 ; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v1
873 ; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 1, v0
874 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
875 ; GCN-NEXT: v_mov_b32_e32 v0, v3
876 ; GCN-NEXT: s_setpc_b64 s[30:31]
878 ; GFX11-LABEL: cospiD_pattern1:
880 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
881 ; GFX11-NEXT: v_and_b32_e32 v5, 1, v0
882 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
883 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5
884 ; GFX11-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc_lo
885 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
886 ; GFX11-NEXT: v_cmp_lt_i32_e32 vcc_lo, 1, v0
887 ; GFX11-NEXT: v_mov_b32_e32 v0, v3
888 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
889 ; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v1
890 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
891 ; GFX11-NEXT: s_setpc_b64 s[30:31]
893 %i3 = icmp eq i32 %i, 0
894 %i4 = select i1 %i3, double %arg2, double %arg1
895 %i5 = icmp sgt i32 %arg, 1
896 %i6 = fneg double %i4
897 %i7 = select i1 %i5, double %i6, double %i4
901 ; Artificial example, scaled to operation on 16-bit halves of a float.
; Same shape as cospiD_pattern0: the high i16 of the selected float is xored
; with either -32768 or 0 (chosen by %arg > 1) and inserted back.
; NOTE(review): the definition of %i is not visible in this excerpt.
902 define float @cospiD_pattern0_half(i16 %arg, float %arg1, float %arg2) {
903 ; GFX7-LABEL: cospiD_pattern0_half:
905 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
906 ; GFX7-NEXT: v_bfe_i32 v3, v0, 0, 16
907 ; GFX7-NEXT: v_and_b32_e32 v0, 1, v0
908 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
909 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
910 ; GFX7-NEXT: v_cmp_lt_i32_e32 vcc, 1, v3
911 ; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
912 ; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v0
913 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
914 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 31, v2
915 ; GFX7-NEXT: v_xor_b32_e32 v0, v2, v0
916 ; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
917 ; GFX7-NEXT: s_setpc_b64 s[30:31]
919 ; GFX9-LABEL: cospiD_pattern0_half:
921 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
922 ; GFX9-NEXT: v_and_b32_e32 v3, 1, v0
923 ; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v3
924 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
925 ; GFX9-NEXT: v_cmp_lt_i16_e32 vcc, 1, v0
926 ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
927 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 15, v0
928 ; GFX9-NEXT: v_xor_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
929 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
930 ; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4
931 ; GFX9-NEXT: s_setpc_b64 s[30:31]
933 ; GFX11-LABEL: cospiD_pattern0_half:
935 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
936 ; GFX11-NEXT: v_and_b32_e32 v3, 1, v0
937 ; GFX11-NEXT: v_cmp_lt_i16_e32 vcc_lo, 1, v0
938 ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
939 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
940 ; GFX11-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v3
941 ; GFX11-NEXT: v_lshlrev_b16 v0, 15, v0
942 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
943 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
944 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1
945 ; GFX11-NEXT: v_xor_b32_e32 v0, v2, v0
946 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
947 ; GFX11-NEXT: v_perm_b32 v0, v0, v1, 0x5040100
948 ; GFX11-NEXT: s_setpc_b64 s[30:31]
950 %i3 = icmp eq i16 %i, 0
951 %i4 = select i1 %i3, float %arg2, float %arg1
952 %i5 = bitcast float %i4 to <2 x i16>
953 %i6 = icmp sgt i16 %arg, 1
954 %i7 = select i1 %i6, i16 -32768, i16 0
955 %i8 = extractelement <2 x i16> %i5, i64 1
956 %i9 = xor i16 %i8, %i7
957 %i10 = insertelement <2 x i16> %i5, i16 %i9, i64 1
958 %i11 = bitcast <2 x i16> %i10 to float
; Float counterpart of cospiD_pattern1 with an i16 condition input: a second
; select on %arg > 1 chooses between %i6 and the first select's result %i4.
; NOTE(review): the definitions of %i and %i6 are not visible in this excerpt;
; the -v0 / -v1 source modifier in the expected v_cndmask_b32 suggests %i6 is
; an fneg of %i4 - confirm.
962 define float @cospiD_pattern1_half(i16 %arg, float %arg1, float %arg2) {
963 ; GFX7-LABEL: cospiD_pattern1_half:
965 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
966 ; GFX7-NEXT: v_bfe_i32 v3, v0, 0, 16
967 ; GFX7-NEXT: v_and_b32_e32 v0, 1, v0
968 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
969 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
970 ; GFX7-NEXT: v_cmp_lt_i32_e32 vcc, 1, v3
971 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, -v0, vcc
972 ; GFX7-NEXT: s_setpc_b64 s[30:31]
974 ; GFX9-LABEL: cospiD_pattern1_half:
976 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
977 ; GFX9-NEXT: v_and_b32_e32 v3, 1, v0
978 ; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v3
979 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
980 ; GFX9-NEXT: v_cmp_lt_i16_e32 vcc, 1, v0
981 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v1, -v1, vcc
982 ; GFX9-NEXT: s_setpc_b64 s[30:31]
984 ; GFX11-LABEL: cospiD_pattern1_half:
986 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
987 ; GFX11-NEXT: v_and_b32_e32 v3, 1, v0
988 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
989 ; GFX11-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v3
990 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
991 ; GFX11-NEXT: v_cmp_lt_i16_e32 vcc_lo, 1, v0
992 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, -v1, vcc_lo
993 ; GFX11-NEXT: s_setpc_b64 s[30:31]
995 %i3 = icmp eq i16 %i, 0
996 %i4 = select i1 %i3, float %arg2, float %arg1
997 %i5 = icmp sgt i16 %arg, 1
999 %i7 = select i1 %i5, float %i6, float %i4
; fneg of a double obtained by bitcasting an i64 argument. The expected
; output is a single v_xor_b32 of the high register (v1) with 0x80000000.
1003 define double @fneg_f64_bitcast_vector_i64_to_f64(i64 %arg) {
1004 ; GCN-LABEL: fneg_f64_bitcast_vector_i64_to_f64:
1006 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1007 ; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1008 ; GCN-NEXT: s_setpc_b64 s[30:31]
1010 ; GFX11-LABEL: fneg_f64_bitcast_vector_i64_to_f64:
1012 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1013 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1014 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1015 %bitcast = bitcast i64 %arg to double
1016 %fneg = fneg double %bitcast
; fneg of a double obtained by bitcasting a <2 x i32> argument. The expected
; output is a single v_xor_b32 of the high register (v1) with 0x80000000.
1020 define double @fneg_f64_bitcast_vector_v2i32_to_f64(<2 x i32> %arg) {
1021 ; GCN-LABEL: fneg_f64_bitcast_vector_v2i32_to_f64:
1023 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1024 ; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1025 ; GCN-NEXT: s_setpc_b64 s[30:31]
1027 ; GFX11-LABEL: fneg_f64_bitcast_vector_v2i32_to_f64:
1029 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1030 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1031 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1032 %bitcast = bitcast <2 x i32> %arg to double
1033 %fneg = fneg double %bitcast
; fneg of a double obtained by bitcasting a <2 x float> argument. The expected
; output is a single v_xor_b32 of the high register (v1) with 0x80000000.
1037 define double @fneg_f64_bitcast_vector_v2f32_to_f64(<2 x float> %arg) {
1038 ; GCN-LABEL: fneg_f64_bitcast_vector_v2f32_to_f64:
1040 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1041 ; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1042 ; GCN-NEXT: s_setpc_b64 s[30:31]
1044 ; GFX11-LABEL: fneg_f64_bitcast_vector_v2f32_to_f64:
1046 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1047 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1048 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1049 %bitcast = bitcast <2 x float> %arg to double
1050 %fneg = fneg double %bitcast
; fneg of a double obtained by bitcasting a <4 x i16> argument. On gfx9 and
; gfx11 the expected output is a single v_xor_b32 of v1; on gfx7 the four
; elements are first packed into two dwords with shift/and/or before the xor.
1054 define double @fneg_f64_bitcast_vector_v4i16_to_f64(<4 x i16> %arg) {
1055 ; GFX7-LABEL: fneg_f64_bitcast_vector_v4i16_to_f64:
1057 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1058 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1059 ; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2
1060 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
1061 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1062 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
1063 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
1064 ; GFX7-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
1065 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1067 ; GFX9-LABEL: fneg_f64_bitcast_vector_v4i16_to_f64:
1069 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1070 ; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1071 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1073 ; GFX11-LABEL: fneg_f64_bitcast_vector_v4i16_to_f64:
1075 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1076 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1077 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1078 %bitcast = bitcast <4 x i16> %arg to double
1079 %fneg = fneg double %bitcast
; fneg of a double obtained by bitcasting a <4 x half> argument. On gfx9 and
; gfx11 the expected output is a single v_xor_b32 of v1; on gfx7 each element
; goes through v_cvt_f16_f32 and is packed with shift/or before the xor.
1083 define double @fneg_f64_bitcast_vector_v4f16_to_f64(<4 x half> %arg) {
1084 ; GFX7-LABEL: fneg_f64_bitcast_vector_v4f16_to_f64:
1086 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1087 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1088 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1089 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1090 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1091 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1092 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
1093 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1094 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
1095 ; GFX7-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
1096 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1098 ; GFX9-LABEL: fneg_f64_bitcast_vector_v4f16_to_f64:
1100 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1101 ; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1102 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1104 ; GFX11-LABEL: fneg_f64_bitcast_vector_v4f16_to_f64:
1106 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1107 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1108 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1109 %bitcast = bitcast <4 x half> %arg to double
1110 %fneg = fneg double %bitcast
; fneg of a double bitcast from a <2 x i32> that is built from two scalar
; arguments with insertelement. The expected output is a single v_xor_b32 of
; the high register (v1) with 0x80000000.
1114 define double @fneg_f64_bitcast_build_vector_v2i32_to_f64(i32 %elt0, i32 %elt1) {
1115 ; GCN-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64:
1117 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1118 ; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1119 ; GCN-NEXT: s_setpc_b64 s[30:31]
1121 ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64:
1123 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1124 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1125 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1126 %insert.0 = insertelement <2 x i32> poison, i32 %elt0, i32 0
1127 %insert.1 = insertelement <2 x i32> %insert.0, i32 %elt1, i32 1
1128 %bitcast = bitcast <2 x i32> %insert.1 to double
1129 %fneg = fneg double %bitcast
; fneg of a double bitcast from a <2 x float> that is built from two scalar
; arguments with insertelement. The expected output is a single v_xor_b32 of
; the high register (v1) with 0x80000000.
1133 define double @fneg_f64_bitcast_build_vector_v2f32_to_f64(float %elt0, float %elt1) {
1134 ; GCN-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64:
1136 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1137 ; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1138 ; GCN-NEXT: s_setpc_b64 s[30:31]
1140 ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64:
1142 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1143 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1144 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1145 %insert.0 = insertelement <2 x float> poison, float %elt0, i32 0
1146 %insert.1 = insertelement <2 x float> %insert.0, float %elt1, i32 1
1147 %bitcast = bitcast <2 x float> %insert.1 to double
1148 %fneg = fneg double %bitcast
; fneg of a double bitcast from a <4 x i16> built from four scalar arguments.
; The expected output packs the element pairs into two dwords (shift/and/or on
; gfx7, v_perm_b32 with selector 0x5040100 on gfx9 and gfx11) and then xors
; the high dword with 0x80000000.
1152 define double @fneg_f64_bitcast_build_vector_v4i16_to_f64(i16 %elt0, i16 %elt1, i16 %elt2, i16 %elt3) {
1153 ; GFX7-LABEL: fneg_f64_bitcast_build_vector_v4i16_to_f64:
1155 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1156 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1157 ; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2
1158 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
1159 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1160 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
1161 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
1162 ; GFX7-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
1163 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1165 ; GFX9-LABEL: fneg_f64_bitcast_build_vector_v4i16_to_f64:
1167 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1168 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1169 ; GFX9-NEXT: v_perm_b32 v2, v3, v2, s4
1170 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
1171 ; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
1172 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1174 ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v4i16_to_f64:
1176 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1177 ; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
1178 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
1179 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
1180 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
1181 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1182 %insert.0 = insertelement <4 x i16> poison, i16 %elt0, i32 0
1183 %insert.1 = insertelement <4 x i16> %insert.0, i16 %elt1, i32 1
1184 %insert.2 = insertelement <4 x i16> %insert.1, i16 %elt2, i32 2
1185 %insert.3 = insertelement <4 x i16> %insert.2, i16 %elt3, i32 3
1186 %bitcast = bitcast <4 x i16> %insert.3 to double
1187 %fneg = fneg double %bitcast
; fneg of a double bitcast from a <4 x half> built from four scalar arguments.
; The expected output packs the element pairs into two dwords (v_cvt_f16_f32
; plus shift/or on gfx7, v_perm_b32 with selector 0x5040100 on gfx9 and gfx11)
; and then xors the high dword with 0x80000000.
1191 define double @fneg_f64_bitcast_build_vector_v4f16_to_f64(half %elt0, half %elt1, half %elt2, half %elt3) {
1192 ; GFX7-LABEL: fneg_f64_bitcast_build_vector_v4f16_to_f64:
1194 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1195 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1196 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1197 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1198 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1199 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1200 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
1201 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1202 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
1203 ; GFX7-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
1204 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1206 ; GFX9-LABEL: fneg_f64_bitcast_build_vector_v4f16_to_f64:
1208 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1209 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1210 ; GFX9-NEXT: v_perm_b32 v2, v3, v2, s4
1211 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
1212 ; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
1213 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1215 ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v4f16_to_f64:
1217 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1218 ; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
1219 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
1220 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
1221 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
1222 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1223 %insert.0 = insertelement <4 x half> poison, half %elt0, i32 0
1224 %insert.1 = insertelement <4 x half> %insert.0, half %elt1, i32 1
1225 %insert.2 = insertelement <4 x half> %insert.1, half %elt2, i32 2
1226 %insert.3 = insertelement <4 x half> %insert.2, half %elt3, i32 3
1227 %bitcast = bitcast <4 x half> %insert.3 to double
1228 %fneg = fneg double %bitcast
; fneg of a double bitcast from a <4 x bfloat> built from four scalar
; arguments. The expected output packs the element pairs into two dwords
; (v_lshrrev_b32 + v_alignbit_b32 on gfx7, v_perm_b32 with selector 0x3020706
; on gfx9 and gfx11) and then xors the high dword with 0x80000000.
1232 define double @fneg_f64_bitcast_build_vector_v4bf16_to_f64(bfloat %elt0, bfloat %elt1, bfloat %elt2, bfloat %elt3) {
1233 ; GFX7-LABEL: fneg_f64_bitcast_build_vector_v4bf16_to_f64:
1235 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1236 ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v3
1237 ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1238 ; GFX7-NEXT: v_alignbit_b32 v2, v3, v2, 16
1239 ; GFX7-NEXT: v_alignbit_b32 v0, v1, v0, 16
1240 ; GFX7-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
1241 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1243 ; GFX9-LABEL: fneg_f64_bitcast_build_vector_v4bf16_to_f64:
1245 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1246 ; GFX9-NEXT: s_mov_b32 s4, 0x3020706
1247 ; GFX9-NEXT: v_perm_b32 v2, v2, v3, s4
1248 ; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4
1249 ; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
1250 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1252 ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v4bf16_to_f64:
1254 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1255 ; GFX11-NEXT: v_perm_b32 v2, v2, v3, 0x3020706
1256 ; GFX11-NEXT: v_perm_b32 v0, v0, v1, 0x3020706
1257 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
1258 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
1259 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1260 %insert.0 = insertelement <4 x bfloat> poison, bfloat %elt0, i32 0
1261 %insert.1 = insertelement <4 x bfloat> %insert.0, bfloat %elt1, i32 1
1262 %insert.2 = insertelement <4 x bfloat> %insert.1, bfloat %elt2, i32 2
1263 %insert.3 = insertelement <4 x bfloat> %insert.2, bfloat %elt3, i32 3
1264 %bitcast = bitcast <4 x bfloat> %insert.3 to double
1265 %fneg = fneg double %bitcast
; The negated double feeds an fmul. The expected output has no separate xor:
; the negation appears as a source modifier (-v[0:1]) on v_mul_f64.
1269 define double @fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user(i32 %elt0, i32 %elt1, double %fp.val) {
1270 ; GCN-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user:
1272 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1273 ; GCN-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
1274 ; GCN-NEXT: s_setpc_b64 s[30:31]
1276 ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user:
1278 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1279 ; GFX11-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
1280 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1281 %insert.0 = insertelement <2 x i32> poison, i32 %elt0, i32 0
1282 %insert.1 = insertelement <2 x i32> %insert.0, i32 %elt1, i32 1
1283 %bitcast = bitcast <2 x i32> %insert.1 to double
1284 %fneg = fneg double %bitcast
1285 %fmul = fmul double %fneg, %fp.val
; The negated double feeds two fmuls, once as the first and once as the second
; operand. The expected output uses the -v[0:1] source modifier on both
; v_mul_f64 instructions and emits no separate xor.
1289 define { double, double } @fneg_f64_bitcast_build_vector_v2i32_to_f64_multi_modifier_user(i32 %elt0, i32 %elt1, double %fp.val0, double %fp.val1) {
1290 ; GCN-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_multi_modifier_user:
1292 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1293 ; GCN-NEXT: v_mul_f64 v[6:7], -v[0:1], v[2:3]
1294 ; GCN-NEXT: v_mul_f64 v[2:3], v[4:5], -v[0:1]
1295 ; GCN-NEXT: v_mov_b32_e32 v0, v6
1296 ; GCN-NEXT: v_mov_b32_e32 v1, v7
1297 ; GCN-NEXT: s_setpc_b64 s[30:31]
1299 ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_multi_modifier_user:
1301 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1302 ; GFX11-NEXT: v_mul_f64 v[6:7], -v[0:1], v[2:3]
1303 ; GFX11-NEXT: v_mul_f64 v[2:3], v[4:5], -v[0:1]
1304 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
1305 ; GFX11-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
1306 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1307 %insert.0 = insertelement <2 x i32> poison, i32 %elt0, i32 0
1308 %insert.1 = insertelement <2 x i32> %insert.0, i32 %elt1, i32 1
1309 %bitcast = bitcast <2 x i32> %insert.1 to double
1310 %fneg = fneg double %bitcast
1311 %fmul0 = fmul double %fneg, %fp.val0
1312 %fmul1 = fmul double %fp.val1, %fneg
1313 %ret.0 = insertvalue { double, double } poison, double %fmul0, 0
1314 %ret.1 = insertvalue { double, double } %ret.0, double %fmul1, 1
1315 ret { double, double } %ret.1
; The high element already has its sign bit flipped by an integer xor before
; the vector is built, and the resulting double is then negated with fneg and
; multiplied. The expected output keeps both operations: a v_xor_b32 on v1
; followed by v_mul_f64 with the -v[0:1] source modifier.
1318 define double @fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user_integer_neg_source(i32 %elt0, i32 %elt1, double %fp.val) {
1319 ; GCN-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user_integer_neg_source:
1321 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1322 ; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1323 ; GCN-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
1324 ; GCN-NEXT: s_setpc_b64 s[30:31]
1326 ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user_integer_neg_source:
1328 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1329 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1330 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1331 ; GFX11-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
1332 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1333 %neg.elt1 = xor i32 %elt1, -2147483648
1334 %insert.0 = insertelement <2 x i32> poison, i32 %elt0, i32 0
1335 %insert.1 = insertelement <2 x i32> %insert.0, i32 %neg.elt1, i32 1
1336 %bitcast = bitcast <2 x i32> %insert.1 to double
1337 %fneg = fneg double %bitcast
1338 %fmul = fmul double %fneg, %fp.val
; The high element of the vector is produced by fadd nsz nnan %elt1, 2.0.
; The expected output has no xor: the negation is folded into the add, giving
; a single v_sub_f32 v1, -2.0, v1.
1342 define double @fneg_f64_bitcast_build_vector_v2f32_foldable_sources_to_f64(float %elt0, float %elt1) {
1343 ; GCN-LABEL: fneg_f64_bitcast_build_vector_v2f32_foldable_sources_to_f64:
1345 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1346 ; GCN-NEXT: v_sub_f32_e32 v1, -2.0, v1
1347 ; GCN-NEXT: s_setpc_b64 s[30:31]
1349 ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_foldable_sources_to_f64:
1351 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1352 ; GFX11-NEXT: v_sub_f32_e32 v1, -2.0, v1
1353 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1354 %fadd = fadd nsz nnan float %elt1, 2.0
1355 %insert.0 = insertelement <2 x float> poison, float %elt0, i32 0
1356 %insert.1 = insertelement <2 x float> %insert.0, float %fadd, i32 1
1357 %bitcast = bitcast <2 x float> %insert.1 to double
1358 %fneg = fneg double %bitcast
; The built <2 x float> has a second use: it is stored to %ptr before being
; bitcast and negated. The expected output stores the un-negated v[0:1] and
; negates with a v_xor_b32 of the high dword (via a temporary v4 on gfx11).
1362 define double @fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_user(float %elt0, float %elt1, ptr addrspace(1) %ptr) {
1363 ; GFX7-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_user:
1365 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1366 ; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1367 ; GFX7-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1368 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1369 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1371 ; GFX9-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_user:
1373 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1374 ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
1375 ; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1376 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1377 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1379 ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_user:
1381 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1382 ; GFX11-NEXT: v_xor_b32_e32 v4, 0x80000000, v1
1383 ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
1384 ; GFX11-NEXT: v_mov_b32_e32 v1, v4
1385 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1386 %insert.0 = insertelement <2 x float> poison, float %elt0, i32 0
1387 %insert.1 = insertelement <2 x float> %insert.0, float %elt1, i32 1
1388 store <2 x float> %insert.1, ptr addrspace(1) %ptr
1389 %bitcast = bitcast <2 x float> %insert.1 to double
1390 %fneg = fneg double %bitcast
; The built <2 x float> has a second use in a vector fadd, and both the
; negated double and the fadd result are returned. The expected output adds
; the un-negated elements and negates separately with a v_xor_b32 of the high
; dword.
1394 define { double, <2 x float> } @fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_foldable_user(float %elt0, float %elt1, <2 x float> %arg.v2f32) {
1395 ; GCN-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_foldable_user:
1397 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1398 ; GCN-NEXT: v_add_f32_e32 v2, v0, v2
1399 ; GCN-NEXT: v_add_f32_e32 v3, v1, v3
1400 ; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1401 ; GCN-NEXT: s_setpc_b64 s[30:31]
1403 ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_foldable_user:
1405 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1406 ; GFX11-NEXT: v_xor_b32_e32 v4, 0x80000000, v1
1407 ; GFX11-NEXT: v_dual_add_f32 v2, v0, v2 :: v_dual_add_f32 v3, v1, v3
1408 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
1409 ; GFX11-NEXT: v_mov_b32_e32 v1, v4
1410 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1411 %insert.0 = insertelement <2 x float> poison, float %elt0, i32 0
1412 %insert.1 = insertelement <2 x float> %insert.0, float %elt1, i32 1
1413 %other.bitcast.source.user = fadd <2 x float> %insert.1, %arg.v2f32
1414 %bitcast = bitcast <2 x float> %insert.1 to double
1415 %fneg = fneg double %bitcast
1416 %ret.0 = insertvalue { double, <2 x float> } poison, double %fneg, 0
1417 %ret.1 = insertvalue { double, <2 x float> } %ret.0, <2 x float> %other.bitcast.source.user, 1
1418 ret { double, <2 x float> } %ret.1
; Both the negated double and the original bitcast value are returned. The
; expected output copies the original into v[2:3] and produces the negated
; high dword in v1 with a v_xor_b32.
1421 define { double, double } @fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_user(float %elt0, float %elt1) {
1422 ; GCN-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_user:
1424 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1425 ; GCN-NEXT: v_mov_b32_e32 v3, v1
1426 ; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v3
1427 ; GCN-NEXT: v_mov_b32_e32 v2, v0
1428 ; GCN-NEXT: s_setpc_b64 s[30:31]
1430 ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_user:
1432 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1433 ; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0
1434 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1435 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v3
1436 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1437 %insert.0 = insertelement <2 x float> poison, float %elt0, i32 0
1438 %insert.1 = insertelement <2 x float> %insert.0, float %elt1, i32 1
1439 %bitcast = bitcast <2 x float> %insert.1 to double
1440 %fneg = fneg double %bitcast
1441 %ret.0 = insertvalue { double, double } poison, double %fneg, 0
1442 %ret.1 = insertvalue { double, double } %ret.0, double %bitcast, 1
1443 ret { double, double } %ret.1
; The bitcast double has a second use in an fadd with %arg.f64, and both the
; negated value and the fadd result are returned. The expected output is a
; v_add_f64 on the un-negated v[0:1] followed by a v_xor_b32 of v1.
1446 define { double, double } @fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_foldable_user(float %elt0, float %elt1, double %arg.f64) {
1447 ; GCN-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_foldable_user:
1449 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1450 ; GCN-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3]
1451 ; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1452 ; GCN-NEXT: s_setpc_b64 s[30:31]
1454 ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_foldable_user:
1456 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1457 ; GFX11-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3]
1458 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1459 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1460 %insert.0 = insertelement <2 x float> poison, float %elt0, i32 0
1461 %insert.1 = insertelement <2 x float> %insert.0, float %elt1, i32 1
1462 %bitcast = bitcast <2 x float> %insert.1 to double
1463 %other.bitcast.user = fadd double %bitcast, %arg.f64
1464 %fneg = fneg double %bitcast
1465 %ret.0 = insertvalue { double, double } poison, double %fneg, 0
1466 %ret.1 = insertvalue { double, double } %ret.0, double %other.bitcast.user, 1
1467 ret { double, double } %ret.1
1470 ; Check for correct bitcasting back when there are multiple uses.
; Kernel form: %a is selected from %x and %y on %z, a second select on the
; same %z chooses between %a and %b, and %d is stored to %dst.
; NOTE(review): the definitions of %b and %d are not visible in this excerpt;
; the -v0 source modifier in the expected v_cndmask_b32 suggests an fneg is
; involved - confirm against the full file.
1471 define amdgpu_kernel void @multiple_uses_fneg_select_f64(double %x, double %y, i1 %z, ptr addrspace(1) %dst) {
1472 ; GFX7-LABEL: multiple_uses_fneg_select_f64:
1474 ; GFX7-NEXT: s_load_dword s6, s[4:5], 0x4
1475 ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
1476 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x6
1477 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1478 ; GFX7-NEXT: s_bitcmp1_b32 s6, 0
1479 ; GFX7-NEXT: s_cselect_b64 vcc, -1, 0
1480 ; GFX7-NEXT: s_and_b64 s[6:7], vcc, exec
1481 ; GFX7-NEXT: v_mov_b32_e32 v0, s3
1482 ; GFX7-NEXT: v_mov_b32_e32 v1, s1
1483 ; GFX7-NEXT: s_cselect_b32 s1, s1, s3
1484 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1485 ; GFX7-NEXT: s_cselect_b32 s0, s0, s2
1486 ; GFX7-NEXT: v_mov_b32_e32 v1, s1
1487 ; GFX7-NEXT: v_mov_b32_e32 v2, s4
1488 ; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, -v0, vcc
1489 ; GFX7-NEXT: v_mov_b32_e32 v0, s0
1490 ; GFX7-NEXT: v_mov_b32_e32 v3, s5
1491 ; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1492 ; GFX7-NEXT: s_endpgm
1494 ; GFX9-LABEL: multiple_uses_fneg_select_f64:
1496 ; GFX9-NEXT: s_load_dword s6, s[4:5], 0x10
1497 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
1498 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
1499 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x18
1500 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1501 ; GFX9-NEXT: s_bitcmp1_b32 s6, 0
1502 ; GFX9-NEXT: s_cselect_b64 vcc, -1, 0
1503 ; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec
1504 ; GFX9-NEXT: v_mov_b32_e32 v0, s3
1505 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1506 ; GFX9-NEXT: s_cselect_b32 s1, s1, s3
1507 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1508 ; GFX9-NEXT: s_cselect_b32 s0, s0, s2
1509 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1510 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v0, vcc
1511 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1512 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
1513 ; GFX9-NEXT: s_endpgm
1515 ; GFX11-LABEL: multiple_uses_fneg_select_f64:
1517 ; GFX11-NEXT: s_clause 0x2
1518 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0
1519 ; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x10
1520 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x18
1521 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
1522 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1523 ; GFX11-NEXT: v_mov_b32_e32 v0, s5
1524 ; GFX11-NEXT: s_bitcmp1_b32 s2, 0
1525 ; GFX11-NEXT: s_cselect_b32 vcc_lo, -1, 0
1526 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
1527 ; GFX11-NEXT: v_cndmask_b32_e32 v0, s7, v0, vcc_lo
1528 ; GFX11-NEXT: s_and_b32 s2, vcc_lo, exec_lo
1529 ; GFX11-NEXT: s_cselect_b32 s2, s5, s7
1530 ; GFX11-NEXT: s_cselect_b32 s3, s4, s6
1531 ; GFX11-NEXT: v_cndmask_b32_e64 v1, s2, -v0, vcc_lo
1532 ; GFX11-NEXT: v_mov_b32_e32 v0, s3
1533 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1534 ; GFX11-NEXT: s_nop 0
1535 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1536 ; GFX11-NEXT: s_endpgm
1537 %a = select i1 %z, double %x, double %y
1539 %c = select i1 %z, double %a, double %b
1541 store double %d, ptr addrspace(1) %dst
; Regression-style kernel test with a single float argument. The i32 select
; result %.i2379 is used both as a bitcast source and as an operand of a
; second select, and its float bitcast %.i0436 is used by both an fneg and an
; fcmp. The negated value is multiplied with the second select's result and
; the product is compared (ogt) against 0.0 to drive a conditional branch.
; gfx700 and gfx900 share the GCN checks; gfx1100 is checked under GFX11.
1545 define amdgpu_kernel void @fnge_select_f32_multi_use_regression(float %.i2369) {
1546 ; GCN-LABEL: fnge_select_f32_multi_use_regression:
1547 ; GCN: ; %bb.0: ; %.entry
1548 ; GCN-NEXT: s_load_dword s0, s[4:5], 0x0
1549 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
1550 ; GCN-NEXT: v_cmp_nlt_f32_e64 s[0:1], s0, 0
1551 ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
1552 ; GCN-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
1553 ; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v0, vcc
1554 ; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1
1555 ; GCN-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
1556 ; GCN-NEXT: s_and_b64 vcc, exec, vcc
1557 ; GCN-NEXT: s_endpgm
1559 ; GFX11-LABEL: fnge_select_f32_multi_use_regression:
1560 ; GFX11: ; %bb.0: ; %.entry
1561 ; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
1562 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1563 ; GFX11-NEXT: v_cmp_nlt_f32_e64 s0, s0, 0
1564 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1565 ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
1566 ; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
1567 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0, v0, vcc_lo
1568 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1569 ; GFX11-NEXT: v_mul_f32_e64 v0, -v0, v1
1570 ; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0, v0
1571 ; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo
1572 ; GFX11-NEXT: s_endpgm
; First select: i32 1 or 0 depending on whether the argument is uge 0.0.
1574 %i = fcmp uge float %.i2369, 0.000000e+00
1575 %.i2379 = select i1 %i, i32 1, i32 0
1576 %.i0436 = bitcast i32 %.i2379 to float
; fneg of the bitcast select result; %.i0436 is also used by the fcmp below.
1577 %.i0440 = fneg float %.i0436
1578 %i1 = fcmp uge float %.i0436, 0.000000e+00
; Second select reuses the first select's i32 result as its true operand.
1579 %.i2495 = select i1 %i1, i32 %.i2379, i32 0
1580 %.i0552 = bitcast i32 %.i2495 to float
1581 %.i0592 = fmul float %.i0440, %.i0552
1582 %.i0721 = fcmp ogt float %.i0592, 0.000000e+00
1583 br i1 %.i0721, label %bb5, label %bb
; Fallthrough block: loads a <2 x i32>, widens it to four lanes (upper two
; undef), reinterprets as floats and extracts lane 0. The remainder of this
; block is not visible in this excerpt.
1585 bb: ; preds = %.entry
1586 %i2 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> zeroinitializer, i32 1, i32 0)
1587 %i3 = shufflevector <2 x i32> %i2, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1588 %i4 = bitcast <4 x i32> %i3 to <4 x float>
1589 %.i0753 = extractelement <4 x float> %i4, i64 0
1592 bb5: ; preds = %bb, %.entry
; Declaration of the buffer-load intrinsic called in the test above, together
; with the attribute group (#0) it references.
1597 declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg) #0
1599 attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }