1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
; Test: fmul of two <2 x half> arguments.
; The GFX9 and GFX10 checks expect a single v_pk_mul_f16. The GFX8 checks
; expect a v_mul_f16_e32 plus a v_mul_f16_sdwa using the WORD_1 selectors,
; with the two results combined by v_or_b32.
7 define <2 x half> @v_fmul_v2f16(<2 x half> %a, <2 x half> %b) {
8 ; GFX9-LABEL: v_fmul_v2f16:
10 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1
12 ; GFX9-NEXT: s_setpc_b64 s[30:31]
14 ; GFX8-LABEL: v_fmul_v2f16:
16 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17 ; GFX8-NEXT: v_mul_f16_e32 v2, v0, v1
18 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
19 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
20 ; GFX8-NEXT: s_setpc_b64 s[30:31]
22 ; GFX10-LABEL: v_fmul_v2f16:
24 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v1
26 ; GFX10-NEXT: s_setpc_b64 s[30:31]
27 %mul = fmul <2 x half> %a, %b
; Test: fmul of <2 x half> where the left operand %a is first negated with fneg.
; The GFX9 and GFX10 checks expect the negation to appear as the
; neg_lo:[1,0] neg_hi:[1,0] modifiers on v_pk_mul_f16, with no separate
; instruction. The GFX8 checks expect a v_xor_b32 of v0 with 0x80008000 ahead
; of the v_mul_f16_e32 / v_mul_f16_sdwa / v_or_b32 sequence.
31 define <2 x half> @v_fmul_v2f16_fneg_lhs(<2 x half> %a, <2 x half> %b) {
32 ; GFX9-LABEL: v_fmul_v2f16_fneg_lhs:
34 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
36 ; GFX9-NEXT: s_setpc_b64 s[30:31]
38 ; GFX8-LABEL: v_fmul_v2f16_fneg_lhs:
40 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
42 ; GFX8-NEXT: v_mul_f16_e32 v2, v0, v1
43 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
44 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
45 ; GFX8-NEXT: s_setpc_b64 s[30:31]
47 ; GFX10-LABEL: v_fmul_v2f16_fneg_lhs:
49 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
51 ; GFX10-NEXT: s_setpc_b64 s[30:31]
52 %neg.a = fneg <2 x half> %a
53 %mul = fmul <2 x half> %neg.a, %b
; Test: fmul of <2 x half> where the right operand %b is first negated with fneg.
; The GFX9 and GFX10 checks expect the negation to appear as the
; neg_lo:[0,1] neg_hi:[0,1] modifiers on v_pk_mul_f16, with no separate
; instruction. The GFX8 checks expect a v_xor_b32 of v1 with 0x80008000 ahead
; of the v_mul_f16_e32 / v_mul_f16_sdwa / v_or_b32 sequence.
57 define <2 x half> @v_fmul_v2f16_fneg_rhs(<2 x half> %a, <2 x half> %b) {
58 ; GFX9-LABEL: v_fmul_v2f16_fneg_rhs:
60 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
62 ; GFX9-NEXT: s_setpc_b64 s[30:31]
64 ; GFX8-LABEL: v_fmul_v2f16_fneg_rhs:
66 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
68 ; GFX8-NEXT: v_mul_f16_e32 v2, v0, v1
69 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
70 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
71 ; GFX8-NEXT: s_setpc_b64 s[30:31]
73 ; GFX10-LABEL: v_fmul_v2f16_fneg_rhs:
75 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
77 ; GFX10-NEXT: s_setpc_b64 s[30:31]
78 %neg.b = fneg <2 x half> %b
79 %mul = fmul <2 x half> %a, %neg.b
; Test: fmul of <2 x half> where both operands are negated with fneg.
; Every check prefix expects the same instruction sequence as the plain
; v_fmul_v2f16 case: no neg_lo/neg_hi modifiers and no v_xor_b32 appear in
; the expected output.
83 define <2 x half> @v_fmul_v2f16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
84 ; GFX9-LABEL: v_fmul_v2f16_fneg_lhs_fneg_rhs:
86 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
87 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1
88 ; GFX9-NEXT: s_setpc_b64 s[30:31]
90 ; GFX8-LABEL: v_fmul_v2f16_fneg_lhs_fneg_rhs:
92 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
93 ; GFX8-NEXT: v_mul_f16_e32 v2, v0, v1
94 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
95 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
96 ; GFX8-NEXT: s_setpc_b64 s[30:31]
98 ; GFX10-LABEL: v_fmul_v2f16_fneg_lhs_fneg_rhs:
100 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
101 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v1
102 ; GFX10-NEXT: s_setpc_b64 s[30:31]
103 %neg.a = fneg <2 x half> %a
104 %neg.b = fneg <2 x half> %b
105 %mul = fmul <2 x half> %neg.a, %neg.b
; Test: fmul of two <3 x half> arguments.
; The GFX9 and GFX10 checks expect two v_pk_mul_f16 instructions
; (v0 with v2, v1 with v3). The GFX8 checks expect a v_mul_f16_e32 and a
; WORD_1 v_mul_f16_sdwa on v0/v2 combined by v_or_b32, plus one more
; v_mul_f16_e32 on v1/v3.
109 define <3 x half> @v_fmul_v3f16(<3 x half> %a, <3 x half> %b) {
110 ; GFX9-LABEL: v_fmul_v3f16:
112 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
114 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3
115 ; GFX9-NEXT: s_setpc_b64 s[30:31]
117 ; GFX8-LABEL: v_fmul_v3f16:
119 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120 ; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
121 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
122 ; GFX8-NEXT: v_mul_f16_e32 v1, v1, v3
123 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
124 ; GFX8-NEXT: s_setpc_b64 s[30:31]
126 ; GFX10-LABEL: v_fmul_v3f16:
128 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
129 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
130 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
131 ; GFX10-NEXT: s_setpc_b64 s[30:31]
132 %mul = fmul <3 x half> %a, %b
; Test: fmul of <3 x half> where the left operand %a is first negated with fneg.
; The GFX9 and GFX10 checks expect neg_lo:[1,0] neg_hi:[1,0] on both
; v_pk_mul_f16 instructions. The GFX8 checks expect v_xor_b32 with 0x80008000
; applied to v0 and v1 before the multiplies.
136 define <3 x half> @v_fmul_v3f16_fneg_lhs(<3 x half> %a, <3 x half> %b) {
137 ; GFX9-LABEL: v_fmul_v3f16_fneg_lhs:
139 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[1,0] neg_hi:[1,0]
141 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[1,0] neg_hi:[1,0]
142 ; GFX9-NEXT: s_setpc_b64 s[30:31]
144 ; GFX8-LABEL: v_fmul_v3f16_fneg_lhs:
146 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
147 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
148 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
149 ; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
150 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
151 ; GFX8-NEXT: v_mul_f16_e32 v1, v1, v3
152 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
153 ; GFX8-NEXT: s_setpc_b64 s[30:31]
155 ; GFX10-LABEL: v_fmul_v3f16_fneg_lhs:
157 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
158 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[1,0] neg_hi:[1,0]
159 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[1,0] neg_hi:[1,0]
160 ; GFX10-NEXT: s_setpc_b64 s[30:31]
161 %neg.a = fneg <3 x half> %a
162 %mul = fmul <3 x half> %neg.a, %b
; Test: fmul of <3 x half> where the right operand %b is first negated with fneg.
; The GFX9 and GFX10 checks expect neg_lo:[0,1] neg_hi:[0,1] on both
; v_pk_mul_f16 instructions. The GFX8 checks expect v_xor_b32 with 0x80008000
; applied to v2 and v3 before the multiplies.
166 define <3 x half> @v_fmul_v3f16_fneg_rhs(<3 x half> %a, <3 x half> %b) {
167 ; GFX9-LABEL: v_fmul_v3f16_fneg_rhs:
169 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
171 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
172 ; GFX9-NEXT: s_setpc_b64 s[30:31]
174 ; GFX8-LABEL: v_fmul_v3f16_fneg_rhs:
176 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
177 ; GFX8-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
178 ; GFX8-NEXT: v_xor_b32_e32 v3, 0x80008000, v3
179 ; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
180 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
181 ; GFX8-NEXT: v_mul_f16_e32 v1, v1, v3
182 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
183 ; GFX8-NEXT: s_setpc_b64 s[30:31]
185 ; GFX10-LABEL: v_fmul_v3f16_fneg_rhs:
187 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
188 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
189 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
190 ; GFX10-NEXT: s_setpc_b64 s[30:31]
191 %neg.b = fneg <3 x half> %b
192 %mul = fmul <3 x half> %a, %neg.b
; Test: fmul of <3 x half> where both operands are negated with fneg.
; Every check prefix expects the same instruction sequence as the plain
; v_fmul_v3f16 case: no neg_lo/neg_hi modifiers and no v_xor_b32 appear in
; the expected output.
196 define <3 x half> @v_fmul_v3f16_fneg_lhs_fneg_rhs(<3 x half> %a, <3 x half> %b) {
197 ; GFX9-LABEL: v_fmul_v3f16_fneg_lhs_fneg_rhs:
199 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
200 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
201 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3
202 ; GFX9-NEXT: s_setpc_b64 s[30:31]
204 ; GFX8-LABEL: v_fmul_v3f16_fneg_lhs_fneg_rhs:
206 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
207 ; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
208 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
209 ; GFX8-NEXT: v_mul_f16_e32 v1, v1, v3
210 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
211 ; GFX8-NEXT: s_setpc_b64 s[30:31]
213 ; GFX10-LABEL: v_fmul_v3f16_fneg_lhs_fneg_rhs:
215 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
216 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
217 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
218 ; GFX10-NEXT: s_setpc_b64 s[30:31]
219 %neg.a = fneg <3 x half> %a
220 %neg.b = fneg <3 x half> %b
221 %mul = fmul <3 x half> %neg.a, %neg.b
; Test: fmul of two <4 x half> arguments.
; The GFX9 and GFX10 checks expect two v_pk_mul_f16 instructions
; (v0 with v2, v1 with v3). The GFX8 checks expect, for each of those register
; pairs, a v_mul_f16_e32 and a WORD_1 v_mul_f16_sdwa combined by v_or_b32.
225 define <4 x half> @v_fmul_v4f16(<4 x half> %a, <4 x half> %b) {
226 ; GFX9-LABEL: v_fmul_v4f16:
228 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
229 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
230 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3
231 ; GFX9-NEXT: s_setpc_b64 s[30:31]
233 ; GFX8-LABEL: v_fmul_v4f16:
235 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
236 ; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
237 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
238 ; GFX8-NEXT: v_mul_f16_e32 v2, v1, v3
239 ; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
240 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
241 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
242 ; GFX8-NEXT: s_setpc_b64 s[30:31]
244 ; GFX10-LABEL: v_fmul_v4f16:
246 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
247 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
248 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
249 ; GFX10-NEXT: s_setpc_b64 s[30:31]
250 %mul = fmul <4 x half> %a, %b
; Test: fmul of <4 x half> where the left operand %a is first negated with fneg.
; The GFX9 and GFX10 checks expect neg_lo:[1,0] neg_hi:[1,0] on both
; v_pk_mul_f16 instructions. The GFX8 checks expect v_xor_b32 with 0x80008000
; applied to v0 and v1 before the multiplies.
254 define <4 x half> @v_fmul_v4f16_fneg_lhs(<4 x half> %a, <4 x half> %b) {
255 ; GFX9-LABEL: v_fmul_v4f16_fneg_lhs:
257 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
258 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[1,0] neg_hi:[1,0]
259 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[1,0] neg_hi:[1,0]
260 ; GFX9-NEXT: s_setpc_b64 s[30:31]
262 ; GFX8-LABEL: v_fmul_v4f16_fneg_lhs:
264 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
265 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
266 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
267 ; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
268 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
269 ; GFX8-NEXT: v_mul_f16_e32 v2, v1, v3
270 ; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
271 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
272 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
273 ; GFX8-NEXT: s_setpc_b64 s[30:31]
275 ; GFX10-LABEL: v_fmul_v4f16_fneg_lhs:
277 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
278 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[1,0] neg_hi:[1,0]
279 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[1,0] neg_hi:[1,0]
280 ; GFX10-NEXT: s_setpc_b64 s[30:31]
281 %neg.a = fneg <4 x half> %a
282 %mul = fmul <4 x half> %neg.a, %b
; Test: fmul of <4 x half> where the right operand %b is first negated with fneg.
; The GFX9 and GFX10 checks expect neg_lo:[0,1] neg_hi:[0,1] on both
; v_pk_mul_f16 instructions. The GFX8 checks expect v_xor_b32 with 0x80008000
; applied to v2 and v3 before the multiplies.
286 define <4 x half> @v_fmul_v4f16_fneg_rhs(<4 x half> %a, <4 x half> %b) {
287 ; GFX9-LABEL: v_fmul_v4f16_fneg_rhs:
289 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
290 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
291 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
292 ; GFX9-NEXT: s_setpc_b64 s[30:31]
294 ; GFX8-LABEL: v_fmul_v4f16_fneg_rhs:
296 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
297 ; GFX8-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
298 ; GFX8-NEXT: v_xor_b32_e32 v3, 0x80008000, v3
299 ; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
300 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
301 ; GFX8-NEXT: v_mul_f16_e32 v2, v1, v3
302 ; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
303 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
304 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
305 ; GFX8-NEXT: s_setpc_b64 s[30:31]
307 ; GFX10-LABEL: v_fmul_v4f16_fneg_rhs:
309 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
310 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
311 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
312 ; GFX10-NEXT: s_setpc_b64 s[30:31]
313 %neg.b = fneg <4 x half> %b
314 %mul = fmul <4 x half> %a, %neg.b
; Test: fmul of <4 x half> where both operands are negated with fneg.
; Every check prefix expects the same instruction sequence as the plain
; v_fmul_v4f16 case: no neg_lo/neg_hi modifiers and no v_xor_b32 appear in
; the expected output.
318 define <4 x half> @v_fmul_v4f16_fneg_lhs_fneg_rhs(<4 x half> %a, <4 x half> %b) {
319 ; GFX9-LABEL: v_fmul_v4f16_fneg_lhs_fneg_rhs:
321 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
323 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3
324 ; GFX9-NEXT: s_setpc_b64 s[30:31]
326 ; GFX8-LABEL: v_fmul_v4f16_fneg_lhs_fneg_rhs:
328 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
329 ; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
330 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
331 ; GFX8-NEXT: v_mul_f16_e32 v2, v1, v3
332 ; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
333 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
334 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
335 ; GFX8-NEXT: s_setpc_b64 s[30:31]
337 ; GFX10-LABEL: v_fmul_v4f16_fneg_lhs_fneg_rhs:
339 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
340 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
341 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
342 ; GFX10-NEXT: s_setpc_b64 s[30:31]
343 %neg.a = fneg <4 x half> %a
344 %neg.b = fneg <4 x half> %b
345 %mul = fmul <4 x half> %neg.a, %neg.b
; Test: fmul of two <6 x half> arguments.
; The GFX9 and GFX10 checks expect three v_pk_mul_f16 instructions
; (v0 with v3, v1 with v4, v2 with v5). The GFX8 checks expect, for each of
; those register pairs, a v_mul_f16_e32 and a WORD_1 v_mul_f16_sdwa combined
; by v_or_b32.
349 define <6 x half> @v_fmul_v6f16(<6 x half> %a, <6 x half> %b) {
350 ; GFX9-LABEL: v_fmul_v6f16:
352 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
353 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3
354 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v4
355 ; GFX9-NEXT: v_pk_mul_f16 v2, v2, v5
356 ; GFX9-NEXT: s_setpc_b64 s[30:31]
358 ; GFX8-LABEL: v_fmul_v6f16:
360 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
361 ; GFX8-NEXT: v_mul_f16_e32 v6, v0, v3
362 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
363 ; GFX8-NEXT: v_mul_f16_e32 v3, v1, v4
364 ; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
365 ; GFX8-NEXT: v_mul_f16_e32 v4, v2, v5
366 ; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
367 ; GFX8-NEXT: v_or_b32_e32 v0, v6, v0
368 ; GFX8-NEXT: v_or_b32_e32 v1, v3, v1
369 ; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
370 ; GFX8-NEXT: s_setpc_b64 s[30:31]
372 ; GFX10-LABEL: v_fmul_v6f16:
374 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
375 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v3
376 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v4
377 ; GFX10-NEXT: v_pk_mul_f16 v2, v2, v5
378 ; GFX10-NEXT: s_setpc_b64 s[30:31]
379 %mul = fmul <6 x half> %a, %b
; Test: fmul of <6 x half> where the left operand %a is first negated with fneg.
; The GFX9 and GFX10 checks expect neg_lo:[1,0] neg_hi:[1,0] on all three
; v_pk_mul_f16 instructions. The GFX8 checks expect v_xor_b32 with 0x80008000
; applied to v0, v1 and v2 before the multiplies.
383 define <6 x half> @v_fmul_v6f16_fneg_lhs(<6 x half> %a, <6 x half> %b) {
384 ; GFX9-LABEL: v_fmul_v6f16_fneg_lhs:
386 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
387 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3 neg_lo:[1,0] neg_hi:[1,0]
388 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v4 neg_lo:[1,0] neg_hi:[1,0]
389 ; GFX9-NEXT: v_pk_mul_f16 v2, v2, v5 neg_lo:[1,0] neg_hi:[1,0]
390 ; GFX9-NEXT: s_setpc_b64 s[30:31]
392 ; GFX8-LABEL: v_fmul_v6f16_fneg_lhs:
394 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
395 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
396 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
397 ; GFX8-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
398 ; GFX8-NEXT: v_mul_f16_e32 v6, v0, v3
399 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
400 ; GFX8-NEXT: v_mul_f16_e32 v3, v1, v4
401 ; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
402 ; GFX8-NEXT: v_mul_f16_e32 v4, v2, v5
403 ; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
404 ; GFX8-NEXT: v_or_b32_e32 v0, v6, v0
405 ; GFX8-NEXT: v_or_b32_e32 v1, v3, v1
406 ; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
407 ; GFX8-NEXT: s_setpc_b64 s[30:31]
409 ; GFX10-LABEL: v_fmul_v6f16_fneg_lhs:
411 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
412 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v3 neg_lo:[1,0] neg_hi:[1,0]
413 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v4 neg_lo:[1,0] neg_hi:[1,0]
414 ; GFX10-NEXT: v_pk_mul_f16 v2, v2, v5 neg_lo:[1,0] neg_hi:[1,0]
415 ; GFX10-NEXT: s_setpc_b64 s[30:31]
416 %neg.a = fneg <6 x half> %a
417 %mul = fmul <6 x half> %neg.a, %b
; Test: fmul of <6 x half> where the right operand %b is first negated with fneg.
; The GFX9 and GFX10 checks expect neg_lo:[0,1] neg_hi:[0,1] on all three
; v_pk_mul_f16 instructions. The GFX8 checks expect v_xor_b32 with 0x80008000
; applied to v3, v4 and v5 before the multiplies.
421 define <6 x half> @v_fmul_v6f16_fneg_rhs(<6 x half> %a, <6 x half> %b) {
422 ; GFX9-LABEL: v_fmul_v6f16_fneg_rhs:
424 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
425 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3 neg_lo:[0,1] neg_hi:[0,1]
426 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v4 neg_lo:[0,1] neg_hi:[0,1]
427 ; GFX9-NEXT: v_pk_mul_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1]
428 ; GFX9-NEXT: s_setpc_b64 s[30:31]
430 ; GFX8-LABEL: v_fmul_v6f16_fneg_rhs:
432 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
433 ; GFX8-NEXT: v_xor_b32_e32 v3, 0x80008000, v3
434 ; GFX8-NEXT: v_xor_b32_e32 v4, 0x80008000, v4
435 ; GFX8-NEXT: v_xor_b32_e32 v5, 0x80008000, v5
436 ; GFX8-NEXT: v_mul_f16_e32 v6, v0, v3
437 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
438 ; GFX8-NEXT: v_mul_f16_e32 v3, v1, v4
439 ; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
440 ; GFX8-NEXT: v_mul_f16_e32 v4, v2, v5
441 ; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
442 ; GFX8-NEXT: v_or_b32_e32 v0, v6, v0
443 ; GFX8-NEXT: v_or_b32_e32 v1, v3, v1
444 ; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
445 ; GFX8-NEXT: s_setpc_b64 s[30:31]
447 ; GFX10-LABEL: v_fmul_v6f16_fneg_rhs:
449 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
450 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v3 neg_lo:[0,1] neg_hi:[0,1]
451 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v4 neg_lo:[0,1] neg_hi:[0,1]
452 ; GFX10-NEXT: v_pk_mul_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1]
453 ; GFX10-NEXT: s_setpc_b64 s[30:31]
454 %neg.b = fneg <6 x half> %b
455 %mul = fmul <6 x half> %a, %neg.b
; Test: fmul of <6 x half> where both operands are negated with fneg.
; Every check prefix expects the same instruction sequence as the plain
; v_fmul_v6f16 case: no neg_lo/neg_hi modifiers and no v_xor_b32 appear in
; the expected output.
459 define <6 x half> @v_fmul_v6f16_fneg_lhs_fneg_rhs(<6 x half> %a, <6 x half> %b) {
460 ; GFX9-LABEL: v_fmul_v6f16_fneg_lhs_fneg_rhs:
462 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
463 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3
464 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v4
465 ; GFX9-NEXT: v_pk_mul_f16 v2, v2, v5
466 ; GFX9-NEXT: s_setpc_b64 s[30:31]
468 ; GFX8-LABEL: v_fmul_v6f16_fneg_lhs_fneg_rhs:
470 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
471 ; GFX8-NEXT: v_mul_f16_e32 v6, v0, v3
472 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
473 ; GFX8-NEXT: v_mul_f16_e32 v3, v1, v4
474 ; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
475 ; GFX8-NEXT: v_mul_f16_e32 v4, v2, v5
476 ; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
477 ; GFX8-NEXT: v_or_b32_e32 v0, v6, v0
478 ; GFX8-NEXT: v_or_b32_e32 v1, v3, v1
479 ; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
480 ; GFX8-NEXT: s_setpc_b64 s[30:31]
482 ; GFX10-LABEL: v_fmul_v6f16_fneg_lhs_fneg_rhs:
484 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
485 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v3
486 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v4
487 ; GFX10-NEXT: v_pk_mul_f16 v2, v2, v5
488 ; GFX10-NEXT: s_setpc_b64 s[30:31]
489 %neg.a = fneg <6 x half> %a
490 %neg.b = fneg <6 x half> %b
491 %mul = fmul <6 x half> %neg.a, %neg.b
; Test: fmul of two <8 x half> arguments.
; The GFX9 and GFX10 checks expect four v_pk_mul_f16 instructions
; (v0 with v4, v1 with v5, v2 with v6, v3 with v7). The GFX8 checks expect,
; for each of those register pairs, a v_mul_f16_e32 and a WORD_1
; v_mul_f16_sdwa combined by v_or_b32.
495 define <8 x half> @v_fmul_v8f16(<8 x half> %a, <8 x half> %b) {
496 ; GFX9-LABEL: v_fmul_v8f16:
498 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
499 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v4
500 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v5
501 ; GFX9-NEXT: v_pk_mul_f16 v2, v2, v6
502 ; GFX9-NEXT: v_pk_mul_f16 v3, v3, v7
503 ; GFX9-NEXT: s_setpc_b64 s[30:31]
505 ; GFX8-LABEL: v_fmul_v8f16:
507 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
508 ; GFX8-NEXT: v_mul_f16_e32 v8, v0, v4
509 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
510 ; GFX8-NEXT: v_mul_f16_e32 v4, v1, v5
511 ; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
512 ; GFX8-NEXT: v_mul_f16_e32 v5, v2, v6
513 ; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
514 ; GFX8-NEXT: v_mul_f16_e32 v6, v3, v7
515 ; GFX8-NEXT: v_mul_f16_sdwa v3, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
516 ; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
517 ; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
518 ; GFX8-NEXT: v_or_b32_e32 v2, v5, v2
519 ; GFX8-NEXT: v_or_b32_e32 v3, v6, v3
520 ; GFX8-NEXT: s_setpc_b64 s[30:31]
522 ; GFX10-LABEL: v_fmul_v8f16:
524 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
525 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v4
526 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v5
527 ; GFX10-NEXT: v_pk_mul_f16 v2, v2, v6
528 ; GFX10-NEXT: v_pk_mul_f16 v3, v3, v7
529 ; GFX10-NEXT: s_setpc_b64 s[30:31]
530 %mul = fmul <8 x half> %a, %b
; Test: fmul of <8 x half> where the left operand %a is first negated with fneg.
; The GFX9 and GFX10 checks expect neg_lo:[1,0] neg_hi:[1,0] on all four
; v_pk_mul_f16 instructions. The GFX8 checks expect v_xor_b32 with 0x80008000
; applied to v0, v1, v2 and v3 before the multiplies.
534 define <8 x half> @v_fmul_v8f16_fneg_lhs(<8 x half> %a, <8 x half> %b) {
535 ; GFX9-LABEL: v_fmul_v8f16_fneg_lhs:
537 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
538 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v4 neg_lo:[1,0] neg_hi:[1,0]
539 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v5 neg_lo:[1,0] neg_hi:[1,0]
540 ; GFX9-NEXT: v_pk_mul_f16 v2, v2, v6 neg_lo:[1,0] neg_hi:[1,0]
541 ; GFX9-NEXT: v_pk_mul_f16 v3, v3, v7 neg_lo:[1,0] neg_hi:[1,0]
542 ; GFX9-NEXT: s_setpc_b64 s[30:31]
544 ; GFX8-LABEL: v_fmul_v8f16_fneg_lhs:
546 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
547 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
548 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
549 ; GFX8-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
550 ; GFX8-NEXT: v_xor_b32_e32 v3, 0x80008000, v3
551 ; GFX8-NEXT: v_mul_f16_e32 v8, v0, v4
552 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
553 ; GFX8-NEXT: v_mul_f16_e32 v4, v1, v5
554 ; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
555 ; GFX8-NEXT: v_mul_f16_e32 v5, v2, v6
556 ; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
557 ; GFX8-NEXT: v_mul_f16_e32 v6, v3, v7
558 ; GFX8-NEXT: v_mul_f16_sdwa v3, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
559 ; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
560 ; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
561 ; GFX8-NEXT: v_or_b32_e32 v2, v5, v2
562 ; GFX8-NEXT: v_or_b32_e32 v3, v6, v3
563 ; GFX8-NEXT: s_setpc_b64 s[30:31]
565 ; GFX10-LABEL: v_fmul_v8f16_fneg_lhs:
567 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
568 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v4 neg_lo:[1,0] neg_hi:[1,0]
569 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v5 neg_lo:[1,0] neg_hi:[1,0]
570 ; GFX10-NEXT: v_pk_mul_f16 v2, v2, v6 neg_lo:[1,0] neg_hi:[1,0]
571 ; GFX10-NEXT: v_pk_mul_f16 v3, v3, v7 neg_lo:[1,0] neg_hi:[1,0]
572 ; GFX10-NEXT: s_setpc_b64 s[30:31]
573 %neg.a = fneg <8 x half> %a
574 %mul = fmul <8 x half> %neg.a, %b
; Test: fmul of <8 x half> where the right operand %b is first negated with fneg.
; The GFX9 and GFX10 checks expect neg_lo:[0,1] neg_hi:[0,1] on all four
; v_pk_mul_f16 instructions. The GFX8 checks expect v_xor_b32 with 0x80008000
; applied to v4, v5, v6 and v7 before the multiplies.
578 define <8 x half> @v_fmul_v8f16_fneg_rhs(<8 x half> %a, <8 x half> %b) {
579 ; GFX9-LABEL: v_fmul_v8f16_fneg_rhs:
581 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
582 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
583 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
584 ; GFX9-NEXT: v_pk_mul_f16 v2, v2, v6 neg_lo:[0,1] neg_hi:[0,1]
585 ; GFX9-NEXT: v_pk_mul_f16 v3, v3, v7 neg_lo:[0,1] neg_hi:[0,1]
586 ; GFX9-NEXT: s_setpc_b64 s[30:31]
588 ; GFX8-LABEL: v_fmul_v8f16_fneg_rhs:
590 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
591 ; GFX8-NEXT: v_xor_b32_e32 v4, 0x80008000, v4
592 ; GFX8-NEXT: v_xor_b32_e32 v5, 0x80008000, v5
593 ; GFX8-NEXT: v_xor_b32_e32 v6, 0x80008000, v6
594 ; GFX8-NEXT: v_xor_b32_e32 v7, 0x80008000, v7
595 ; GFX8-NEXT: v_mul_f16_e32 v8, v0, v4
596 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
597 ; GFX8-NEXT: v_mul_f16_e32 v4, v1, v5
598 ; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
599 ; GFX8-NEXT: v_mul_f16_e32 v5, v2, v6
600 ; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
601 ; GFX8-NEXT: v_mul_f16_e32 v6, v3, v7
602 ; GFX8-NEXT: v_mul_f16_sdwa v3, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
603 ; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
604 ; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
605 ; GFX8-NEXT: v_or_b32_e32 v2, v5, v2
606 ; GFX8-NEXT: v_or_b32_e32 v3, v6, v3
607 ; GFX8-NEXT: s_setpc_b64 s[30:31]
609 ; GFX10-LABEL: v_fmul_v8f16_fneg_rhs:
611 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
612 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
613 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
614 ; GFX10-NEXT: v_pk_mul_f16 v2, v2, v6 neg_lo:[0,1] neg_hi:[0,1]
615 ; GFX10-NEXT: v_pk_mul_f16 v3, v3, v7 neg_lo:[0,1] neg_hi:[0,1]
616 ; GFX10-NEXT: s_setpc_b64 s[30:31]
617 %neg.b = fneg <8 x half> %b
618 %mul = fmul <8 x half> %a, %neg.b
; Test: fmul of <8 x half> where both operands are negated with fneg.
; Every check prefix expects the same instruction sequence as the plain
; v_fmul_v8f16 case: no neg_lo/neg_hi modifiers and no v_xor_b32 appear in
; the expected output.
622 define <8 x half> @v_fmul_v8f16_fneg_lhs_fneg_rhs(<8 x half> %a, <8 x half> %b) {
623 ; GFX9-LABEL: v_fmul_v8f16_fneg_lhs_fneg_rhs:
625 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
626 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v4
627 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v5
628 ; GFX9-NEXT: v_pk_mul_f16 v2, v2, v6
629 ; GFX9-NEXT: v_pk_mul_f16 v3, v3, v7
630 ; GFX9-NEXT: s_setpc_b64 s[30:31]
632 ; GFX8-LABEL: v_fmul_v8f16_fneg_lhs_fneg_rhs:
634 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
635 ; GFX8-NEXT: v_mul_f16_e32 v8, v0, v4
636 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
637 ; GFX8-NEXT: v_mul_f16_e32 v4, v1, v5
638 ; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
639 ; GFX8-NEXT: v_mul_f16_e32 v5, v2, v6
640 ; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
641 ; GFX8-NEXT: v_mul_f16_e32 v6, v3, v7
642 ; GFX8-NEXT: v_mul_f16_sdwa v3, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
643 ; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
644 ; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
645 ; GFX8-NEXT: v_or_b32_e32 v2, v5, v2
646 ; GFX8-NEXT: v_or_b32_e32 v3, v6, v3
647 ; GFX8-NEXT: s_setpc_b64 s[30:31]
649 ; GFX10-LABEL: v_fmul_v8f16_fneg_lhs_fneg_rhs:
651 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
652 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v4
653 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v5
654 ; GFX10-NEXT: v_pk_mul_f16 v2, v2, v6
655 ; GFX10-NEXT: v_pk_mul_f16 v3, v3, v7
656 ; GFX10-NEXT: s_setpc_b64 s[30:31]
657 %neg.a = fneg <8 x half> %a
658 %neg.b = fneg <8 x half> %b
659 %mul = fmul <8 x half> %neg.a, %neg.b