1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; Baseline case: plain add of two <2 x i16> values (%a + %b), no modifiers.
; Per the checks below, GFX9 and GFX10 are expected to emit one v_pk_add_u16,
; while GFX8 is expected to emit two 16-bit adds (a plain v_add_u16_e32 and an
; SDWA add that selects WORD_1 of both sources and of the destination) whose
; results are merged with v_or_b32.
6 define <2 x i16> @v_add_v2i16(<2 x i16> %a, <2 x i16> %b) {
7 ; GFX9-LABEL: v_add_v2i16:
9 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10 ; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
11 ; GFX9-NEXT: s_setpc_b64 s[30:31]
13 ; GFX8-LABEL: v_add_v2i16:
15 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16 ; GFX8-NEXT: v_add_u16_e32 v2, v0, v1
17 ; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
18 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
19 ; GFX8-NEXT: s_setpc_b64 s[30:31]
21 ; GFX10-LABEL: v_add_v2i16:
23 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
25 ; GFX10-NEXT: v_pk_add_u16 v0, v0, v1
26 ; GFX10-NEXT: s_setpc_b64 s[30:31]
27 %add = add <2 x i16> %a, %b
; Integer add whose left operand is an fneg'd <2 x half> (%a) bitcast to
; <2 x i16>; the right operand %b is a plain <2 x i16>.
; Per the checks below, GFX9 and GFX10 are expected to carry the negation as
; neg_lo:[1,0] neg_hi:[1,0] on v_pk_add_u16, whereas GFX8 is expected to xor v0
; with 0x80008000 before the two split 16-bit adds.
; NOTE(review): the GFX9/GFX10 output relies on neg_lo/neg_hi applied to an
; integer packed add being equivalent to the sign-bit xor GFX8 emits - confirm
; against the ISA documentation.
31 define <2 x i16> @v_add_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) {
32 ; GFX9-LABEL: v_add_v2i16_fneg_lhs:
34 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35 ; GFX9-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
36 ; GFX9-NEXT: s_setpc_b64 s[30:31]
38 ; GFX8-LABEL: v_add_v2i16_fneg_lhs:
40 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
42 ; GFX8-NEXT: v_add_u16_e32 v2, v0, v1
43 ; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
44 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
45 ; GFX8-NEXT: s_setpc_b64 s[30:31]
47 ; GFX10-LABEL: v_add_v2i16_fneg_lhs:
49 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
51 ; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
52 ; GFX10-NEXT: s_setpc_b64 s[30:31]
53 %neg.a = fneg <2 x half> %a
54 %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
55 %add = add <2 x i16> %cast.neg.a, %b
; Mirror of the fneg_lhs case: the right operand is an fneg'd <2 x half> (%b)
; bitcast to <2 x i16>, the left operand %a is a plain <2 x i16>.
; Per the checks below, GFX9 and GFX10 are expected to set the modifier on the
; second source (neg_lo:[0,1] neg_hi:[0,1]); GFX8 is expected to xor v1 with
; 0x80008000 before the two split 16-bit adds.
; NOTE(review): as with the lhs variant, confirm that neg_lo/neg_hi on the
; integer v_pk_add_u16 matches the explicit sign-bit xor.
59 define <2 x i16> @v_add_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) {
60 ; GFX9-LABEL: v_add_v2i16_fneg_rhs:
62 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
63 ; GFX9-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
64 ; GFX9-NEXT: s_setpc_b64 s[30:31]
66 ; GFX8-LABEL: v_add_v2i16_fneg_rhs:
68 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
70 ; GFX8-NEXT: v_add_u16_e32 v2, v0, v1
71 ; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
72 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
73 ; GFX8-NEXT: s_setpc_b64 s[30:31]
75 ; GFX10-LABEL: v_add_v2i16_fneg_rhs:
77 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
79 ; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
80 ; GFX10-NEXT: s_setpc_b64 s[30:31]
81 %neg.b = fneg <2 x half> %b
82 %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
83 %add = add <2 x i16> %a, %cast.neg.b
; Both operands are fneg'd <2 x half> values bitcast to <2 x i16> before the
; integer add.
; Per the checks below, GFX9 and GFX10 are expected to set the modifiers on
; both sources (neg_lo:[1,1] neg_hi:[1,1]); GFX8 is expected to load
; 0x80008000 into s4 once and xor it into both v0 and v1 before the split adds.
; NOTE(review): confirm that neg_lo/neg_hi on the integer v_pk_add_u16 matches
; the explicit sign-bit xors.
87 define <2 x i16> @v_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
88 ; GFX9-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
90 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
91 ; GFX9-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
92 ; GFX9-NEXT: s_setpc_b64 s[30:31]
94 ; GFX8-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
96 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97 ; GFX8-NEXT: s_mov_b32 s4, 0x80008000
98 ; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0
99 ; GFX8-NEXT: v_xor_b32_e32 v1, s4, v1
100 ; GFX8-NEXT: v_add_u16_e32 v2, v0, v1
101 ; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
102 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
103 ; GFX8-NEXT: s_setpc_b64 s[30:31]
105 ; GFX10-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
107 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
109 ; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
110 ; GFX10-NEXT: s_setpc_b64 s[30:31]
111 %neg.a = fneg <2 x half> %a
112 %neg.b = fneg <2 x half> %b
113 %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
114 %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
115 %add = add <2 x i16> %cast.neg.a, %cast.neg.b
; Adds the constant splat <i16 -64, i16 -64> to %a.
; Per the checks below:
;   GFX9  - the packed constant 0xffc0ffc0 is moved into v1 and fed to
;           v_pk_add_u16.
;   GFX8  - 0xffc0 is loaded into s4 (and copied to v2) and used by both of
;           the split 16-bit adds; the SDWA add reads it with src1_sel:DWORD.
;   GFX10 - the literal 0xffc0 is used directly as an operand of v_pk_add_u16
;           with op_sel_hi:[0,1].
119 define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
120 ; GFX9-LABEL: v_add_v2i16_neg_inline_imm_splat:
122 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
123 ; GFX9-NEXT: v_mov_b32_e32 v1, 0xffc0ffc0
124 ; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
125 ; GFX9-NEXT: s_setpc_b64 s[30:31]
127 ; GFX8-LABEL: v_add_v2i16_neg_inline_imm_splat:
129 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130 ; GFX8-NEXT: s_movk_i32 s4, 0xffc0
131 ; GFX8-NEXT: v_mov_b32_e32 v2, s4
132 ; GFX8-NEXT: v_add_u16_e32 v1, s4, v0
133 ; GFX8-NEXT: v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
134 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
135 ; GFX8-NEXT: s_setpc_b64 s[30:31]
137 ; GFX10-LABEL: v_add_v2i16_neg_inline_imm_splat:
139 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
141 ; GFX10-NEXT: v_pk_add_u16 v0, 0xffc0, v0 op_sel_hi:[0,1]
142 ; GFX10-NEXT: s_setpc_b64 s[30:31]
143 %add = add <2 x i16> %a, <i16 -64, i16 -64>
; Adds the constant vector <i16 -64, i16 4> to %a (-64 in element 0, 4 in
; element 1).
; Per the checks below:
;   GFX9  - the packed constant 0x4ffc0 is moved into v1 and fed to
;           v_pk_add_u16.
;   GFX8  - one 16-bit add uses the literal 0xffc0 and the SDWA add on WORD_1
;           uses the value 4 held in v2.
;   GFX10 - the 32-bit literal 0x4ffc0 is used directly as an operand of
;           v_pk_add_u16.
147 define <2 x i16> @v_add_v2i16_neg_inline_imm_lo(<2 x i16> %a) {
148 ; GFX9-LABEL: v_add_v2i16_neg_inline_imm_lo:
150 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x4ffc0
152 ; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
153 ; GFX9-NEXT: s_setpc_b64 s[30:31]
155 ; GFX8-LABEL: v_add_v2i16_neg_inline_imm_lo:
157 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
158 ; GFX8-NEXT: v_mov_b32_e32 v2, 4
159 ; GFX8-NEXT: v_add_u16_e32 v1, 0xffc0, v0
160 ; GFX8-NEXT: v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
161 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
162 ; GFX8-NEXT: s_setpc_b64 s[30:31]
164 ; GFX10-LABEL: v_add_v2i16_neg_inline_imm_lo:
166 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
167 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
168 ; GFX10-NEXT: v_pk_add_u16 v0, 0x4ffc0, v0
169 ; GFX10-NEXT: s_setpc_b64 s[30:31]
170 %add = add <2 x i16> %a, <i16 -64, i16 4>
; Adds the constant vector <i16 4, i16 -64> to %a (4 in element 0, -64 in
; element 1).
; Per the checks below:
;   GFX9  - the packed constant 0xffc00004 is moved into v1 and fed to
;           v_pk_add_u16.
;   GFX8  - one 16-bit add uses the immediate 4 and the SDWA add on WORD_1
;           uses 0xffffffc0 held in v1.
;   GFX10 - the 32-bit literal 0xffc00004 is used directly as an operand of
;           v_pk_add_u16.
174 define <2 x i16> @v_add_v2i16_neg_inline_imm_hi(<2 x i16> %a) {
175 ; GFX9-LABEL: v_add_v2i16_neg_inline_imm_hi:
177 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178 ; GFX9-NEXT: v_mov_b32_e32 v1, 0xffc00004
179 ; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
180 ; GFX9-NEXT: s_setpc_b64 s[30:31]
182 ; GFX8-LABEL: v_add_v2i16_neg_inline_imm_hi:
184 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185 ; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffc0
186 ; GFX8-NEXT: v_add_u16_e32 v2, 4, v0
187 ; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
188 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
189 ; GFX8-NEXT: s_setpc_b64 s[30:31]
191 ; GFX10-LABEL: v_add_v2i16_neg_inline_imm_hi:
193 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
194 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
195 ; GFX10-NEXT: v_pk_add_u16 v0, 0xffc00004, v0
196 ; GFX10-NEXT: s_setpc_b64 s[30:31]
197 %add = add <2 x i16> %a, <i16 4, i16 -64>
; amdgpu_ps variant taking %a as an inreg <2 x i16>: adds the splat
; <i16 -64, i16 -64> and bitcasts the sum to i32.
; Per the checks below:
;   GFX9/GFX10 - s_lshr_b32 by 16 produces the upper half in s1, two s_add_i32
;                add 0xffc0ffc0 to s0 and 0xffc0 to s1, and
;                s_pack_ll_b32_b16 recombines the two results.
;   GFX8       - s0 is split with a 0xffff mask and a 16-bit shift, both parts
;                are added to 0xffc0 held in s1, and the result is rebuilt
;                with s_lshl_b32 / s_and_b32 / s_or_b32.
201 define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
202 ; GFX9-LABEL: s_add_v2i16_neg_inline_imm_splat:
204 ; GFX9-NEXT: s_lshr_b32 s1, s0, 16
205 ; GFX9-NEXT: s_add_i32 s0, s0, 0xffc0ffc0
206 ; GFX9-NEXT: s_add_i32 s1, s1, 0xffc0
207 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1
208 ; GFX9-NEXT: ; return to shader part epilog
210 ; GFX8-LABEL: s_add_v2i16_neg_inline_imm_splat:
212 ; GFX8-NEXT: s_mov_b32 s3, 0xffff
213 ; GFX8-NEXT: s_mov_b32 s1, 0xffc0
214 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
215 ; GFX8-NEXT: s_and_b32 s0, s0, s3
216 ; GFX8-NEXT: s_add_i32 s0, s0, s1
217 ; GFX8-NEXT: s_add_i32 s2, s2, s1
218 ; GFX8-NEXT: s_lshl_b32 s1, s2, 16
219 ; GFX8-NEXT: s_and_b32 s0, s0, s3
220 ; GFX8-NEXT: s_or_b32 s0, s1, s0
221 ; GFX8-NEXT: ; return to shader part epilog
223 ; GFX10-LABEL: s_add_v2i16_neg_inline_imm_splat:
225 ; GFX10-NEXT: s_lshr_b32 s1, s0, 16
226 ; GFX10-NEXT: s_add_i32 s0, s0, 0xffc0ffc0
227 ; GFX10-NEXT: s_add_i32 s1, s1, 0xffc0
228 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1
229 ; GFX10-NEXT: ; return to shader part epilog
230 %add = add <2 x i16> %a, <i16 -64, i16 -64>
231 %cast = bitcast <2 x i16> %add to i32
; amdgpu_ps variant taking %a as an inreg <2 x i16>: adds <i16 -64, i16 4>
; and bitcasts the sum to i32.
; Per the checks below:
;   GFX9/GFX10 - s_lshr_b32 by 16 produces the upper half in s1, two s_add_i32
;                add 0x4ffc0 to s0 and 4 to s1, and s_pack_ll_b32_b16
;                recombines the two results.
;   GFX8       - s0 is split with a 0xffff mask and a 16-bit shift, 0xffc0 is
;                added to the masked part and 4 to the shifted part, and the
;                result is rebuilt with s_lshl_b32 / s_and_b32 / s_or_b32.
235 define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
236 ; GFX9-LABEL: s_add_v2i16_neg_inline_imm_lo:
238 ; GFX9-NEXT: s_lshr_b32 s1, s0, 16
239 ; GFX9-NEXT: s_add_i32 s0, s0, 0x4ffc0
240 ; GFX9-NEXT: s_add_i32 s1, s1, 4
241 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1
242 ; GFX9-NEXT: ; return to shader part epilog
244 ; GFX8-LABEL: s_add_v2i16_neg_inline_imm_lo:
246 ; GFX8-NEXT: s_mov_b32 s2, 0xffff
247 ; GFX8-NEXT: s_lshr_b32 s1, s0, 16
248 ; GFX8-NEXT: s_and_b32 s0, s0, s2
249 ; GFX8-NEXT: s_add_i32 s0, s0, 0xffc0
250 ; GFX8-NEXT: s_add_i32 s1, s1, 4
251 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
252 ; GFX8-NEXT: s_and_b32 s0, s0, s2
253 ; GFX8-NEXT: s_or_b32 s0, s1, s0
254 ; GFX8-NEXT: ; return to shader part epilog
256 ; GFX10-LABEL: s_add_v2i16_neg_inline_imm_lo:
258 ; GFX10-NEXT: s_lshr_b32 s1, s0, 16
259 ; GFX10-NEXT: s_add_i32 s0, s0, 0x4ffc0
260 ; GFX10-NEXT: s_add_i32 s1, s1, 4
261 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1
262 ; GFX10-NEXT: ; return to shader part epilog
263 %add = add <2 x i16> %a, <i16 -64, i16 4>
264 %cast = bitcast <2 x i16> %add to i32
; amdgpu_ps variant taking %a as an inreg <2 x i16>: adds <i16 4, i16 -64>
; and bitcasts the sum to i32.
; Per the checks below:
;   GFX9/GFX10 - s_lshr_b32 by 16 produces the upper half in s1, two s_add_i32
;                add 0xffc00004 to s0 and 0xffc0 to s1, and
;                s_pack_ll_b32_b16 recombines the two results.
;   GFX8       - s0 is split with a 0xffff mask and a 16-bit shift, 4 is added
;                to the masked part and 0xffc0 to the shifted part, and the
;                result is rebuilt with s_lshl_b32 / s_and_b32 / s_or_b32.
268 define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) {
269 ; GFX9-LABEL: s_add_v2i16_neg_inline_imm_hi:
271 ; GFX9-NEXT: s_lshr_b32 s1, s0, 16
272 ; GFX9-NEXT: s_add_i32 s0, s0, 0xffc00004
273 ; GFX9-NEXT: s_add_i32 s1, s1, 0xffc0
274 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1
275 ; GFX9-NEXT: ; return to shader part epilog
277 ; GFX8-LABEL: s_add_v2i16_neg_inline_imm_hi:
279 ; GFX8-NEXT: s_mov_b32 s2, 0xffff
280 ; GFX8-NEXT: s_lshr_b32 s1, s0, 16
281 ; GFX8-NEXT: s_and_b32 s0, s0, s2
282 ; GFX8-NEXT: s_add_i32 s0, s0, 4
283 ; GFX8-NEXT: s_add_i32 s1, s1, 0xffc0
284 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
285 ; GFX8-NEXT: s_and_b32 s0, s0, s2
286 ; GFX8-NEXT: s_or_b32 s0, s1, s0
287 ; GFX8-NEXT: ; return to shader part epilog
289 ; GFX10-LABEL: s_add_v2i16_neg_inline_imm_hi:
291 ; GFX10-NEXT: s_lshr_b32 s1, s0, 16
292 ; GFX10-NEXT: s_add_i32 s0, s0, 0xffc00004
293 ; GFX10-NEXT: s_add_i32 s1, s1, 0xffc0
294 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1
295 ; GFX10-NEXT: ; return to shader part epilog
296 %add = add <2 x i16> %a, <i16 4, i16 -64>
297 %cast = bitcast <2 x i16> %add to i32
; amdgpu_ps baseline with both operands passed inreg: adds two <2 x i16>
; values (%a + %b) and bitcasts the sum to i32.
; Per the checks below:
;   GFX9/GFX10 - each input is shifted right by 16 to obtain its upper half,
;                the unshifted and shifted pairs are added with s_add_i32,
;                and s_pack_ll_b32_b16 recombines the two sums.
;   GFX8       - each input is split with a 0xffff mask and a 16-bit shift,
;                the parts are added pairwise, and the result is rebuilt with
;                s_lshl_b32 / s_and_b32 / s_or_b32.
301 define amdgpu_ps i32 @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) {
302 ; GFX9-LABEL: s_add_v2i16:
304 ; GFX9-NEXT: s_lshr_b32 s2, s0, 16
305 ; GFX9-NEXT: s_lshr_b32 s3, s1, 16
306 ; GFX9-NEXT: s_add_i32 s0, s0, s1
307 ; GFX9-NEXT: s_add_i32 s2, s2, s3
308 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
309 ; GFX9-NEXT: ; return to shader part epilog
311 ; GFX8-LABEL: s_add_v2i16:
313 ; GFX8-NEXT: s_mov_b32 s3, 0xffff
314 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
315 ; GFX8-NEXT: s_and_b32 s0, s0, s3
316 ; GFX8-NEXT: s_lshr_b32 s4, s1, 16
317 ; GFX8-NEXT: s_and_b32 s1, s1, s3
318 ; GFX8-NEXT: s_add_i32 s0, s0, s1
319 ; GFX8-NEXT: s_add_i32 s2, s2, s4
320 ; GFX8-NEXT: s_lshl_b32 s1, s2, 16
321 ; GFX8-NEXT: s_and_b32 s0, s0, s3
322 ; GFX8-NEXT: s_or_b32 s0, s1, s0
323 ; GFX8-NEXT: ; return to shader part epilog
325 ; GFX10-LABEL: s_add_v2i16:
327 ; GFX10-NEXT: s_lshr_b32 s2, s0, 16
328 ; GFX10-NEXT: s_lshr_b32 s3, s1, 16
329 ; GFX10-NEXT: s_add_i32 s0, s0, s1
330 ; GFX10-NEXT: s_add_i32 s2, s2, s3
331 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2
332 ; GFX10-NEXT: ; return to shader part epilog
333 %add = add <2 x i16> %a, %b
334 %cast = bitcast <2 x i16> %add to i32
; amdgpu_ps variant with inreg operands where the left operand is an fneg'd
; <2 x half> (%a) bitcast to <2 x i16>; the sum with %b is bitcast to i32.
; Per the checks below, all three targets are expected to apply the negation
; as an explicit s_xor_b32 of s0 with 0x80008000, followed by the same
; split-add-recombine sequence used for the plain s_add_v2i16 case
; (s_pack_ll_b32_b16 on GFX9/GFX10, mask/shift/or on GFX8).
338 define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg %b) {
339 ; GFX9-LABEL: s_add_v2i16_fneg_lhs:
341 ; GFX9-NEXT: s_xor_b32 s0, s0, 0x80008000
342 ; GFX9-NEXT: s_lshr_b32 s2, s0, 16
343 ; GFX9-NEXT: s_lshr_b32 s3, s1, 16
344 ; GFX9-NEXT: s_add_i32 s0, s0, s1
345 ; GFX9-NEXT: s_add_i32 s2, s2, s3
346 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
347 ; GFX9-NEXT: ; return to shader part epilog
349 ; GFX8-LABEL: s_add_v2i16_fneg_lhs:
351 ; GFX8-NEXT: s_xor_b32 s0, s0, 0x80008000
352 ; GFX8-NEXT: s_mov_b32 s3, 0xffff
353 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
354 ; GFX8-NEXT: s_and_b32 s0, s0, s3
355 ; GFX8-NEXT: s_lshr_b32 s4, s1, 16
356 ; GFX8-NEXT: s_and_b32 s1, s1, s3
357 ; GFX8-NEXT: s_add_i32 s0, s0, s1
358 ; GFX8-NEXT: s_add_i32 s2, s2, s4
359 ; GFX8-NEXT: s_lshl_b32 s1, s2, 16
360 ; GFX8-NEXT: s_and_b32 s0, s0, s3
361 ; GFX8-NEXT: s_or_b32 s0, s1, s0
362 ; GFX8-NEXT: ; return to shader part epilog
364 ; GFX10-LABEL: s_add_v2i16_fneg_lhs:
366 ; GFX10-NEXT: s_xor_b32 s0, s0, 0x80008000
367 ; GFX10-NEXT: s_lshr_b32 s3, s1, 16
368 ; GFX10-NEXT: s_lshr_b32 s2, s0, 16
369 ; GFX10-NEXT: s_add_i32 s0, s0, s1
370 ; GFX10-NEXT: s_add_i32 s2, s2, s3
371 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2
372 ; GFX10-NEXT: ; return to shader part epilog
373 %neg.a = fneg <2 x half> %a
374 %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
375 %add = add <2 x i16> %cast.neg.a, %b
376 %cast = bitcast <2 x i16> %add to i32
; amdgpu_ps variant with inreg operands where the right operand is an fneg'd
; <2 x half> (%b) bitcast to <2 x i16>; the sum with %a is bitcast to i32.
; Per the checks below, all three targets are expected to apply the negation
; as an explicit s_xor_b32 of s1 with 0x80008000, followed by the same
; split-add-recombine sequence used for the plain s_add_v2i16 case
; (s_pack_ll_b32_b16 on GFX9/GFX10, mask/shift/or on GFX8).
380 define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg %b) {
381 ; GFX9-LABEL: s_add_v2i16_fneg_rhs:
383 ; GFX9-NEXT: s_xor_b32 s1, s1, 0x80008000
384 ; GFX9-NEXT: s_lshr_b32 s2, s0, 16
385 ; GFX9-NEXT: s_lshr_b32 s3, s1, 16
386 ; GFX9-NEXT: s_add_i32 s0, s0, s1
387 ; GFX9-NEXT: s_add_i32 s2, s2, s3
388 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
389 ; GFX9-NEXT: ; return to shader part epilog
391 ; GFX8-LABEL: s_add_v2i16_fneg_rhs:
393 ; GFX8-NEXT: s_xor_b32 s1, s1, 0x80008000
394 ; GFX8-NEXT: s_mov_b32 s3, 0xffff
395 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
396 ; GFX8-NEXT: s_and_b32 s0, s0, s3
397 ; GFX8-NEXT: s_lshr_b32 s4, s1, 16
398 ; GFX8-NEXT: s_and_b32 s1, s1, s3
399 ; GFX8-NEXT: s_add_i32 s0, s0, s1
400 ; GFX8-NEXT: s_add_i32 s2, s2, s4
401 ; GFX8-NEXT: s_lshl_b32 s1, s2, 16
402 ; GFX8-NEXT: s_and_b32 s0, s0, s3
403 ; GFX8-NEXT: s_or_b32 s0, s1, s0
404 ; GFX8-NEXT: ; return to shader part epilog
406 ; GFX10-LABEL: s_add_v2i16_fneg_rhs:
408 ; GFX10-NEXT: s_xor_b32 s1, s1, 0x80008000
409 ; GFX10-NEXT: s_lshr_b32 s2, s0, 16
410 ; GFX10-NEXT: s_lshr_b32 s3, s1, 16
411 ; GFX10-NEXT: s_add_i32 s0, s0, s1
412 ; GFX10-NEXT: s_add_i32 s2, s2, s3
413 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2
414 ; GFX10-NEXT: ; return to shader part epilog
415 %neg.b = fneg <2 x half> %b
416 %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
417 %add = add <2 x i16> %a, %cast.neg.b
418 %cast = bitcast <2 x i16> %add to i32
; amdgpu_ps variant with inreg operands where both operands are fneg'd
; <2 x half> values bitcast to <2 x i16>; their sum is bitcast to i32.
; Per the checks below, all three targets are expected to load 0x80008000
; into s2 once and xor it into both s0 and s1, followed by the same
; split-add-recombine sequence used for the plain s_add_v2i16 case
; (s_pack_ll_b32_b16 on GFX9/GFX10, mask/shift/or on GFX8).
422 define amdgpu_ps i32 @s_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> inreg %a, <2 x half> inreg %b) {
423 ; GFX9-LABEL: s_add_v2i16_fneg_lhs_fneg_rhs:
425 ; GFX9-NEXT: s_mov_b32 s2, 0x80008000
426 ; GFX9-NEXT: s_xor_b32 s0, s0, s2
427 ; GFX9-NEXT: s_xor_b32 s1, s1, s2
428 ; GFX9-NEXT: s_lshr_b32 s2, s0, 16
429 ; GFX9-NEXT: s_lshr_b32 s3, s1, 16
430 ; GFX9-NEXT: s_add_i32 s0, s0, s1
431 ; GFX9-NEXT: s_add_i32 s2, s2, s3
432 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
433 ; GFX9-NEXT: ; return to shader part epilog
435 ; GFX8-LABEL: s_add_v2i16_fneg_lhs_fneg_rhs:
437 ; GFX8-NEXT: s_mov_b32 s2, 0x80008000
438 ; GFX8-NEXT: s_xor_b32 s0, s0, s2
439 ; GFX8-NEXT: s_xor_b32 s1, s1, s2
440 ; GFX8-NEXT: s_mov_b32 s3, 0xffff
441 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
442 ; GFX8-NEXT: s_and_b32 s0, s0, s3
443 ; GFX8-NEXT: s_lshr_b32 s4, s1, 16
444 ; GFX8-NEXT: s_and_b32 s1, s1, s3
445 ; GFX8-NEXT: s_add_i32 s0, s0, s1
446 ; GFX8-NEXT: s_add_i32 s2, s2, s4
447 ; GFX8-NEXT: s_lshl_b32 s1, s2, 16
448 ; GFX8-NEXT: s_and_b32 s0, s0, s3
449 ; GFX8-NEXT: s_or_b32 s0, s1, s0
450 ; GFX8-NEXT: ; return to shader part epilog
452 ; GFX10-LABEL: s_add_v2i16_fneg_lhs_fneg_rhs:
454 ; GFX10-NEXT: s_mov_b32 s2, 0x80008000
455 ; GFX10-NEXT: s_xor_b32 s0, s0, s2
456 ; GFX10-NEXT: s_xor_b32 s1, s1, s2
457 ; GFX10-NEXT: s_lshr_b32 s2, s0, 16
458 ; GFX10-NEXT: s_lshr_b32 s3, s1, 16
459 ; GFX10-NEXT: s_add_i32 s0, s0, s1
460 ; GFX10-NEXT: s_add_i32 s2, s2, s3
461 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2
462 ; GFX10-NEXT: ; return to shader part epilog
463 %neg.a = fneg <2 x half> %a
464 %neg.b = fneg <2 x half> %b
465 %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
466 %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
467 %add = add <2 x i16> %cast.neg.a, %cast.neg.b
468 %cast = bitcast <2 x i16> %add to i32