1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX900 %s
2 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PACKED-SDAG %s
3 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PACKED-GISEL %s
4 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PACKED-SDAG %s
5 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PACKED-GISEL %s
; <2 x float> fadd of a value loaded through the addrspace(1) pointer with
; itself. The gfx900 run expects two v_add_f32_e32; the gfx90a/gfx940 runs
; expect a single v_pk_add_f32 with vector-register operands.
7 ; GCN-LABEL: {{^}}fadd_v2_vv:
8 ; GFX900-COUNT-2: v_add_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
9 ; PACKED: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
10 define amdgpu_kernel void @fadd_v2_vv(ptr addrspace(1) %a) {
; Each work item addresses the <2 x float> element selected by its x id.
11 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
12 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
13 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
14 %add = fadd <2 x float> %load, %load
15 store <2 x float> %add, ptr addrspace(1) %gep, align 8
; <2 x float> fadd of a loaded value with the kernel argument %x. The checks
; expect the argument as an s-register operand: two v_add_f32_e32 on gfx900,
; one v_pk_add_f32 on the packed runs. The trailing {{$}} requires that nothing
; (e.g. no op_sel/neg modifier) follows the last operand.
19 ; GCN-LABEL: {{^}}fadd_v2_vs:
20 ; GFX900-COUNT-2: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
21 ; PACKED: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
22 define amdgpu_kernel void @fadd_v2_vs(ptr addrspace(1) %a, <2 x float> %x) {
23 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
24 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
25 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
26 %add = fadd <2 x float> %load, %x
27 store <2 x float> %add, ptr addrspace(1) %gep, align 8
; <4 x float> fadd of a loaded value with the kernel argument %x. Expected to
; become four v_add_f32_e32 on gfx900 and two v_pk_add_f32 on the packed runs.
31 ; GCN-LABEL: {{^}}fadd_v4_vs:
32 ; GFX900-COUNT-4: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
33 ; PACKED-COUNT-2: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
34 define amdgpu_kernel void @fadd_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
35 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
36 %gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %a, i32 %id
37 %load = load <4 x float>, ptr addrspace(1) %gep, align 16
38 %add = fadd <4 x float> %load, %x
39 store <4 x float> %add, ptr addrspace(1) %gep, align 16
; <32 x float> fadd of a loaded value with the kernel argument %x. Expected to
; become 32 v_add_f32_e32 on gfx900 and 16 v_pk_add_f32 on the packed runs.
43 ; GCN-LABEL: {{^}}fadd_v32_vs:
44 ; GFX900-COUNT-32: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
45 ; PACKED-COUNT-16: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
46 define amdgpu_kernel void @fadd_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
47 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
48 %gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %a, i32 %id
49 %load = load <32 x float>, ptr addrspace(1) %gep, align 128
50 %add = fadd <32 x float> %load, %x
51 store <32 x float> %add, ptr addrspace(1) %gep, align 128
; <2 x float> fadd with a splat of the constant 100.0, which the checks match
; as the literal 0x42c80000. The packed runs expect the literal in an s-register
; captured as K and used as the low half of the second source. SelectionDAG
; additionally expects op_sel_hi:[1,0]; GlobalISel expects no modifier.
55 ; FIXME: GISel does not use op_sel for splat constants.
57 ; GCN-LABEL: {{^}}fadd_v2_v_imm:
58 ; PACKED: s_mov_b32 s[[K:[0-9]+]], 0x42c80000
59 ; GFX900-COUNT-2: v_add_f32_e32 v{{[0-9]+}}, 0x42c80000, v{{[0-9]+}}
60 ; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K]]:{{[0-9:]+}}] op_sel_hi:[1,0]{{$}}
61 ; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K]]:{{[0-9:]+}}]{{$}}
62 define amdgpu_kernel void @fadd_v2_v_imm(ptr addrspace(1) %a) {
63 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
64 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
65 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
66 %add = fadd <2 x float> %load, <float 100.0, float 100.0>
67 store <2 x float> %add, ptr addrspace(1) %gep, align 8
; <2 x float> fadd with a splat of a per-work-item value: the workitem x id
; bitcast to float and inserted into both elements. The checks expect that
; value to be read from v0 (gfx900) or v[0:1] (packed runs); SelectionDAG
; expects op_sel_hi:[1,0], GlobalISel expects no modifier.
71 ; GCN-LABEL: {{^}}fadd_v2_v_v_splat:
72 ; GFX900-COUNT-2: v_add_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v0
73 ; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[0:1] op_sel_hi:[1,0]{{$}}
74 ; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[0:1]{{$}}
75 define amdgpu_kernel void @fadd_v2_v_v_splat(ptr addrspace(1) %a) {
76 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
77 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
78 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
; Reinterpret the id bits as a float and place it in both vector elements.
79 %fid = bitcast i32 %id to float
80 %tmp1 = insertelement <2 x float> undef, float %fid, i64 0
81 %k = insertelement <2 x float> %tmp1, float %fid, i64 1
82 %add = fadd <2 x float> %load, %k
83 store <2 x float> %add, ptr addrspace(1) %gep, align 8
; <2 x float> fadd with a splat of 1.0. gfx900 expects 1.0 directly as an
; operand of both v_add_f32_e32. On the packed runs SelectionDAG expects 1.0 as
; a direct operand with op_sel_hi:[1,0], while GlobalISel expects the constant
; to come from an s-register operand with no modifier.
87 ; GCN-LABEL: {{^}}fadd_v2_v_lit_splat:
88 ; GFX900-COUNT-2: v_add_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
89 ; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 1.0 op_sel_hi:[1,0]{{$}}
90 ; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
91 define amdgpu_kernel void @fadd_v2_v_lit_splat(ptr addrspace(1) %a) {
92 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
93 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
94 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
95 %add = fadd <2 x float> %load, <float 1.0, float 1.0>
96 store <2 x float> %add, ptr addrspace(1) %gep, align 8
; <2 x float> fadd with the non-splat constant <1.0, 0.0> (element 1 is zero).
; gfx900 expects one add of 0 and one add of 1.0, in either order. The packed
; runs expect a single s_mov_b64 of 0x3f800000 whose destination (captured as
; K) is the second source of the v_pk_add_f32.
100 ; GCN-LABEL: {{^}}fadd_v2_v_lit_hi0:
101 ; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
102 ; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
103 ; PACKED-DAG: s_mov_b64 [[K:s\[[0-9:]+\]]], 0x3f800000
104 ; PACKED: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], [[K]]
105 define amdgpu_kernel void @fadd_v2_v_lit_hi0(ptr addrspace(1) %a) {
106 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
107 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
108 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
109 %add = fadd <2 x float> %load, <float 1.0, float 0.0>
110 store <2 x float> %add, ptr addrspace(1) %gep, align 8
; <2 x float> fadd with the non-splat constant <0.0, 1.0> (element 0 is zero).
; gfx900 expects one add of 0 and one add of 1.0, in either order. The packed
; runs expect two s_mov_b32 (0 and 1.0) whose destinations, captured as LO and
; HI, form the s-register pair used as the second source of the v_pk_add_f32.
114 ; GCN-LABEL: {{^}}fadd_v2_v_lit_lo0:
115 ; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
116 ; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
117 ; PACKED-DAG: s_mov_b32 s[[LO:[0-9]+]], 0
118 ; PACKED-DAG: s_mov_b32 s[[HI:[0-9]+]], 1.0
119 ; PACKED: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[LO]]:[[HI]]]{{$}}
120 define amdgpu_kernel void @fadd_v2_v_lit_lo0(ptr addrspace(1) %a) {
121 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
122 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
123 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
124 %add = fadd <2 x float> %load, <float 0.0, float 1.0>
125 store <2 x float> %add, ptr addrspace(1) %gep, align 8
; <2 x float> fadd with the non-splat constant <1.0, 2.0>. gfx900 expects one
; add of 1.0 and one add of 2.0, in either order. The packed runs expect both
; constants to be moved into s-registers first and the v_pk_add_f32 to take an
; s-register operand with no modifier.
129 ; GCN-LABEL: {{^}}fadd_v2_v_unfoldable_lit:
130 ; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
131 ; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
132 ; PACKED-DAG: s_mov_b32 s{{[0-9]+}}, 1.0
133 ; PACKED-DAG: s_mov_b32 s{{[0-9]+}}, 2.0
134 ; PACKED: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
135 define amdgpu_kernel void @fadd_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
136 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
137 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
138 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
139 %add = fadd <2 x float> %load, <float 1.0, float 2.0>
140 store <2 x float> %add, ptr addrspace(1) %gep, align 8
; <2 x float> fadd with a vector whose two elements are both the negated
; kernel argument %x. gfx900 expects two v_subrev_f32_e32. On the packed runs
; SelectionDAG expects the negation as neg_lo:[0,1] neg_hi:[0,1] on an
; s-register source; GlobalISel expects a plain vector-register source with no
; modifier (see the FIXME below).
144 ; FIXME: Fold fneg into v_pk_add_f32 with Global ISel.
146 ; GCN-LABEL: {{^}}fadd_v2_v_fneg:
147 ; GFX900-COUNT-2: v_subrev_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
148 ; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]{{$}}
149 ; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
150 define amdgpu_kernel void @fadd_v2_v_fneg(ptr addrspace(1) %a, float %x) {
151 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
152 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
153 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
; The negation is written as a subtraction from -0.0.
154 %fneg = fsub float -0.0, %x
155 %tmp1 = insertelement <2 x float> undef, float %fneg, i64 0
156 %k = insertelement <2 x float> %tmp1, float %fneg, i64 1
157 %add = fadd <2 x float> %load, %k
158 store <2 x float> %add, ptr addrspace(1) %gep, align 8
; <2 x float> fadd with the vector <-%x, %x>: only element 0 is negated.
; gfx900 expects one v_add_f32_e32 and one v_subrev_f32_e32, in either order.
; SelectionDAG on the packed runs expects op_sel_hi:[1,0] neg_lo:[0,1];
; GlobalISel expects a plain vector-register source with no modifier.
162 ; GCN-LABEL: {{^}}fadd_v2_v_fneg_lo:
163 ; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
164 ; GFX900-DAG: v_subrev_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
165 ; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel_hi:[1,0] neg_lo:[0,1]{{$}}
166 ; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
167 define amdgpu_kernel void @fadd_v2_v_fneg_lo(ptr addrspace(1) %a, float %x) {
168 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
169 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
170 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
171 %fneg = fsub float -0.0, %x
; Element 0 takes the negated value, element 1 the original %x.
172 %tmp1 = insertelement <2 x float> undef, float %fneg, i64 0
173 %k = insertelement <2 x float> %tmp1, float %x, i64 1
174 %add = fadd <2 x float> %load, %k
175 store <2 x float> %add, ptr addrspace(1) %gep, align 8
; <2 x float> fadd with the vector <%x, -%x>: only element 1 is negated.
; gfx900 expects one v_add_f32_e32 and one v_subrev_f32_e32, in either order.
; SelectionDAG on the packed runs expects op_sel_hi:[1,0] neg_hi:[0,1];
; GlobalISel expects a plain vector-register source with no modifier.
179 ; GCN-LABEL: {{^}}fadd_v2_v_fneg_hi:
180 ; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
181 ; GFX900-DAG: v_subrev_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
182 ; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel_hi:[1,0] neg_hi:[0,1]{{$}}
183 ; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
184 define amdgpu_kernel void @fadd_v2_v_fneg_hi(ptr addrspace(1) %a, float %x) {
185 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
186 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
187 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
188 %fneg = fsub float -0.0, %x
; Element 0 takes the original %x, element 1 the negated value.
189 %tmp1 = insertelement <2 x float> undef, float %x, i64 0
190 %k = insertelement <2 x float> %tmp1, float %fneg, i64 1
191 %add = fadd <2 x float> %load, %k
192 store <2 x float> %add, ptr addrspace(1) %gep, align 8
; <2 x float> fadd with the vector <-%x, %y>: element 0 is a negated argument
; and element 1 is a different, un-negated argument. gfx900 expects one
; v_add_f32_e32 and one v_subrev_f32_e32, in either order. SelectionDAG on the
; packed runs expects only neg_lo:[0,1]; GlobalISel expects a plain
; vector-register source with no modifier.
196 ; GCN-LABEL: {{^}}fadd_v2_v_fneg_lo2:
197 ; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
198 ; GFX900-DAG: v_subrev_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
199 ; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] neg_lo:[0,1]{{$}}
200 ; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
201 define amdgpu_kernel void @fadd_v2_v_fneg_lo2(ptr addrspace(1) %a, float %x, float %y) {
202 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
203 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
204 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
205 %fneg = fsub float -0.0, %x
206 %tmp1 = insertelement <2 x float> undef, float %fneg, i64 0
207 %k = insertelement <2 x float> %tmp1, float %y, i64 1
208 %add = fadd <2 x float> %load, %k
209 store <2 x float> %add, ptr addrspace(1) %gep, align 8
; <2 x float> fadd with the vector <%y, -%x>: element 0 is an un-negated
; argument and element 1 is a different, negated argument. gfx900 expects one
; v_add_f32_e32 and one v_subrev_f32_e32, in either order. SelectionDAG on the
; packed runs expects op_sel:[0,1] op_sel_hi:[1,0] neg_hi:[0,1]; GlobalISel
; expects a plain vector-register source with no modifier.
213 ; GCN-LABEL: {{^}}fadd_v2_v_fneg_hi2:
214 ; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
215 ; GFX900-DAG: v_subrev_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
216 ; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel:[0,1] op_sel_hi:[1,0] neg_hi:[0,1]{{$}}
217 ; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
218 define amdgpu_kernel void @fadd_v2_v_fneg_hi2(ptr addrspace(1) %a, float %x, float %y) {
219 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
220 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
221 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
222 %fneg = fsub float -0.0, %x
223 %tmp1 = insertelement <2 x float> undef, float %y, i64 0
224 %k = insertelement <2 x float> %tmp1, float %fneg, i64 1
225 %add = fadd <2 x float> %load, %k
226 store <2 x float> %add, ptr addrspace(1) %gep, align 8
; <2 x float> fmul of a value loaded through the addrspace(1) pointer with
; itself. The gfx900 run expects two v_mul_f32_e32; the gfx90a/gfx940 runs
; expect a single v_pk_mul_f32 with vector-register operands.
230 ; GCN-LABEL: {{^}}fmul_v2_vv:
231 ; GFX900-COUNT-2: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
232 ; PACKED: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
233 define amdgpu_kernel void @fmul_v2_vv(ptr addrspace(1) %a) {
234 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
235 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
236 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
237 %mul = fmul <2 x float> %load, %load
238 store <2 x float> %mul, ptr addrspace(1) %gep, align 8
; <2 x float> fmul of a loaded value with the kernel argument %x. The checks
; expect the argument as an s-register operand: two v_mul_f32_e32 on gfx900,
; one v_pk_mul_f32 with no trailing modifier on the packed runs.
242 ; GCN-LABEL: {{^}}fmul_v2_vs:
243 ; GFX900-COUNT-2: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
244 ; PACKED: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
245 define amdgpu_kernel void @fmul_v2_vs(ptr addrspace(1) %a, <2 x float> %x) {
246 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
247 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
248 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
249 %mul = fmul <2 x float> %load, %x
250 store <2 x float> %mul, ptr addrspace(1) %gep, align 8
; <4 x float> fmul of a loaded value with the kernel argument %x. Expected to
; become four v_mul_f32_e32 on gfx900 and two v_pk_mul_f32 on the packed runs.
254 ; GCN-LABEL: {{^}}fmul_v4_vs:
255 ; GFX900-COUNT-4: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
256 ; PACKED-COUNT-2: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
257 define amdgpu_kernel void @fmul_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
258 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
259 %gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %a, i32 %id
260 %load = load <4 x float>, ptr addrspace(1) %gep, align 16
261 %mul = fmul <4 x float> %load, %x
262 store <4 x float> %mul, ptr addrspace(1) %gep, align 16
; <32 x float> fmul of a loaded value with the kernel argument %x. Expected to
; become 32 v_mul_f32_e32 on gfx900 and 16 v_pk_mul_f32 on the packed runs.
266 ; GCN-LABEL: {{^}}fmul_v32_vs:
267 ; GFX900-COUNT-32: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
268 ; PACKED-COUNT-16: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
269 define amdgpu_kernel void @fmul_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
270 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
271 %gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %a, i32 %id
272 %load = load <32 x float>, ptr addrspace(1) %gep, align 128
273 %mul = fmul <32 x float> %load, %x
274 store <32 x float> %mul, ptr addrspace(1) %gep, align 128
; <2 x float> fmul with a splat of the constant 100.0, which the checks match
; as the literal 0x42c80000. The packed runs expect the literal in an s-register
; captured as K and used as the low half of the second source. SelectionDAG
; additionally expects op_sel_hi:[1,0]; GlobalISel expects no modifier.
278 ; GCN-LABEL: {{^}}fmul_v2_v_imm:
279 ; PACKED: s_mov_b32 s[[K:[0-9]+]], 0x42c80000
280 ; GFX900-COUNT-2: v_mul_f32_e32 v{{[0-9]+}}, 0x42c80000, v{{[0-9]+}}
281 ; PACKED-SDAG: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K]]:{{[0-9:]+}}] op_sel_hi:[1,0]{{$}}
282 ; PACKED-GISEL: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K]]:{{[0-9:]+}}]{{$}}
283 define amdgpu_kernel void @fmul_v2_v_imm(ptr addrspace(1) %a) {
284 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
285 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
286 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
287 %mul = fmul <2 x float> %load, <float 100.0, float 100.0>
288 store <2 x float> %mul, ptr addrspace(1) %gep, align 8
; <2 x float> fmul with a splat of a per-work-item value: the workitem x id
; bitcast to float and inserted into both elements. The checks expect that
; value to be read from v0 (gfx900) or v[0:1] (packed runs); SelectionDAG
; expects op_sel_hi:[1,0], GlobalISel expects no modifier.
292 ; GCN-LABEL: {{^}}fmul_v2_v_v_splat:
293 ; GFX900-COUNT-2: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v0
294 ; PACKED-SDAG: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[0:1] op_sel_hi:[1,0]{{$}}
295 ; PACKED-GISEL: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[0:1]{{$}}
296 define amdgpu_kernel void @fmul_v2_v_v_splat(ptr addrspace(1) %a) {
297 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
298 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
299 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
; Reinterpret the id bits as a float and place it in both vector elements.
300 %fid = bitcast i32 %id to float
301 %tmp1 = insertelement <2 x float> undef, float %fid, i64 0
302 %k = insertelement <2 x float> %tmp1, float %fid, i64 1
303 %mul = fmul <2 x float> %load, %k
304 store <2 x float> %mul, ptr addrspace(1) %gep, align 8
; <2 x float> fmul with a splat of 4.0. gfx900 expects 4.0 directly as an
; operand of both v_mul_f32_e32. On the packed runs SelectionDAG expects 4.0 as
; a direct operand with op_sel_hi:[1,0], while GlobalISel expects the constant
; to come from an s-register operand with no modifier.
308 ; GCN-LABEL: {{^}}fmul_v2_v_lit_splat:
309 ; GFX900-COUNT-2: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
310 ; PACKED-SDAG: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 4.0 op_sel_hi:[1,0]{{$}}
311 ; PACKED-GISEL: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
312 define amdgpu_kernel void @fmul_v2_v_lit_splat(ptr addrspace(1) %a) {
313 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
314 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
315 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
316 %mul = fmul <2 x float> %load, <float 4.0, float 4.0>
317 store <2 x float> %mul, ptr addrspace(1) %gep, align 8
; <2 x float> fmul with the non-splat constant <4.0, 3.0>. The checks match
; 4.0 by name and the other element as the literal 0x40400000. gfx900 expects
; one multiply per constant, in either order. The packed runs expect both
; constants to be moved into s-registers first and the v_pk_mul_f32 to take an
; s-register operand with no modifier.
321 ; GCN-LABEL: {{^}}fmul_v2_v_unfoldable_lit:
322 ; GFX900-DAG: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
323 ; GFX900-DAG: v_mul_f32_e32 v{{[0-9]+}}, 0x40400000, v{{[0-9]+}}
324 ; PACKED-DAG: s_mov_b32 s{{[0-9]+}}, 4.0
325 ; PACKED-DAG: s_mov_b32 s{{[0-9]+}}, 0x40400000
326 ; PACKED: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
327 define amdgpu_kernel void @fmul_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
328 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
329 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
330 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
331 %mul = fmul <2 x float> %load, <float 4.0, float 3.0>
332 store <2 x float> %mul, ptr addrspace(1) %gep, align 8
; <2 x float> fmul with a vector whose two elements are both the negated
; kernel argument %x. gfx900 expects two v_mul_f32_e64 with a negated
; s-register source. On the packed runs SelectionDAG expects the negation as
; neg_lo:[0,1] neg_hi:[0,1]; GlobalISel expects a plain vector-register source
; with no modifier.
336 ; GCN-LABEL: {{^}}fmul_v2_v_fneg:
337 ; GFX900-COUNT-2: v_mul_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -s{{[0-9]+}}
338 ; PACKED-SDAG: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]{{$}}
339 ; PACKED-GISEL: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
340 define amdgpu_kernel void @fmul_v2_v_fneg(ptr addrspace(1) %a, float %x) {
341 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
342 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
343 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
; The negation is written as a subtraction from -0.0.
344 %fneg = fsub float -0.0, %x
345 %tmp1 = insertelement <2 x float> undef, float %fneg, i64 0
346 %k = insertelement <2 x float> %tmp1, float %fneg, i64 1
347 %mul = fmul <2 x float> %load, %k
348 store <2 x float> %mul, ptr addrspace(1) %gep, align 8
; <2 x float> llvm.fma with the same loaded value as all three operands. The
; gfx900 run expects two v_fma_f32; the gfx90a/gfx940 runs expect a single
; v_pk_fma_f32 with vector-register operands.
352 ; GCN-LABEL: {{^}}fma_v2_vv:
353 ; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
354 ; PACKED: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
355 define amdgpu_kernel void @fma_v2_vv(ptr addrspace(1) %a) {
356 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
357 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
358 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
359 %fma = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %load, <2 x float> %load, <2 x float> %load)
360 store <2 x float> %fma, ptr addrspace(1) %gep, align 8
; <2 x float> llvm.fma of a loaded value with the kernel argument %x as both
; the multiplier and the addend. The checks expect both argument uses as
; s-register operands: two v_fma_f32 on gfx900, one v_pk_fma_f32 with no
; trailing modifier on the packed runs.
364 ; GCN-LABEL: {{^}}fma_v2_vs:
365 ; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
366 ; PACKED: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
367 define amdgpu_kernel void @fma_v2_vs(ptr addrspace(1) %a, <2 x float> %x) {
368 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
369 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
370 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
371 %fma = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %load, <2 x float> %x, <2 x float> %x)
372 store <2 x float> %fma, ptr addrspace(1) %gep, align 8
; <4 x float> llvm.fma of a loaded value with the kernel argument %x as both
; the multiplier and the addend. Expected to become four v_fma_f32 on gfx900
; and two v_pk_fma_f32 on the packed runs.
376 ; GCN-LABEL: {{^}}fma_v4_vs:
377 ; GFX900-COUNT-4: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
378 ; PACKED-COUNT-2: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
379 define amdgpu_kernel void @fma_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
380 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
381 %gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %a, i32 %id
382 %load = load <4 x float>, ptr addrspace(1) %gep, align 16
383 %fma = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %load, <4 x float> %x, <4 x float> %x)
384 store <4 x float> %fma, ptr addrspace(1) %gep, align 16
; <32 x float> llvm.fma of a loaded value with the kernel argument %x as both
; the multiplier and the addend. Expected to become 32 v_fma_f32 on gfx900 and
; 16 v_pk_fma_f32 on the packed runs.
388 ; GCN-LABEL: {{^}}fma_v32_vs:
389 ; GFX900-COUNT-32: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
390 ; PACKED-COUNT-16: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
391 define amdgpu_kernel void @fma_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
392 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
393 %gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %a, i32 %id
394 %load = load <32 x float>, ptr addrspace(1) %gep, align 128
395 %fma = tail call <32 x float> @llvm.fma.v32f32(<32 x float> %load, <32 x float> %x, <32 x float> %x)
396 store <32 x float> %fma, ptr addrspace(1) %gep, align 128
; <2 x float> llvm.fma with splat constants 100.0 (multiplier) and 200.0
; (addend), matched as the literals 0x42c80000 and 0x43480000. All runs expect
; the multiplier in an s-register captured as K1. gfx900 and SelectionDAG on
; the packed runs expect the addend in a v-register captured as K2;
; SelectionDAG also expects op_sel_hi:[1,0,0]. GlobalISel only pins K1 and
; expects no modifier.
400 ; GCN-LABEL: {{^}}fma_v2_v_imm:
401 ; GCN-DAG: s_mov_b32 s[[K1:[0-9]+]], 0x42c80000
402 ; GFX900-DAG: v_mov_b32_e32 v[[K2:[0-9]+]], 0x43480000
403 ; PACKED-SDAG-DAG: v_mov_b32_e32 v[[K2:[0-9]+]], 0x43480000
404 ; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, s[[K1]], v[[K2]]
405 ; PACKED-SDAG: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K1]]:{{[0-9:]+}}], v[[[K2]]:{{[0-9:]+}}] op_sel_hi:[1,0,0]{{$}}
406 ; PACKED-GISEL: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K1]]:{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
407 define amdgpu_kernel void @fma_v2_v_imm(ptr addrspace(1) %a) {
408 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
409 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
410 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
411 %fma = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %load, <2 x float> <float 100.0, float 100.0>, <2 x float> <float 200.0, float 200.0>)
412 store <2 x float> %fma, ptr addrspace(1) %gep, align 8
; <2 x float> llvm.fma whose multiplier and addend are both a splat of the
; workitem x id bitcast to float. The checks expect that value to be read from
; v0 (gfx900) or v[0:1] (packed runs) for both operands; SelectionDAG expects
; op_sel_hi:[1,0,0], GlobalISel expects no modifier.
416 ; GCN-LABEL: {{^}}fma_v2_v_v_splat:
417 ; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, v0, v0
418 ; PACKED-SDAG: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[0:1], v[0:1] op_sel_hi:[1,0,0]{{$}}
419 ; PACKED-GISEL: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[0:1], v[0:1]{{$}}
420 define amdgpu_kernel void @fma_v2_v_v_splat(ptr addrspace(1) %a) {
421 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
422 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
423 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
; Reinterpret the id bits as a float and place it in both vector elements.
424 %fid = bitcast i32 %id to float
425 %tmp1 = insertelement <2 x float> undef, float %fid, i64 0
426 %k = insertelement <2 x float> %tmp1, float %fid, i64 1
427 %fma = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %load, <2 x float> %k, <2 x float> %k)
428 store <2 x float> %fma, ptr addrspace(1) %gep, align 8
; <2 x float> llvm.fma with splat constants 4.0 (multiplier) and 1.0 (addend).
; gfx900 expects both constants directly as operands of two v_fma_f32. On the
; packed runs SelectionDAG expects them as direct operands with
; op_sel_hi:[1,0,0]; GlobalISel expects an s-register and a v-register operand
; with no modifier.
432 ; GCN-LABEL: {{^}}fma_v2_v_lit_splat:
433 ; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, 4.0, 1.0
434 ; PACKED-SDAG: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 4.0, 1.0 op_sel_hi:[1,0,0]{{$}}
435 ; PACKED-GISEL: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
436 define amdgpu_kernel void @fma_v2_v_lit_splat(ptr addrspace(1) %a) {
437 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
438 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
439 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
440 %fma = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %load, <2 x float> <float 4.0, float 4.0>, <2 x float> <float 1.0, float 1.0>)
441 store <2 x float> %fma, ptr addrspace(1) %gep, align 8
; <2 x float> llvm.fma with non-splat constants <4.0, 3.0> (multiplier) and
; <1.0, 2.0> (addend). All runs expect the literal 0x40400000 to be moved into
; an s-register. gfx900 expects one v_fma_f32 using 4.0 and 1.0 directly and
; one using an s-register and 2.0. The packed runs expect a v_pk_fma_f32 with
; an s-register and a v-register source and no modifier; the extra moves of
; 4.0, 1.0 and 2.0 are only checked for SelectionDAG.
445 ; GCN-LABEL: {{^}}fma_v2_v_unfoldable_lit:
446 ; GCN-DAG: s_mov_b32 s{{[0-9]+}}, 0x40400000
447 ; GFX900-DAG: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, 4.0, 1.0
448 ; GFX900-DAG: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, 2.0
449 ; PACKED-SDAG-DAG: s_mov_b32 s{{[0-9]+}}, 4.0
450 ; PACKED-SDAG-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0
451 ; PACKED-SDAG-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
452 ; PACKED: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
453 define amdgpu_kernel void @fma_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
454 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
455 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
456 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
457 %fma = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %load, <2 x float> <float 4.0, float 3.0>, <2 x float> <float 1.0, float 2.0>)
458 store <2 x float> %fma, ptr addrspace(1) %gep, align 8
; <2 x float> llvm.fma whose multiplier and addend are both a splat of the
; negated kernel argument %x. gfx900 expects two v_fma_f32 with negated
; s-register sources. On the packed runs SelectionDAG expects the negations as
; neg_lo:[0,1,1] neg_hi:[0,1,1]; GlobalISel expects plain vector-register
; sources with no modifier.
462 ; GCN-LABEL: {{^}}fma_v2_v_fneg:
463 ; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, -s{{[0-9]+}}, -s{{[0-9]+}}
464 ; PACKED-SDAG: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel_hi:[1,0,0] neg_lo:[0,1,1] neg_hi:[0,1,1]{{$}}
465 ; PACKED-GISEL: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
466 define amdgpu_kernel void @fma_v2_v_fneg(ptr addrspace(1) %a, float %x) {
467 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
468 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
469 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
; The negation is written as a subtraction from -0.0.
470 %fneg = fsub float -0.0, %x
471 %tmp1 = insertelement <2 x float> undef, float %fneg, i64 0
472 %k = insertelement <2 x float> %tmp1, float %fneg, i64 1
473 %fma = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %load, <2 x float> %k, <2 x float> %k)
474 store <2 x float> %fma, ptr addrspace(1) %gep, align 8
; <2 x float> fadd of a vector with a broadcast of a negated scalar, both
; loaded (volatile) through addrspace(3) pointers. gfx900 expects two
; v_sub_f32_e32. On the packed runs SelectionDAG expects one v_pk_add_f32 with
; op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]; GlobalISel expects no modifier.
478 ; GCN-LABEL: {{^}}add_vector_neg_bitcast_scalar_lo:
479 ; GFX900-COUNT-2: v_sub_f32_e32
480 ; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]{{$}}
481 ; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
482 define amdgpu_kernel void @add_vector_neg_bitcast_scalar_lo(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) {
484 %vec0 = load volatile <2 x float>, ptr addrspace(3) %lds, align 4
485 %scalar0 = load volatile float, ptr addrspace(3) %arg2, align 4
486 %neg.scalar0 = fsub float -0.0, %scalar0
; Broadcast: insert into element 0, then shuffle with an all-zero mask so both
; result elements select element 0.
488 %neg.scalar0.vec = insertelement <2 x float> undef, float %neg.scalar0, i32 0
489 %neg.scalar0.broadcast = shufflevector <2 x float> %neg.scalar0.vec, <2 x float> undef, <2 x i32> zeroinitializer
491 %result = fadd <2 x float> %vec0, %neg.scalar0.broadcast
492 store <2 x float> %result, ptr addrspace(1) %out, align 4
; <2 x float> llvm.fma of two loaded vectors with a negated addend that is
; built from two separately loaded scalars. gfx900 expects two v_fma_f32 with a
; negated third source. On the packed runs SelectionDAG expects one
; v_pk_fma_f32 with neg_lo:[0,0,1] neg_hi:[0,0,1]; GlobalISel expects no
; modifier.
496 ; GCN-LABEL: {{^}}fma_vector_vector_neg_scalar_lo_scalar_hi:
497 ; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
498 ; PACKED-SDAG: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] neg_lo:[0,0,1] neg_hi:[0,0,1]{{$}}
499 ; PACKED-GISEL: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
500 define amdgpu_kernel void @fma_vector_vector_neg_scalar_lo_scalar_hi(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) {
502 %lds.gep1 = getelementptr inbounds <2 x float>, ptr addrspace(3) %lds, i32 1
503 %arg2.gep = getelementptr inbounds float, ptr addrspace(3) %arg2, i32 2
505 %vec0 = load volatile <2 x float>, ptr addrspace(3) %lds, align 4
506 %vec1 = load volatile <2 x float>, ptr addrspace(3) %lds.gep1, align 4
508 %scalar0 = load volatile float, ptr addrspace(3) %arg2, align 4
509 %scalar1 = load volatile float, ptr addrspace(3) %arg2.gep, align 4
; Assemble <scalar0, scalar1> and negate the whole vector (subtract from -0.0).
511 %vec.ins0 = insertelement <2 x float> undef, float %scalar0, i32 0
512 %vec2 = insertelement <2 x float> %vec.ins0, float %scalar1, i32 1
513 %neg.vec2 = fsub <2 x float> <float -0.0, float -0.0>, %vec2
515 %result = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %vec0, <2 x float> %vec1, <2 x float> %neg.vec2)
516 store <2 x float> %result, ptr addrspace(1) %out, align 4
; <2 x float> fadd of one loaded vector with a second loaded vector whose two
; elements are swapped by a shufflevector. gfx900 expects two v_add_f32_e32.
; On the packed runs SelectionDAG expects the swap as op_sel:[0,1]
; op_sel_hi:[1,0] on the v_pk_add_f32; GlobalISel expects no modifier.
; NOTE(review): the attribute group #0 is not defined in the visible lines.
520 ; GCN-LABEL: {{^}}shuffle_add_f32:
521 ; GFX900-COUNT-2: v_add_f32_e32
522 ; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] op_sel:[0,1] op_sel_hi:[1,0]{{$}}
523 ; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
524 define amdgpu_kernel void @shuffle_add_f32(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
526 %vec0 = load volatile <2 x float>, ptr addrspace(3) %lds, align 8
527 %lds.gep1 = getelementptr inbounds <2 x float>, ptr addrspace(3) %lds, i32 1
528 %vec1 = load volatile <2 x float>, ptr addrspace(3) %lds.gep1, align 8
; Mask <1, 0> swaps the two elements of %vec1.
529 %vec1.swap = shufflevector <2 x float> %vec1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
530 %result = fadd <2 x float> %vec0, %vec1.swap
531 store <2 x float> %result, ptr addrspace(1) %out, align 8
; <2 x float> fadd of one loaded vector with a second loaded vector that is
; first negated and then has its two elements swapped. gfx900 expects two
; v_sub_f32_e32. On the packed runs SelectionDAG expects op_sel:[0,1]
; op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1] on the v_pk_add_f32; GlobalISel
; expects no modifier.
; NOTE(review): the attribute group #0 is not defined in the visible lines.
535 ; GCN-LABEL: {{^}}shuffle_neg_add_f32:
536 ; GFX900-COUNT-2: v_sub_f32_e32
537 ; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]{{$}}
538 ; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
539 define amdgpu_kernel void @shuffle_neg_add_f32(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
541 %vec0 = load volatile <2 x float>, ptr addrspace(3) %lds, align 8
542 %lds.gep1 = getelementptr inbounds <2 x float>, ptr addrspace(3) %lds, i32 1
; NOTE(review): %f32 has no use in the visible lines; it is a volatile load
; through an undef pointer placed between the two vector loads. Presumably
; intentional -- confirm before touching it.
543 %f32 = load volatile float, ptr addrspace(3) undef, align 8
544 %vec1 = load volatile <2 x float>, ptr addrspace(3) %lds.gep1, align 8
545 %vec1.neg = fsub <2 x float> <float -0.0, float -0.0>, %vec1
; Mask <1, 0> swaps the two elements of the negated vector.
546 %vec1.neg.swap = shufflevector <2 x float> %vec1.neg, <2 x float> undef, <2 x i32> <i32 1, i32 0>
547 %result = fadd <2 x float> %vec0, %vec1.neg.swap
548 store <2 x float> %result, ptr addrspace(1) %out, align 8
; Two <2 x float> fadds with zeroinitializer and one fsub of zeroinitializer,
; linked by shufflevectors; the result is stored through an undef pointer.
; gfx900 expects a v_add_f32_e64 followed by a v_add_f32_e32. The packed runs
; expect two v_pk_add_f32: SelectionDAG with a direct 0 operand and
; op_sel_hi:[1,0], GlobalISel with register operands and no modifier.
552 ; GCN-LABEL: {{^}}fadd_fadd_fsub:
553 ; GFX900: v_add_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 0
554 ; GFX900: v_add_f32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
555 ; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], 0 op_sel_hi:[1,0]{{$}}
556 ; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 0 op_sel_hi:[1,0]{{$}}
557 ; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
558 ; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
559 define amdgpu_kernel void @fadd_fadd_fsub(<2 x float> %arg) {
561 %i12 = fadd <2 x float> zeroinitializer, %arg
; Move element 1 of %i12 into element 0; element 1 of the result is undef.
562 %shift8 = shufflevector <2 x float> %i12, <2 x float> undef, <2 x i32> <i32 1, i32 undef>
563 %i13 = fadd <2 x float> zeroinitializer, %shift8
; Combine element 0 of %arg with element 0 of %i13 (mask index 2).
564 %i14 = shufflevector <2 x float> %arg, <2 x float> %i13, <2 x i32> <i32 0, i32 2>
565 %i15 = fsub <2 x float> %i14, zeroinitializer
566 store <2 x float> %i15, ptr undef
; <4 x float> fadd of a loaded vector with a broadcast of its own element 0.
; gfx900 expects four v_add_f32_e32. The packed runs expect two v_pk_add_f32:
; SelectionDAG with op_sel_hi:[1,0], GlobalISel with no modifier.
570 ; GCN-LABEL: {{^}}fadd_shuffle_v4:
571 ; GFX900-COUNT-4: v_add_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
572 ; PACKED-SDAG-COUNT-2: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] op_sel_hi:[1,0]{{$}}
573 ; PACKED-GISEL-COUNT-2: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
574 define amdgpu_kernel void @fadd_shuffle_v4(ptr addrspace(1) %arg) {
576 %tid = call i32 @llvm.amdgcn.workitem.id.x()
577 %gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i32 %tid
578 %in.1 = load <4 x float>, ptr addrspace(1) %gep
; All-zero mask: every result element selects element 0 of %in.1.
579 %shuf = shufflevector <4 x float> %in.1, <4 x float> undef, <4 x i32> zeroinitializer
580 %add.1 = fadd <4 x float> %in.1, %shuf
581 store <4 x float> %add.1, ptr addrspace(1) %gep
; Negation of a loaded <2 x float>, written as a subtraction from
; <-0.0, -0.0>. gfx900 expects two v_xor_b32_e32 with 0x80000000. On the packed
; runs SelectionDAG expects a v_pk_add_f32 with a 0 operand and neg_lo:[1,1]
; neg_hi:[1,1]; GlobalISel expects two v_xor_b32_e32 with 0x80000000 followed
; by a v_pk_mul_f32 by 1.0 with op_sel_hi:[0,1].
585 ; GCN-LABEL: {{^}}fneg_v2f32_vec:
586 ; GFX900-COUNT-2: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
587 ; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 0 neg_lo:[1,1] neg_hi:[1,1]{{$}}
588 ; PACKED-GISEL-COUNT-2: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
589 ; PACKED-GISEL: v_pk_mul_f32 v[{{[0-9:]+}}], 1.0, v[{{[0-9:]+}}] op_sel_hi:[0,1]{{$}}
590 define amdgpu_kernel void @fneg_v2f32_vec(ptr addrspace(1) %a) {
591 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
592 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
593 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
594 %fneg = fsub <2 x float> <float -0.0, float -0.0>, %load
595 store <2 x float> %fneg, ptr addrspace(1) %gep, align 8
; Negation of the <2 x float> kernel argument %x, written as a subtraction
; from <-0.0, -0.0>. Every run is expected to emit two s_xor_b32 with
; 0x80000000; no packed instruction is checked here.
599 ; GCN-LABEL: {{^}}fneg_v2f32_scalar:
600 ; GCN-COUNT-2: s_xor_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
601 define amdgpu_kernel void @fneg_v2f32_scalar(ptr addrspace(1) %a, <2 x float> %x) {
602 %fneg = fsub <2 x float> <float -0.0, float -0.0>, %x
603 store <2 x float> %fneg, ptr addrspace(1) %a, align 8
; Declarations of the intrinsics called by the kernels in this file: the
; workitem x id query and llvm.fma at the three vector widths under test.
607 declare i32 @llvm.amdgcn.workitem.id.x()
608 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
609 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
610 declare <32 x float> @llvm.fma.v32f32(<32 x float>, <32 x float>, <32 x float>)