; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,FP16 %s

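; Check that divergence-driven instruction selection uses VALU bit operations
; (V_XOR_B32 / V_AND_B32 / V_OR_B32) for divergent fneg, fabs and fneg(fabs())
; and their SALU counterparts (S_XOR_B32 / S_AND_B32 / S_OR_B32) for uniform
; ones, with the sign/magnitude bit mask materialized by S_MOV_B32.
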
define amdgpu_kernel void @divergent_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
; GCN-LABEL: name: divergent_fneg_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: V_XOR_B32_e64 killed %[[REG]]
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds float, float addrspace(1)* %in, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %val = load volatile float, float addrspace(1)* %in.gep
  %fneg = fneg float %val
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: S_XOR_B32 killed %{{[0-9]+}}, killed %[[REG]]
  %in.gep = getelementptr inbounds float, float addrspace(1)* %in, i64 %idx
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %idx
  %val = load volatile float, float addrspace(1)* %in.gep
  %fneg = fneg float %val
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

define amdgpu_kernel void @divergent_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
; GCN-LABEL: name: divergent_fabs_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: V_AND_B32_e64 killed %[[REG]]
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds float, float addrspace(1)* %in, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %val = load volatile float, float addrspace(1)* %in.gep
  %fabs = call float @llvm.fabs.f32(float %val)
  store float %fabs, float addrspace(1)* %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in, i64 %idx) {
; GCN-LABEL: name: uniform_fabs_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: S_AND_B32 killed %{{[0-9]+}}, killed %[[REG]]
  %in.gep = getelementptr inbounds float, float addrspace(1)* %in, i64 %idx
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %idx
  %val = load volatile float, float addrspace(1)* %in.gep
  %fabs = call float @llvm.fabs.f32(float %val)
  store float %fabs, float addrspace(1)* %out.gep
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
; GCN-LABEL: name: divergent_fneg_fabs_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: V_OR_B32_e64 killed %[[REG]]
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds float, float addrspace(1)* %in, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %val = load volatile float, float addrspace(1)* %in.gep
  %fabs = call float @llvm.fabs.f32(float %val)
  %fneg = fneg float %fabs
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_fabs_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: S_OR_B32 killed %{{[0-9]+}}, killed %[[REG]]
  %in.gep = getelementptr inbounds float, float addrspace(1)* %in, i64 %idx
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %idx
  %val = load volatile float, float addrspace(1)* %in.gep
  %fabs = call float @llvm.fabs.f32(float %val)
  %fneg = fneg float %fabs
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

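; For f16 the masks narrow to the low 16 bits: 0x7fff (32767) for fabs and
; 0x8000 (32768) for fneg. The divergent VALU checks use the FP16 prefix, as
; only the gfx900 run is expected to select them this way; SI, presumably,
; legalizes f16 differently.
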
define amdgpu_kernel void @divergent_fabs_f16(half addrspace(1)* %in, half addrspace(1)* %out) {
; GCN-LABEL: name: divergent_fabs_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32767
; FP16: V_AND_B32_e64 killed %[[REG]]
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds half, half addrspace(1)* %in, i64 %tid.ext
  %val = load volatile half, half addrspace(1)* %in.gep
  %fabs = call half @llvm.fabs.f16(half %val)
  store half %fabs, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @uniform_fabs_f16(half addrspace(1)* %in, half addrspace(1)* %out, i64 %idx) {
; GCN-LABEL: name: uniform_fabs_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32767
; GCN: S_AND_B32 killed %{{[0-9]+}}, killed %[[REG]]
  %in.gep = getelementptr inbounds half, half addrspace(1)* %in, i64 %idx
  %val = load volatile half, half addrspace(1)* %in.gep
  %fabs = call half @llvm.fabs.f16(half %val)
  store half %fabs, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @divergent_fneg_f16(half addrspace(1)* %in, half addrspace(1)* %out) {
; GCN-LABEL: name: divergent_fneg_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
; FP16: V_XOR_B32_e64 killed %[[REG]]
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds half, half addrspace(1)* %in, i64 %tid.ext
  %val = load volatile half, half addrspace(1)* %in.gep
  %fneg = fneg half %val
  store half %fneg, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @uniform_fneg_f16(half addrspace(1)* %in, half addrspace(1)* %out, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GCN: S_XOR_B32 killed %{{[0-9]+}}, killed %[[REG]]
  %in.gep = getelementptr inbounds half, half addrspace(1)* %in, i64 %idx
  %val = load volatile half, half addrspace(1)* %in.gep
  %fneg = fneg half %val
  store half %fneg, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_f16(half addrspace(1)* %in, half addrspace(1)* %out) {
; GCN-LABEL: name: divergent_fneg_fabs_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
; FP16: V_OR_B32_e64 killed %[[REG]]
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds half, half addrspace(1)* %in, i64 %tid.ext
  %val = load volatile half, half addrspace(1)* %in.gep
  %fabs = call half @llvm.fabs.f16(half %val)
  %fneg = fneg half %fabs
  store half %fneg, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_f16(half addrspace(1)* %in, half addrspace(1)* %out, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_fabs_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GCN: S_OR_B32 killed %{{[0-9]+}}, killed %[[REG]]
  %in.gep = getelementptr inbounds half, half addrspace(1)* %in, i64 %idx
  %val = load volatile half, half addrspace(1)* %in.gep
  %fabs = call half @llvm.fabs.f16(half %val)
  %fneg = fneg half %fabs
  store half %fneg, half addrspace(1)* %out
  ret void
}

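; The packed v2f16 cases still need only one 32-bit operation: a single mask
; covers both halves, 0x80008000 (-2147450880) for the sign bits and
; 0x7fff7fff (2147450879) for fabs.
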
define amdgpu_kernel void @divergent_fneg_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) {
; GCN-LABEL: name: divergent_fneg_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
; FP16: V_XOR_B32_e64 killed %[[REG]]
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in, align 2
  %fneg = fneg <2 x half> %val
  store <2 x half> %fneg, <2 x half> addrspace(1)* %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fneg_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in, i32 %idx) {
; GCN-LABEL: name: uniform_fneg_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
; GCN: S_XOR_B32 killed %{{[0-9]+}}, killed %[[REG]]
  %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %idx
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in, align 2
  %fneg = fneg <2 x half> %val
  store <2 x half> %fneg, <2 x half> addrspace(1)* %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) {
; GCN-LABEL: name: divergent_fabs_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
; FP16: V_AND_B32_e64 killed %[[REG]]
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in, align 2
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  store <2 x half> %fabs, <2 x half> addrspace(1)* %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in, i32 %idx) {
; GCN-LABEL: name: uniform_fabs_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
; GCN: S_AND_B32 killed %{{[0-9]+}}, killed %[[REG]]
  %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %idx
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in, align 2
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  store <2 x half> %fabs, <2 x half> addrspace(1)* %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) {
; GCN-LABEL: name: divergent_fneg_fabs_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
; FP16: V_OR_B32_e64 killed %[[REG]]
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in, align 2
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  %fneg = fneg <2 x half> %fabs
  store <2 x half> %fneg, <2 x half> addrspace(1)* %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in, i32 %idx) {
; GCN-LABEL: name: uniform_fneg_fabs_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
; GCN: S_OR_B32 killed %{{[0-9]+}}, killed %[[REG]]
  %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %idx
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in, align 2
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  %fneg = fneg <2 x half> %fabs
  store <2 x half> %fneg, <2 x half> addrspace(1)* %gep.out
  ret void
}

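; The v2f32 cases apply the 32-bit mask to each element separately, so two
; bit operations are expected. The mask register is reused by the second one,
; hence no 'killed' flag on %[[REG]] in the checks below.
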
define amdgpu_kernel void @divergent_fneg_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
; GCN-LABEL: name: divergent_fneg_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: V_XOR_B32_e64 %[[REG]]
; GCN: V_XOR_B32_e64 %[[REG]]
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %tid
  %val = load <2 x float>, <2 x float> addrspace(1)* %gep.in, align 4
  %fneg = fneg <2 x float> %val
  store <2 x float> %fneg, <2 x float> addrspace(1)* %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fneg_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in, i32 %idx) {
; GCN-LABEL: name: uniform_fneg_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: S_XOR_B32 killed %{{[0-9]+}}, %[[REG]]
; GCN: S_XOR_B32 killed %{{[0-9]+}}, %[[REG]]
  %gep.in = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %idx
  %val = load <2 x float>, <2 x float> addrspace(1)* %gep.in, align 4
  %fneg = fneg <2 x float> %val
  store <2 x float> %fneg, <2 x float> addrspace(1)* %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
; GCN-LABEL: name: divergent_fabs_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: V_AND_B32_e64 %[[REG]]
; GCN: V_AND_B32_e64 %[[REG]]
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %tid
  %val = load <2 x float>, <2 x float> addrspace(1)* %gep.in, align 4
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
  store <2 x float> %fabs, <2 x float> addrspace(1)* %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in, i32 %idx) {
; GCN-LABEL: name: uniform_fabs_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: S_AND_B32 killed %{{[0-9]+}}, %[[REG]]
; GCN: S_AND_B32 killed %{{[0-9]+}}, %[[REG]]
  %gep.in = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %idx
  %val = load <2 x float>, <2 x float> addrspace(1)* %gep.in, align 4
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
  store <2 x float> %fabs, <2 x float> addrspace(1)* %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
; GCN-LABEL: name: divergent_fneg_fabs_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: V_OR_B32_e64 %[[REG]]
; GCN: V_OR_B32_e64 %[[REG]]
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %tid
  %val = load <2 x float>, <2 x float> addrspace(1)* %gep.in, align 4
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
  %fneg = fneg <2 x float> %fabs
  store <2 x float> %fneg, <2 x float> addrspace(1)* %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in, i32 %idx) {
; GCN-LABEL: name: uniform_fneg_fabs_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: S_OR_B32 killed %{{[0-9]+}}, %[[REG]]
; GCN: S_OR_B32 killed %{{[0-9]+}}, %[[REG]]
  %gep.in = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %idx
  %val = load <2 x float>, <2 x float> addrspace(1)* %gep.in, align 4
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
  %fneg = fneg <2 x float> %fabs
  store <2 x float> %fneg, <2 x float> addrspace(1)* %gep.out
  ret void
}

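; For f64 only the high dword (sub1) carries the sign bit, so the selected
; code masks just that half and rebuilds the 64-bit value with REG_SEQUENCE,
; copying the low dword (sub0) through unchanged.
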
define amdgpu_kernel void @divergent_fneg_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
; GCN-LABEL: name: divergent_fneg_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: %[[XOR:[0-9]+]]:vgpr_32 = V_XOR_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]]
; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[XOR]], %subreg.sub1
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds double, double addrspace(1)* %in, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %val = load volatile double, double addrspace(1)* %in.gep
  %fneg = fneg double %val
  store double %fneg, double addrspace(1)* %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fneg_f64(double addrspace(1)* %out, double addrspace(1)* %in, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0
; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: %[[XOR:[0-9]+]]:sreg_32 = S_XOR_B32 killed %[[HI32]], killed %[[SREG_MASK]]
; GCN: %[[XOR_COPY:[0-9]+]]:sreg_32 = COPY %[[XOR]]
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[XOR_COPY]], %subreg.sub1
  %in.gep = getelementptr inbounds double, double addrspace(1)* %in, i64 %idx
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %idx
  %val = load volatile double, double addrspace(1)* %in.gep
  %fneg = fneg double %val
  store double %fneg, double addrspace(1)* %out.gep
  ret void
}

define amdgpu_kernel void @divergent_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
; GCN-LABEL: name: divergent_fabs_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]]
; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[AND]], %subreg.sub1
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds double, double addrspace(1)* %in, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %val = load volatile double, double addrspace(1)* %in.gep
  %fabs = call double @llvm.fabs.f64(double %val)
  store double %fabs, double addrspace(1)* %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in, i64 %idx) {
; GCN-LABEL: name: uniform_fabs_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0
; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %[[HI32]], killed %[[SREG_MASK]]
; GCN: %[[AND_COPY:[0-9]+]]:sreg_32 = COPY %[[AND]]
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[AND_COPY]], %subreg.sub1
  %in.gep = getelementptr inbounds double, double addrspace(1)* %in, i64 %idx
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %idx
  %val = load volatile double, double addrspace(1)* %in.gep
  %fabs = call double @llvm.fabs.f64(double %val)
  store double %fabs, double addrspace(1)* %out.gep
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
; GCN-LABEL: name: divergent_fneg_fabs_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: %[[OR:[0-9]+]]:vgpr_32 = V_OR_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]]
; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[OR]], %subreg.sub1
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds double, double addrspace(1)* %in, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %val = load volatile double, double addrspace(1)* %in.gep
  %fabs = call double @llvm.fabs.f64(double %val)
  %fneg = fneg double %fabs
  store double %fneg, double addrspace(1)* %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_fabs_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0
; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: %[[OR:[0-9]+]]:sreg_32 = S_OR_B32 killed %[[HI32]], killed %[[SREG_MASK]]
; GCN: %[[OR_COPY:[0-9]+]]:sreg_32 = COPY %[[OR]]
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[OR_COPY]], %subreg.sub1
  %in.gep = getelementptr inbounds double, double addrspace(1)* %in, i64 %idx
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %idx
  %val = load volatile double, double addrspace(1)* %in.gep
  %fabs = call double @llvm.fabs.f64(double %val)
  %fneg = fneg double %fabs
  store double %fneg, double addrspace(1)* %out.gep
  ret void
}

declare float @llvm.fabs.f32(float)
declare half @llvm.fabs.f16(half)
declare double @llvm.fabs.f64(double)
declare <2 x half> @llvm.fabs.v2f16(<2 x half>)
declare <2 x float> @llvm.fabs.v2f32(<2 x float>)

declare i32 @llvm.amdgcn.workitem.id.x()