; RUN: llc -mtriple=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,FP16 %s
; RUN: llc -mtriple=amdgcn -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,FP16 %s
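
; Divergence-driven instruction selection should lower fneg, fabs and
; fneg(fabs) to sign-bit operations: VALU V_XOR/V_AND/V_OR for divergent
; inputs and SALU S_XOR/S_AND/S_OR for uniform inputs.
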
define amdgpu_kernel void @divergent_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: V_XOR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fneg = fneg float %val
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: S_XOR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fneg = fneg float %val
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @divergent_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fabs_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: V_AND_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fabs = call float @llvm.fabs.f32(float %val)
  store float %fabs, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name: uniform_fabs_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: S_AND_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fabs = call float @llvm.fabs.f32(float %val)
  store float %fabs, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_fabs_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: V_OR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fabs = call float @llvm.fabs.f32(float %val)
  %fneg = fneg float %fabs
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_fabs_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: S_OR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fabs = call float @llvm.fabs.f32(float %val)
  %fneg = fneg float %fabs
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

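; For f16 the sign bit is bit 15, so the masks are 0x8000 (32768) for fneg
; and 0x7fff (32767) for fabs; the divergent cases are only checked on
; targets with 16-bit instructions (FP16).
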
define amdgpu_kernel void @divergent_fabs_f16(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GCN-LABEL: name: divergent_fabs_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32767
; FP16: V_AND_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %tid.ext
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fabs = call half @llvm.fabs.f16(half %val)
  store half %fabs, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @uniform_fabs_f16(ptr addrspace(1) %in, ptr addrspace(1) %out, i64 %idx) {
; GCN-LABEL: name: uniform_fabs_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32767
; GCN: S_AND_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %idx
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fabs = call half @llvm.fabs.f16(half %val)
  store half %fabs, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @divergent_fneg_f16(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GCN-LABEL: name: divergent_fneg_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
; FP16: V_XOR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %tid.ext
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fneg = fneg half %val
  store half %fneg, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @uniform_fneg_f16(ptr addrspace(1) %in, ptr addrspace(1) %out, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GCN: S_XOR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %idx
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fneg = fneg half %val
  store half %fneg, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_f16(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GCN-LABEL: name: divergent_fneg_fabs_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
; FP16: V_OR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %tid.ext
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fabs = call half @llvm.fabs.f16(half %val)
  %fneg = fneg half %fabs
  store half %fneg, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_f16(ptr addrspace(1) %in, ptr addrspace(1) %out, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_fabs_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GCN: S_OR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %idx
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fabs = call half @llvm.fabs.f16(half %val)
  %fneg = fneg half %fabs
  store half %fneg, ptr addrspace(1) %out
  ret void
}

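; For <2 x half> both lanes are handled by one 32-bit op with a packed mask:
; 0x80008000 (-2147450880) for the sign bits, 0x7fff7fff (2147450879) for fabs.
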
define amdgpu_kernel void @divergent_fneg_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
; FP16: V_XOR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fneg = fneg <2 x half> %val
  store <2 x half> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fneg_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name: uniform_fneg_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
; GCN: S_XOR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fneg = fneg <2 x half> %val
  store <2 x half> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fabs_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
; FP16: V_AND_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  store <2 x half> %fabs, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name: uniform_fabs_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
; GCN: S_AND_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  store <2 x half> %fabs, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_fabs_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
; FP16: V_OR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  %fneg = fneg <2 x half> %fabs
  store <2 x half> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name: uniform_fneg_fabs_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
; GCN: S_OR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  %fneg = fneg <2 x half> %fabs
  store <2 x half> %fneg, ptr addrspace(1) %gep.out
  ret void
}

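; For <2 x float> each element gets its own 32-bit op; the mask register is
; shared by both, so the first use does not kill it.
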
define amdgpu_kernel void @divergent_fneg_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: V_XOR_B32_e64 %[[REG]]
; GCN: V_XOR_B32_e64 %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fneg = fneg <2 x float> %val
  store <2 x float> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fneg_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name: uniform_fneg_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: S_XOR_B32 killed %{{[0-9]+}}, %[[REG]]
; GCN: S_XOR_B32 killed %{{[0-9]+}}, %[[REG]]

  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fneg = fneg <2 x float> %val
  store <2 x float> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fabs_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fabs_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: V_AND_B32_e64 %[[REG]]
; GCN: V_AND_B32_e64 %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
  store <2 x float> %fabs, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fabs_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name: uniform_fabs_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: S_AND_B32 killed %{{[0-9]+}}, %[[REG]]
; GCN: S_AND_B32 killed %{{[0-9]+}}, %[[REG]]

  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
  store <2 x float> %fabs, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_fabs_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: V_OR_B32_e64 %[[REG]]
; GCN: V_OR_B32_e64 %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
  %fneg = fneg <2 x float> %fabs
  store <2 x float> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name: uniform_fneg_fabs_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: S_OR_B32 killed %{{[0-9]+}}, %[[REG]]
; GCN: S_OR_B32 killed %{{[0-9]+}}, %[[REG]]

  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
  %fneg = fneg <2 x float> %fabs
  store <2 x float> %fneg, ptr addrspace(1) %gep.out
  ret void
}

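; For f64 the sign bit lives in the high dword, so only the sub1 half is
; modified with a 32-bit op while sub0 is copied through unchanged.
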
define amdgpu_kernel void @divergent_fneg_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: %[[XOR:[0-9]+]]:vgpr_32 = V_XOR_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]]
; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[XOR]], %subreg.sub1

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fneg = fneg double %val
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fneg_f64(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0
; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: %[[XOR:[0-9]+]]:sreg_32 = S_XOR_B32 killed %[[HI32]], killed %[[SREG_MASK]]
; GCN: %[[XOR_COPY:[0-9]+]]:sreg_32 = COPY %[[XOR]]
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[XOR_COPY]], %subreg.sub1

  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %idx
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fneg = fneg double %val
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @divergent_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fabs_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]]
; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[AND]], %subreg.sub1

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fabs = call double @llvm.fabs.f64(double %val)
  store double %fabs, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name: uniform_fabs_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0
; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %[[HI32]], killed %[[SREG_MASK]]
; GCN: %[[AND_COPY:[0-9]+]]:sreg_32 = COPY %[[AND]]
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[AND_COPY]], %subreg.sub1

  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %idx
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fabs = call double @llvm.fabs.f64(double %val)
  store double %fabs, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_fabs_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: %[[OR:[0-9]+]]:vgpr_32 = V_OR_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]]
; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[OR]], %subreg.sub1

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fabs = call double @llvm.fabs.f64(double %val)
  %fneg = fneg double %fabs
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_fabs_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0
; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: %[[OR:[0-9]+]]:sreg_32 = S_OR_B32 killed %[[HI32]], killed %[[SREG_MASK]]
; GCN: %[[OR_COPY:[0-9]+]]:sreg_32 = COPY %[[OR]]
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[OR_COPY]], %subreg.sub1

  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %idx
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fabs = call double @llvm.fabs.f64(double %val)
  %fneg = fneg double %fabs
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}

declare float @llvm.fabs.f32(float)
declare half @llvm.fabs.f16(half)
declare double @llvm.fabs.f64(double)
declare <2 x half> @llvm.fabs.v2f16(<2 x half>)
declare <2 x float> @llvm.fabs.v2f32(<2 x float>)

declare i32 @llvm.amdgcn.workitem.id.x()