1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; External declarations used by the tests in this file.
; Work-item id query (x); returns i32 and is declared nounwind readnone.
3 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; Plain function named @fabs (not the intrinsic); called by the *_fn_* tests.
5 declare double @fabs(double) readnone
; llvm.fabs intrinsic overloads for scalar, 2-element and 4-element doubles.
6 declare double @llvm.fabs.f64(double) readnone
7 declare <2 x double> @llvm.fabs.v2f64(<2 x double>) readnone
8 declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone
; fabs of a double loaded from memory: the work-item id (x) is sign-extended to
; i64 and used to index %in; the absolute value of that element is stored to
; %out. Both pointers are in addrspace(1).
10 ; FUNC-LABEL: {{^}}v_fabs_f64:
13 define amdgpu_kernel void @v_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
14 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
15 %tidext = sext i32 %tid to i64
; Address of element %in[tid].
16 %gep = getelementptr double, double addrspace(1)* %in, i64 %tidext
17 %val = load double, double addrspace(1)* %gep, align 8
18 %fabs = call double @llvm.fabs.f64(double %val)
; Note: the store goes to %out itself, with no per-work-item offset.
19 store double %fabs, double addrspace(1)* %out
; fabs of a scalar double passed directly as a kernel argument (%in); the
; result is stored to %out via the llvm.fabs.f64 intrinsic.
23 ; FUNC-LABEL: {{^}}fabs_f64:
27 define amdgpu_kernel void @fabs_f64(double addrspace(1)* %out, double %in) {
28 %fabs = call double @llvm.fabs.f64(double %in)
29 store double %fabs, double addrspace(1)* %out
; Vector variant: fabs of a <2 x double> kernel argument through the
; llvm.fabs.v2f64 intrinsic; the whole vector is stored to %out.
33 ; FUNC-LABEL: {{^}}fabs_v2f64:
37 define amdgpu_kernel void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
38 %fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
39 store <2 x double> %fabs, <2 x double> addrspace(1)* %out
; Vector variant: fabs of a <4 x double> kernel argument through the
; llvm.fabs.v4f64 intrinsic; the whole vector is stored to %out.
43 ; FUNC-LABEL: {{^}}fabs_v4f64:
49 define amdgpu_kernel void @fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
50 %fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
51 store <4 x double> %fabs, <4 x double> addrspace(1)* %out
; fabs (intrinsic) feeding an fmul. The checks below expect a 64-bit scalar
; load at offset 0x13, captured as ABS_VALUE, and then a v_mul_f64 that takes
; that same register pair wrapped in |...| as an operand, i.e. the absolute
; value applied on the multiply's input.
55 ; SI-LABEL: {{^}}fabs_fold_f64:
56 ; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
58 ; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
; The unnamed [8 x i32] parameters are never used in the body; presumably they
; pad the kernel argument layout so %in0 lands at the checked load offset
; (TODO confirm).
60 define amdgpu_kernel void @fabs_fold_f64(double addrspace(1)* %out, [8 x i32], double %in0, [8 x i32], double %in1) {
61 %fabs = call double @llvm.fabs.f64(double %in0)
62 %fmul = fmul double %fabs, %in1
63 store double %fmul, double addrspace(1)* %out
; Same shape as the intrinsic-based fold test, but the absolute value comes
; from a call to the plain @fabs function. The checks expect the identical
; output: a 64-bit scalar load at offset 0x13 captured as ABS_VALUE, then a
; v_mul_f64 using |ABS_VALUE| as an operand.
67 ; SI-LABEL: {{^}}fabs_fn_fold_f64:
68 ; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
70 ; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
; The unnamed [8 x i32] parameters are never used in the body; presumably they
; pad the kernel argument layout so %in0 lands at the checked load offset
; (TODO confirm).
72 define amdgpu_kernel void @fabs_fn_fold_f64(double addrspace(1)* %out, [8 x i32], double %in0, [8 x i32], double %in1) {
73 %fabs = call double @fabs(double %in0)
74 %fmul = fmul double %fabs, %in1
75 store double %fmul, double addrspace(1)* %out
; fabs applied to a double obtained by bitcasting an i64 kernel argument; the
; result is stored to %out. Uses the llvm.fabs.f64 intrinsic.
79 ; FUNC-LABEL: {{^}}fabs_free_f64:
82 define amdgpu_kernel void @fabs_free_f64(double addrspace(1)* %out, i64 %in) {
; Reinterpret the integer bits as a double (no value conversion).
83 %bc= bitcast i64 %in to double
84 %fabs = call double @llvm.fabs.f64(double %bc)
85 store double %fabs, double addrspace(1)* %out
; Same as the intrinsic-based bitcast test, but the absolute value comes from
; a call to the plain @fabs function: i64 argument bitcast to double, fabs,
; store to %out.
89 ; FUNC-LABEL: {{^}}fabs_fn_free_f64:
92 define amdgpu_kernel void @fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
; Reinterpret the integer bits as a double (no value conversion).
93 %bc= bitcast i64 %in to double
94 %fabs = call double @fabs(double %bc)
95 store double %fabs, double addrspace(1)* %out