llvm/test/CodeGen/AMDGPU/fabs.f64.ll

   1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
   2
   3 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; its result is the load index in v_fabs_f64
   4
   5 declare double @fabs(double) readnone ; non-intrinsic fabs symbol, called by the fabs_fn_* kernels
   6 declare double @llvm.fabs.f64(double) readnone ; scalar double fabs intrinsic
   7 declare <2 x double> @llvm.fabs.v2f64(<2 x double>) readnone ; <2 x double> overload, used by fabs_v2f64
   8 declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone ; <4 x double> overload, used by fabs_v4f64
   9
  10 ; FUNC-LABEL: {{^}}v_fabs_f64:
  11 ; SI: v_and_b32
  12 ; SI: s_endpgm
  13 define amdgpu_kernel void @v_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; fabs of a double loaded from %in at the workitem id
  14   %id = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  15   %idx = sext i32 %id to i64 ; widen the id to i64 for use as the GEP index
  16   %src = getelementptr double, ptr addrspace(1) %in, i64 %idx
  17   %x = load double, ptr addrspace(1) %src, align 8
  18   %abs = call double @llvm.fabs.f64(double %x)
  19   store double %abs, ptr addrspace(1) %out ; stored to %out itself, not indexed by the workitem id
  20   ret void
  21 }
  22
  23 ; FUNC-LABEL: {{^}}fabs_f64:
  24 ; SI: s_bitset0_b32
  25 ; SI: s_endpgm
  26 define amdgpu_kernel void @fabs_f64(ptr addrspace(1) %out, double %in) { ; fabs applied directly to a kernel argument
  27   %abs = call double @llvm.fabs.f64(double %in)
  28   store double %abs, ptr addrspace(1) %out ; the CHECK lines expect a single s_bitset0_b32
  29   ret void
  30 }
  31
  32 ; FUNC-LABEL: {{^}}fabs_v2f64:
  33 ; SI: s_bitset0_b32
  34 ; SI: s_bitset0_b32
  35 ; SI: s_endpgm
  36 define amdgpu_kernel void @fabs_v2f64(ptr addrspace(1) %out, <2 x double> %in) { ; 2-element vector variant
  37   %abs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
  38   store <2 x double> %abs, ptr addrspace(1) %out ; the CHECK lines expect one s_bitset0_b32 per element (2)
  39   ret void
  40 }
  41
  42 ; FUNC-LABEL: {{^}}fabs_v4f64:
  43 ; SI: s_bitset0_b32
  44 ; SI: s_bitset0_b32
  45 ; SI: s_bitset0_b32
  46 ; SI: s_bitset0_b32
  47 ; SI: s_endpgm
  48 define amdgpu_kernel void @fabs_v4f64(ptr addrspace(1) %out, <4 x double> %in) { ; 4-element vector variant
  49   %abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
  50   store <4 x double> %abs, ptr addrspace(1) %out ; the CHECK lines expect one s_bitset0_b32 per element (4)
  51   ret void
  52 }
  53
  54 ; FUNC-LABEL: {{^}}fabs_fold_f64:
  55 ; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
  56 ; SI-NOT: and
  57 ; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
  58 ; SI: s_endpgm
  59 define amdgpu_kernel void @fabs_fold_f64(ptr addrspace(1) %out, [8 x i32], double %in0, [8 x i32], double %in1) { ; fabs whose only use is an fmul operand
  60   %fabs = call double @llvm.fabs.f64(double %in0)
  61   %fmul = fmul double %fabs, %in1 ; the CHECK lines expect |x| as a v_mul_f64 source modifier, with no `and` before it
  62   store double %fmul, ptr addrspace(1) %out
  63   ret void
  64 }
  65
  66 ; FUNC-LABEL: {{^}}fabs_fn_fold_f64:
  67 ; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
  68 ; SI-NOT: and
  69 ; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
  70 ; SI: s_endpgm
  71 define amdgpu_kernel void @fabs_fn_fold_f64(ptr addrspace(1) %out, [8 x i32], double %in0, [8 x i32], double %in1) { ; as fabs_fold_f64, but calling @fabs instead of the intrinsic
  72   %fabs = call double @fabs(double %in0)
  73   %fmul = fmul double %fabs, %in1 ; the CHECK lines expect |x| as a v_mul_f64 source modifier, with no `and` before it
  74   store double %fmul, ptr addrspace(1) %out
  75   ret void
  76 }
  77
  78 ; FUNC-LABEL: {{^}}fabs_free_f64:
  79 ; SI: s_bitset0_b32
  80 ; SI: s_endpgm
  81 define amdgpu_kernel void @fabs_free_f64(ptr addrspace(1) %out, i64 %in) { ; fabs of an i64 argument reinterpreted as double
  82   %as_f64 = bitcast i64 %in to double ; same bits, viewed as a double
  83   %abs = call double @llvm.fabs.f64(double %as_f64)
  84   store double %abs, ptr addrspace(1) %out
  85   ret void
  86 }
  87
  88 ; FUNC-LABEL: {{^}}fabs_fn_free_f64:
  89 ; SI: s_bitset0_b32
  90 ; SI: s_endpgm
  91 define amdgpu_kernel void @fabs_fn_free_f64(ptr addrspace(1) %out, i64 %in) { ; as fabs_free_f64, but calling @fabs instead of the intrinsic
  92   %as_f64 = bitcast i64 %in to double ; same bits, viewed as a double
  93   %abs = call double @fabs(double %as_f64)
  94   store double %abs, ptr addrspace(1) %out
  95   ret void
  96 }