llvm/test/CodeGen/AMDGPU/fminnum.f64.ll

   1 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678 %s
   2 ; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX678 %s
   3 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
   4
   5 declare double @llvm.minnum.f64(double, double) #0
   6 declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) #0
   7 declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>) #0
   8 declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>) #0
   9 declare <16 x double> @llvm.minnum.v16f64(<16 x double>, <16 x double>) #0
     ; The declarations above cover the llvm.minnum intrinsic for scalar double
     ; and for 2-, 4-, 8- and 16-element double vectors; each width is called by
     ; one or more of the test functions in this file. All five carry
     ; attribute group #0.
  10
  11 ; GCN-LABEL: {{^}}test_fmin_f64_ieee_noflush:
     ; Scalar llvm.minnum.f64 on two kernel arguments, in a kernel that uses
     ; attribute group #1 ("denormal-fp-math"="ieee,ieee").
     ; Expected for all three run lines: both arguments are loaded with
     ; s_load_dwordx2, each one is passed to v_max_f64 as both source operands
     ; (results captured as QUIETA and QUIETB, in either order), and v_min_f64
     ; then consumes QUIETB and QUIETA.
     ; NOTE(review): the self-max is presumably what quiets signaling NaN inputs
     ; before the min; that is inferred from the capture names, so confirm
     ; against the backend.
     ; NOTE(review): the unnamed [8 x i32] parameters look like padding that
     ; keeps %a and %b apart in the kernel argument area so they arrive through
     ; two separate loads; confirm.
  12 ; GCN: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]]
  13 ; GCN: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]]
  14
  15 ; GCN-DAG: v_max_f64 [[QUIETA:v\[[0-9]+:[0-9]+\]]], [[A]], [[A]]
  16 ; GCN-DAG: v_max_f64 [[QUIETB:v\[[0-9]+:[0-9]+\]]], [[B]], [[B]]
  17
  18 ; GCN: v_min_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[QUIETB]], [[QUIETA]]
  19 define amdgpu_kernel void @test_fmin_f64_ieee_noflush([8 x i32], double %a, [8 x i32], double %b) #1 {
  20   %val = call double @llvm.minnum.f64(double %a, double %b) #0
  21   store double %val, ptr addrspace(1) undef, align 8
  22   ret void
  23 }
  24
  25 ; GCN-LABEL: {{^}}test_fmin_f64_ieee_flush:
     ; Same scalar llvm.minnum.f64 as the previous test, but the kernel uses
     ; attribute group #2 ("denormal-fp-math"="preserve-sign,preserve-sign").
     ; Expected: on the tahiti and tonga run lines each loaded argument is first
     ; multiplied by 1.0 with v_mul_f64; on the gfx900 run line each argument is
     ; instead passed to v_max_f64 as both source operands. In both cases the two
     ; intermediate results (QUIETA, QUIETB) are the operands of v_min_f64.
     ; NOTE(review): the per-target split presumably exists because the older
     ; targets cannot use the self-max form when denormals are flushed; confirm
     ; against the backend's canonicalize lowering.
  26 ; GCN: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]]
  27 ; GCN: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]]
  28 ; GFX678-DAG: v_mul_f64 [[QUIETA:v\[[0-9]+:[0-9]+\]]], 1.0, [[A]]
  29 ; GFX678-DAG: v_mul_f64 [[QUIETB:v\[[0-9]+:[0-9]+\]]], 1.0, [[B]]
  30
  31 ; GFX9-DAG: v_max_f64 [[QUIETA:v\[[0-9]+:[0-9]+\]]], [[A]], [[A]]
  32 ; GFX9-DAG: v_max_f64 [[QUIETB:v\[[0-9]+:[0-9]+\]]], [[B]], [[B]]
  33
  34 ; GCN: v_min_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[QUIETB]], [[QUIETA]]
  35 define amdgpu_kernel void @test_fmin_f64_ieee_flush([8 x i32], double %a, [8 x i32], double %b) #2 {
  36   %val = call double @llvm.minnum.f64(double %a, double %b) #0
  37   store double %val, ptr addrspace(1) undef, align 8
  38   ret void
  39 }
  40
  41 ; GCN-LABEL: {{^}}test_fmin_f64_no_ieee:
     ; Scalar llvm.minnum.f64 in an amdgpu_ps function. Both operands come from
     ; volatile loads through an undef addrspace(3) pointer, and the result is
     ; stored back with a volatile store through the same kind of pointer.
     ; Expected: the two ds_read_b64 results are the direct operands of
     ; v_min_f64. The negative checks forbid any other mention of either loaded
     ; register pair between the second load and the v_min_f64, and any mention
     ; of the result register pair between the v_min_f64 and the ds_write_b64
     ; that stores it.
     ; NOTE(review): judging by the test name, this calling convention runs
     ; without IEEE mode, so no quieting instruction is expected; confirm.
  42 ; GCN: ds_read_b64 [[VAL0:v\[[0-9]+:[0-9]+\]]]
  43 ; GCN: ds_read_b64 [[VAL1:v\[[0-9]+:[0-9]+\]]]
  44 ; GCN-NOT: [[VAL0]]
  45 ; GCN-NOT: [[VAL1]]
  46 ; GCN: v_min_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VAL0]], [[VAL1]]
  47 ; GCN-NOT: [[RESULT]]
  48 ; GCN: ds_write_b64 v{{[0-9]+}}, [[RESULT]]
  49 define amdgpu_ps void @test_fmin_f64_no_ieee() nounwind {
  50   %a = load volatile double, ptr addrspace(3) undef
  51   %b = load volatile double, ptr addrspace(3) undef
  52   %val = call double @llvm.minnum.f64(double %a, double %b) #0
  53   store volatile double %val, ptr addrspace(3) undef
  54   ret void
  55 }
  56
  57 ; GCN-LABEL: {{^}}test_fmin_v2f64:
     ; llvm.minnum.v2f64 on two <2 x double> kernel arguments; the result is
     ; stored to %out with align 16.
     ; Expected: v_min_f64 appears at least twice after the label, matching the
     ; two vector elements. Operands are not checked, and additional
     ; occurrences are not forbidden.
  58 ; GCN: v_min_f64
  59 ; GCN: v_min_f64
  60 define amdgpu_kernel void @test_fmin_v2f64(ptr addrspace(1) %out, <2 x double> %a, <2 x double> %b) nounwind {
  61   %val = call <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %b) #0
  62   store <2 x double> %val, ptr addrspace(1) %out, align 16
  63   ret void
  64 }
  65
  66 ; GCN-LABEL: {{^}}test_fmin_v4f64:
     ; llvm.minnum.v4f64 on two <4 x double> kernel arguments; the result is
     ; stored to %out with align 32.
     ; Expected: v_min_f64 appears at least four times after the label,
     ; matching the four vector elements. Operands are not checked, and
     ; additional occurrences are not forbidden.
  67 ; GCN: v_min_f64
  68 ; GCN: v_min_f64
  69 ; GCN: v_min_f64
  70 ; GCN: v_min_f64
  71 define amdgpu_kernel void @test_fmin_v4f64(ptr addrspace(1) %out, <4 x double> %a, <4 x double> %b) nounwind {
  72   %val = call <4 x double> @llvm.minnum.v4f64(<4 x double> %a, <4 x double> %b) #0
  73   store <4 x double> %val, ptr addrspace(1) %out, align 32
  74   ret void
  75 }
  76
  77 ; GCN-LABEL: {{^}}test_fmin_v8f64:
     ; llvm.minnum.v8f64 on two <8 x double> kernel arguments; the result is
     ; stored to %out with align 64.
     ; Expected: v_min_f64 appears at least eight times after the label,
     ; matching the eight vector elements. Operands are not checked, and
     ; additional occurrences are not forbidden.
  78 ; GCN: v_min_f64
  79 ; GCN: v_min_f64
  80 ; GCN: v_min_f64
  81 ; GCN: v_min_f64
  82 ; GCN: v_min_f64
  83 ; GCN: v_min_f64
  84 ; GCN: v_min_f64
  85 ; GCN: v_min_f64
  86 define amdgpu_kernel void @test_fmin_v8f64(ptr addrspace(1) %out, <8 x double> %a, <8 x double> %b) nounwind {
  87   %val = call <8 x double> @llvm.minnum.v8f64(<8 x double> %a, <8 x double> %b) #0
  88   store <8 x double> %val, ptr addrspace(1) %out, align 64
  89   ret void
  90 }
  91
  92 ; GCN-LABEL: {{^}}test_fmin_v16f64:
     ; llvm.minnum.v16f64 on two <16 x double> kernel arguments; the result is
     ; stored to %out with align 128.
     ; Expected: v_min_f64 appears at least sixteen times after the label,
     ; matching the sixteen vector elements. Operands are not checked, and
     ; additional occurrences are not forbidden.
  93 ; GCN: v_min_f64
  94 ; GCN: v_min_f64
  95 ; GCN: v_min_f64
  96 ; GCN: v_min_f64
  97 ; GCN: v_min_f64
  98 ; GCN: v_min_f64
  99 ; GCN: v_min_f64
 100 ; GCN: v_min_f64
 101 ; GCN: v_min_f64
 102 ; GCN: v_min_f64
 103 ; GCN: v_min_f64
 104 ; GCN: v_min_f64
 105 ; GCN: v_min_f64
 106 ; GCN: v_min_f64
 107 ; GCN: v_min_f64
 108 ; GCN: v_min_f64
 109 define amdgpu_kernel void @test_fmin_v16f64(ptr addrspace(1) %out, <16 x double> %a, <16 x double> %b) nounwind {
 110   %val = call <16 x double> @llvm.minnum.v16f64(<16 x double> %a, <16 x double> %b) #0
 111   store <16 x double> %val, ptr addrspace(1) %out, align 128
 112   ret void
 113 }
 114
 115 attributes #0 = { nounwind readnone }
 116 attributes #1 = { nounwind "denormal-fp-math"="ieee,ieee" }
 117 attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
     ; Attribute group usage in this file:
     ;   #0 is applied to the llvm.minnum declarations and to every call site.
     ;   #1 is applied to test_fmin_f64_ieee_noflush only.
     ;   #2 is applied to test_fmin_f64_ieee_flush only.
     ; The remaining test functions spell out nounwind directly and set no
     ; "denormal-fp-math" attribute.