clang/test/CodeGenOpenCL/sqrt-fpmath.cl

   1 // Test that float variants of sqrt are emitted as available_externally inline
   2 // definitions that call the sqrt intrinsic, with !fpmath metadata attached
   3 // unless -cl-fp32-correctly-rounded-divide-sqrt is passed.
   4
   5 // Test with -fdeclare-opencl-builtins
   6 // RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT %s
   7 // RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED %s
   8
   9 // RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT-UNSAFE %s
  10 // RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED-UNSAFE %s
  11
  12 // Test without -fdeclare-opencl-builtins
  13 // RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT %s
  14 // RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED %s
  15
  16 // RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT-UNSAFE %s
  17 // RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED-UNSAFE %s
  18
  19 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
  20
  21 // CHECK-LABEL: define {{.*}} float @call_sqrt_f32(
  22 // CHECK: call {{.*}} float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+$}}
  23 float call_sqrt_f32(float x) { // Scalar float probe: the checks above expect a call to the mangled builtin _Z4sqrtf.
  24   return sqrt(x); // The checks below expect the builtin's inline body to call llvm.sqrt.f32.
  25 }
  26
  27 // CHECK-LABEL: define available_externally float @_Z4sqrtf(float noundef %__x)
  28 // DEFAULT: call float @llvm.sqrt.f32(float %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
  29 // CORRECTLYROUNDED: call float @llvm.sqrt.f32(float %{{.+}}){{$}}
  30
  31 // DEFAULT-UNSAFE: call reassoc nsz arcp contract afn float @llvm.sqrt.f32(float %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
  32 // CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn float @llvm.sqrt.f32(float %{{.+}}){{$}}
  33
  34 // CHECK-LABEL: define {{.*}} <2 x float> @call_sqrt_v2f32(
  35 // CHECK: call {{.*}} <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.*}}) #{{[0-9]+$}}
  36 float2 call_sqrt_v2f32(float2 x) { // float2 probe: the checks above expect a call to the mangled builtin _Z4sqrtDv2_f.
  37   return sqrt(x); // The checks below expect the builtin's inline body to call llvm.sqrt.v2f32.
  38 }
  39
  40 // CHECK-LABEL: define available_externally <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %__x)
  41 // DEFAULT: call <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
  42 // CORRECTLYROUNDED: call <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}){{$}}
  43
  44 // DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
  45 // CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}){{$}}
  46
  47 // CHECK-LABEL: define {{.*}} <3 x float> @call_sqrt_v3f32(
  48 // CHECK: call {{.*}} <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.*}}) #{{[0-9]+$}}
  49 float3 call_sqrt_v3f32(float3 x) { // float3 probe: the checks above expect a call to the mangled builtin _Z4sqrtDv3_f.
  50   return sqrt(x); // The checks below expect the builtin's inline body to call llvm.sqrt.v3f32.
  51 }
  52
  53 // CHECK-LABEL: define available_externally <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %__x)
  54 // DEFAULT: call <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
  55 // CORRECTLYROUNDED: call <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}){{$}}
  56
  57 // DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
  58 // CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}){{$}}
  59
  60
  61 // CHECK-LABEL: define {{.*}} <4 x float> @call_sqrt_v4f32(
  62 // CHECK: call {{.*}} <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.*}}) #{{[0-9]+$}}
  63 float4 call_sqrt_v4f32(float4 x) { // float4 probe: the checks above expect a call to the mangled builtin _Z4sqrtDv4_f.
  64   return sqrt(x); // The checks below expect the builtin's inline body to call llvm.sqrt.v4f32.
  65 }
  66
  67 // CHECK-LABEL: define available_externally <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %__x)
  68 // DEFAULT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
  69 // CORRECTLYROUNDED: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}){{$}}
  70
  71 // DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
  72 // CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}){{$}}
  73
  74 // CHECK-LABEL: define {{.*}} <8 x float> @call_sqrt_v8f32(
  75 // CHECK: call {{.*}} <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.*}}) #{{[0-9]+$}}
  76 float8 call_sqrt_v8f32(float8 x) { // float8 probe: the checks above expect a call to the mangled builtin _Z4sqrtDv8_f.
  77   return sqrt(x); // The checks below expect the builtin's inline body to call llvm.sqrt.v8f32.
  78 }
  79
  80 // CHECK-LABEL: define available_externally <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %__x)
  81 // DEFAULT: call <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
  82 // CORRECTLYROUNDED: call <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}){{$}}
  83
  84 // DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
  85 // CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}){{$}}
  86
  87
  88 // CHECK-LABEL: define {{.*}} <16 x float> @call_sqrt_v16f32(
  89 // CHECK: call {{.*}} <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.*}}) #{{[0-9]+$}}
  90 float16 call_sqrt_v16f32(float16 x) { // float16 (16 x float) probe: the checks above expect a call to the mangled builtin _Z4sqrtDv16_f.
  91   return sqrt(x); // The checks below expect the builtin's inline body to call llvm.sqrt.v16f32.
  92 }
  93
  94 // CHECK-LABEL: define available_externally <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %__x)
  95 // DEFAULT: call <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
  96 // CORRECTLYROUNDED: call <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}){{$}}
  97
  98 // DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
  99 // CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}){{$}}
 100
 101
 102 // Not for f64: only the call to the mangled builtin is checked, and no definition may follow it.
 103 // CHECK-LABEL: define {{.*}} double @call_sqrt_f64(
 104 // CHECK: call {{.*}} double @_Z4sqrtd(double noundef %{{.+}}) #{{[0-9]+$}}
 105 double call_sqrt_f64(double x) { // Scalar double probe: the checks above expect only a call to the mangled builtin _Z4sqrtd.
 106   return sqrt(x); // The negative check that follows rejects any further `define` in this label's region.
 107 }
 108
 109 // CHECK-NOT: define
 110
 111 // Not for f64
 112 // CHECK-LABEL: define {{.*}} <2 x double> @call_sqrt_v2f64(
 113 // CHECK: call {{.*}} <2 x double> @_Z4sqrtDv2_d(<2 x double> noundef %{{.+}}) #{{[0-9]+$}}
 114 double2 call_sqrt_v2f64(double2 x) { // double2 probe: the checks above expect only a call to the mangled builtin _Z4sqrtDv2_d.
 115   return sqrt(x); // The negative check that follows rejects any further `define` in this label's region.
 116 }
 117
 118 // CHECK-NOT: define
 119
 120 // CHECK-LABEL: define {{.*}} <3 x double> @call_sqrt_v3f64(
 121 // CHECK: call {{.*}} <3 x double> @_Z4sqrtDv3_d(<3 x double> noundef %{{.+}}) #{{[0-9]+$}}
 122 double3 call_sqrt_v3f64(double3 x) { // double3 probe: the checks above expect only a call to the mangled builtin _Z4sqrtDv3_d.
 123   return sqrt(x); // The negative check that follows rejects any further `define` in this label's region.
 124 }
 125
 126 // CHECK-NOT: define
 127
 128 // CHECK-LABEL: define {{.*}} <4 x double> @call_sqrt_v4f64(
 129 // CHECK: call {{.*}} <4 x double> @_Z4sqrtDv4_d(<4 x double> noundef %{{.+}}) #{{[0-9]+$}}
 130 double4 call_sqrt_v4f64(double4 x) { // double4 probe: the checks above expect only a call to the mangled builtin _Z4sqrtDv4_d.
 131   return sqrt(x); // The negative check that follows rejects any further `define` in this label's region.
 132 }
 133
 134 // CHECK-NOT: define
 135
 136 // CHECK-LABEL: define {{.*}} <8 x double> @call_sqrt_v8f64(
 137 // CHECK: call {{.*}} <8 x double> @_Z4sqrtDv8_d(<8 x double> noundef %{{.+}}) #{{[0-9]+$}}
 138 double8 call_sqrt_v8f64(double8 x) { // double8 probe: the checks above expect only a call to the mangled builtin _Z4sqrtDv8_d.
 139   return sqrt(x); // The negative check that follows rejects any further `define` in this label's region.
 140 }
 141
 142 // CHECK-NOT: define
 143
 144 // CHECK-LABEL: define {{.*}} <16 x double> @call_sqrt_v16f64(
 145 // CHECK: call {{.*}} <16 x double> @_Z4sqrtDv16_d(<16 x double> noundef %{{.+}}) #{{[0-9]+$}}
 146 double16 call_sqrt_v16f64(double16 x) { // double16 probe: the checks above expect only a call to the mangled builtin _Z4sqrtDv16_d.
 147   return sqrt(x); // The negative check that follows rejects any further `define` in this label's region.
 148 }
 149
 150 // CHECK-NOT: define
 151
 152 // Not for f16: only the call to the mangled builtin is checked, and no definition may follow it.
 153 // CHECK-LABEL: define {{.*}} half @call_sqrt_f16(
 154 // CHECK: call {{.*}} half @_Z4sqrtDh(half noundef %{{.+}}) #{{[0-9]+$}}
 155 half call_sqrt_f16(half x) { // Scalar half probe: the checks above expect only a call to the mangled builtin _Z4sqrtDh.
 156   return sqrt(x); // The negative check that follows rejects any further `define` in this label's region.
 157 }
 158
 159 // CHECK-NOT: define
 160
 161 // CHECK-LABEL: define {{.*}} <2 x half> @call_sqrt_v2f16(
 162 // CHECK: call {{.*}} <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef %{{.+}}) #{{[0-9]+$}}
 163 half2 call_sqrt_v2f16(half2 x) { // half2 probe: the checks above expect only a call to the mangled builtin _Z4sqrtDv2_Dh.
 164   return sqrt(x); // The negative check that follows rejects any further `define` in this label's region.
 165 }
 166
 167 // CHECK-NOT: define
 168
 169 // CHECK-LABEL: define {{.*}} <3 x half> @call_sqrt_v3f16(
 170 // CHECK: call {{.*}} <3 x half> @_Z4sqrtDv3_Dh(<3 x half> noundef %{{.+}}) #{{[0-9]+$}}
 171 half3 call_sqrt_v3f16(half3 x) { // half3 probe: the checks above expect only a call to the mangled builtin _Z4sqrtDv3_Dh.
 172   return sqrt(x); // The negative check that follows rejects any further `define` in this label's region.
 173 }
 174
 175 // CHECK-NOT: define
 176
 177 // CHECK-LABEL: define {{.*}} <4 x half> @call_sqrt_v4f16(
 178 // CHECK: call {{.*}} <4 x half> @_Z4sqrtDv4_Dh(<4 x half> noundef %{{.+}}) #{{[0-9]+$}}
 179 half4 call_sqrt_v4f16(half4 x) { // half4 probe: the checks above expect only a call to the mangled builtin _Z4sqrtDv4_Dh.
 180   return sqrt(x); // The negative check that follows rejects any further `define` in this label's region.
 181 }
 182
 183 // CHECK-NOT: define
 184
 185 // CHECK-LABEL: define {{.*}} <8 x half> @call_sqrt_v8f16(
 186 // CHECK: call {{.*}} <8 x half> @_Z4sqrtDv8_Dh(<8 x half> noundef %{{.+}}) #{{[0-9]+$}}
 187 half8 call_sqrt_v8f16(half8 x) { // half8 probe: the checks above expect only a call to the mangled builtin _Z4sqrtDv8_Dh.
 188   return sqrt(x); // The negative check that follows rejects any further `define` in this label's region.
 189 }
 190
 191 // CHECK-NOT: define
 192
 193 // CHECK-LABEL: define {{.*}} <16 x half> @call_sqrt_v16f16(
 194 // CHECK: call {{.*}} <16 x half> @_Z4sqrtDv16_Dh(<16 x half> noundef %{{.+}}) #{{[0-9]+$}}
 195 half16 call_sqrt_v16f16(half16 x) { // half16 probe: the checks above expect only a call to the mangled builtin _Z4sqrtDv16_Dh.
 196   return sqrt(x); // The negative check that follows rejects any further `define` in this label's region.
 197 }
 198
 199 // CHECK-NOT: define
 200
 201 // DEFAULT: [[$FPMATH]] = !{float 3.000000e+00}