llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll

   ; Loop-vectorizer test for the AMDLIBM vector library. Each command below runs
   ; opt with inject-tli-mappings and loop-vectorize at one forced vector width
   ; (4, 2, 8, 16 in that order) with interleaving forced to 1, then verifies the
   ; output against the common prefix plus one width-specific prefix.
   ; Widths 4 and 2 use -mattr=avx; widths 8 and 16 use -mattr=+avx512f.
   1 ; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF4
   2 ; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF2
   3 ; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -mattr=+avx512f -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF8
   4 ; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=16 -force-vector-interleave=1 -mattr=+avx512f -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF16
   5
   6 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
   7 target triple = "x86_64-unknown-linux-gnu"
   8
   ; Scalar math declarations used as vectorization candidates. Most groups list
   ; the libm double and float functions followed by the matching llvm.* f64 and
   ; f32 intrinsics. The test functions visible in this part of the file call the
   ; sin, cos, tan, acos, asin, atan, sinh and cosh entries; the remaining groups
   ; are presumably exercised further down the file.
   ; Every declaration references attribute group #0, whose definition is not in
   ; the visible portion of the file.
   9 declare double @sin(double) #0
  10 declare float @sinf(float) #0
  11 declare double @llvm.sin.f64(double) #0
  12 declare float @llvm.sin.f32(float) #0
  13
  14 declare double @cos(double) #0
  15 declare float @cosf(float) #0
  16 declare double @llvm.cos.f64(double) #0
  17 declare float @llvm.cos.f32(float) #0
  18
  19 declare double @tan(double) #0
  20 declare float @tanf(float) #0
  21 declare double @llvm.tan.f64(double) #0
  22 declare float @llvm.tan.f32(float) #0
  23
  24 declare double @acos(double) #0
  25 declare float @acosf(float) #0
  26 declare double @llvm.acos.f64(double) #0
  27 declare float @llvm.acos.f32(float) #0
  28
  29 declare double @asin(double) #0
  30 declare float @asinf(float) #0
  31 declare double @llvm.asin.f64(double) #0
  32 declare float @llvm.asin.f32(float) #0
  33
  34 declare double @atan(double) #0
  35 declare float @atanf(float) #0
  36 declare double @llvm.atan.f64(double) #0
  37 declare float @llvm.atan.f32(float) #0
  38
  39 declare double @sinh(double) #0
  40 declare float @sinhf(float) #0
  41 declare double @llvm.sinh.f64(double) #0
  42 declare float @llvm.sinh.f32(float) #0
  43
  44 declare double @cosh(double) #0
  45 declare float @coshf(float) #0
  46 declare double @llvm.cosh.f64(double) #0
  47 declare float @llvm.cosh.f32(float) #0
  48
  49 declare double @tanh(double) #0
  50 declare float @tanhf(float) #0
  51 declare double @llvm.tanh.f64(double) #0
  52 declare float @llvm.tanh.f32(float) #0
  53
  ; pow is the only two-operand function in this list.
  54 declare double @pow(double, double) #0
  55 declare float @powf(float, float) #0
  56 declare double @llvm.pow.f64(double, double) #0
  57 declare float @llvm.pow.f32(float, float) #0
  58
  59 declare double @exp(double) #0
  60 declare float @expf(float) #0
  61 declare double @llvm.exp.f64(double) #0
  62 declare float @llvm.exp.f32(float) #0
  63
  64 declare double @log(double) #0
  65 declare float @logf(float) #0
  66 declare double @llvm.log.f64(double) #0
  67 declare float @llvm.log.f32(float) #0
  68
  69 declare double @log2(double) #0
  70 declare float @log2f(float) #0
  71 declare double @llvm.log2.f64(double) #0
  72 declare float @llvm.log2.f32(float) #0
  73
  74 declare double @log10(double) #0
  75 declare float @log10f(float) #0
  76 declare double @llvm.log10.f64(double) #0
  77 declare float @llvm.log10.f32(float) #0
  78
  ; sqrt: only the libm forms are declared in this group (no llvm.sqrt.* entries).
  79 declare double @sqrt(double) #0
  80 declare float @sqrtf(float) #0
  81
  82 declare double @exp2(double) #0
  83 declare float @exp2f(float) #0
  84 declare double @llvm.exp2.f64(double) #0
  85 declare float @llvm.exp2.f32(float) #0
  86
  ; sin_f64: a 1000-iteration loop that truncates the induction variable to i32,
  ; converts it to double, calls libm @sin on it and stores the result to
  ; %varray[iv].
  ; Expected vectorized call by forced width: VF2 amd_vrd2_sin, VF4 amd_vrd4_sin,
  ; VF8 amd_vrd8_sin, VF16 the generic llvm.sin.v16f64 intrinsic.
  87 define void @sin_f64(ptr nocapture %varray) {
  88 ; CHECK-LABEL: @sin_f64(
  89 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_sin(<2 x double> [[TMP4:%.*]])
  90 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_sin(<4 x double> [[TMP4:%.*]])
  91 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_sin(<8 x double> [[TMP4:%.*]])
  92 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.sin.v16f64(<16 x double> [[TMP4:%.*]])
  93 ; CHECK:        ret void
  94 ;
  95 entry:
  96   br label %for.body
  97
  98 for.body:
  99   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 100   %tmp = trunc i64 %iv to i32
 101   %conv = sitofp i32 %tmp to double
 102   %call = tail call double @sin(double %conv)
 103   %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
 104   store double %call, ptr %arrayidx, align 4
 105   %iv.next = add nuw nsw i64 %iv, 1
 106   %exitcond = icmp eq i64 %iv.next, 1000
 107   br i1 %exitcond, label %for.end, label %for.body
 108
 109 for.end:
 110   ret void
 111 }
 112
 ; sin_f32: a 1000-iteration loop that truncates the induction variable to i32,
 ; converts it to float, calls libm @sinf on it and stores the result to
 ; %varray[iv].
 ; Expected vectorized call by forced width: VF2 the generic llvm.sin.v2f32
 ; intrinsic, VF4 amd_vrs4_sinf, VF8 amd_vrs8_sinf, VF16 amd_vrs16_sinf.
 113 define void @sin_f32(ptr nocapture %varray) {
 114 ; CHECK-LABEL: @sin_f32(
 115 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4:%.*]])
 116 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_sinf(<4 x float> [[TMP4:%.*]])
 117 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_sinf(<8 x float> [[TMP4:%.*]])
 118 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_sinf(<16 x float> [[TMP4:%.*]])
 119 ; CHECK:        ret void
 120 ;
 121 entry:
 122   br label %for.body
 123
 124 for.body:
 125   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 126   %tmp = trunc i64 %iv to i32
 127   %conv = sitofp i32 %tmp to float
 128   %call = tail call float @sinf(float %conv)
 129   %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
 130   store float %call, ptr %arrayidx, align 4
 131   %iv.next = add nuw nsw i64 %iv, 1
 132   %exitcond = icmp eq i64 %iv.next, 1000
 133   br i1 %exitcond, label %for.end, label %for.body
 134
 135 for.end:
 136   ret void
 137 }
 138
 ; sin_f64_intrinsic: same loop shape as the libm variant, but the scalar call is
 ; the @llvm.sin.f64 intrinsic. The result for each of the 1000 iterations is
 ; stored to %varray[iv].
 ; Expected vectorized call by forced width: VF2 amd_vrd2_sin, VF4 amd_vrd4_sin,
 ; VF8 amd_vrd8_sin, VF16 the generic llvm.sin.v16f64 intrinsic.
 139 define void @sin_f64_intrinsic(ptr nocapture %varray) {
 140 ; CHECK-LABEL: @sin_f64_intrinsic(
 141 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_sin(<2 x double> [[TMP4:%.*]])
 142 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_sin(<4 x double> [[TMP4:%.*]])
 143 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_sin(<8 x double> [[TMP4:%.*]])
 144 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.sin.v16f64(<16 x double> [[TMP4:%.*]])
 145 ; CHECK:        ret void
 146 ;
 147 entry:
 148   br label %for.body
 149
 150 for.body:
 151   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 152   %tmp = trunc i64 %iv to i32
 153   %conv = sitofp i32 %tmp to double
 154   %call = tail call double @llvm.sin.f64(double %conv)
 155   %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
 156   store double %call, ptr %arrayidx, align 4
 157   %iv.next = add nuw nsw i64 %iv, 1
 158   %exitcond = icmp eq i64 %iv.next, 1000
 159   br i1 %exitcond, label %for.end, label %for.body
 160
 161 for.end:
 162   ret void
 163 }
 164
 ; sin_f32_intrinsic: same loop shape as the libm variant, but the scalar call is
 ; the @llvm.sin.f32 intrinsic. The result for each of the 1000 iterations is
 ; stored to %varray[iv].
 ; Expected vectorized call by forced width: VF2 the generic llvm.sin.v2f32
 ; intrinsic, VF4 amd_vrs4_sinf, VF8 amd_vrs8_sinf, VF16 amd_vrs16_sinf.
 165 define void @sin_f32_intrinsic(ptr nocapture %varray) {
 166 ; CHECK-LABEL: @sin_f32_intrinsic(
 167 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4:%.*]])
 168 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_sinf(<4 x float> [[TMP4:%.*]])
 169 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_sinf(<8 x float> [[TMP4:%.*]])
 170 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_sinf(<16 x float> [[TMP4:%.*]])
 171 ; CHECK:        ret void
 172 ;
 173 entry:
 174   br label %for.body
 175
 176 for.body:
 177   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 178   %tmp = trunc i64 %iv to i32
 179   %conv = sitofp i32 %tmp to float
 180   %call = tail call float @llvm.sin.f32(float %conv)
 181   %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
 182   store float %call, ptr %arrayidx, align 4
 183   %iv.next = add nuw nsw i64 %iv, 1
 184   %exitcond = icmp eq i64 %iv.next, 1000
 185   br i1 %exitcond, label %for.end, label %for.body
 186
 187 for.end:
 188   ret void
 189 }
 190
 ; cos_f64: a 1000-iteration loop that truncates the induction variable to i32,
 ; converts it to double, calls libm @cos on it and stores the result to
 ; %varray[iv].
 ; Expected vectorized call by forced width: VF2 amd_vrd2_cos, VF4 amd_vrd4_cos,
 ; VF8 amd_vrd8_cos, VF16 the generic llvm.cos.v16f64 intrinsic.
 191 define void @cos_f64(ptr nocapture %varray) {
 192 ; CHECK-LABEL: @cos_f64(
 193 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cos(<2 x double> [[TMP4:%.*]])
 194 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_cos(<4 x double> [[TMP4:%.*]])
 195 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_cos(<8 x double> [[TMP4:%.*]])
 196 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.cos.v16f64(<16 x double> [[TMP4:%.*]])
 197 ; CHECK:        ret void
 198 ;
 199 entry:
 200   br label %for.body
 201
 202 for.body:
 203   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 204   %tmp = trunc i64 %iv to i32
 205   %conv = sitofp i32 %tmp to double
 206   %call = tail call double @cos(double %conv)
 207   %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
 208   store double %call, ptr %arrayidx, align 4
 209   %iv.next = add nuw nsw i64 %iv, 1
 210   %exitcond = icmp eq i64 %iv.next, 1000
 211   br i1 %exitcond, label %for.end, label %for.body
 212
 213 for.end:
 214   ret void
 215 }
 216
 ; cos_f32: a 1000-iteration loop that truncates the induction variable to i32,
 ; converts it to float, calls libm @cosf on it and stores the result to
 ; %varray[iv].
 ; Expected vectorized call by forced width: VF2 the generic llvm.cos.v2f32
 ; intrinsic, VF4 amd_vrs4_cosf, VF8 amd_vrs8_cosf, VF16 amd_vrs16_cosf.
 217 define void @cos_f32(ptr nocapture %varray) {
 218 ; CHECK-LABEL: @cos_f32(
 219 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4:%.*]])
 220 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_cosf(<4 x float> [[TMP4:%.*]])
 221 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_cosf(<8 x float> [[TMP4:%.*]])
 222 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_cosf(<16 x float> [[TMP4:%.*]])
 223 ; CHECK:        ret void
 224 ;
 225 entry:
 226   br label %for.body
 227
 228 for.body:
 229   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 230   %tmp = trunc i64 %iv to i32
 231   %conv = sitofp i32 %tmp to float
 232   %call = tail call float @cosf(float %conv)
 233   %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
 234   store float %call, ptr %arrayidx, align 4
 235   %iv.next = add nuw nsw i64 %iv, 1
 236   %exitcond = icmp eq i64 %iv.next, 1000
 237   br i1 %exitcond, label %for.end, label %for.body
 238
 239 for.end:
 240   ret void
 241 }
 242
 ; cos_f64_intrinsic: same loop shape as the libm variant, but the scalar call is
 ; the @llvm.cos.f64 intrinsic. The result for each of the 1000 iterations is
 ; stored to %varray[iv].
 ; Expected vectorized call by forced width: VF2 amd_vrd2_cos, VF4 amd_vrd4_cos,
 ; VF8 amd_vrd8_cos, VF16 the generic llvm.cos.v16f64 intrinsic.
 243 define void @cos_f64_intrinsic(ptr nocapture %varray) {
 244 ; CHECK-LABEL: @cos_f64_intrinsic(
 245 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cos(<2 x double> [[TMP4:%.*]])
 246 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_cos(<4 x double> [[TMP4:%.*]])
 247 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_cos(<8 x double> [[TMP4:%.*]])
 248 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.cos.v16f64(<16 x double> [[TMP4:%.*]])
 249 ; CHECK:        ret void
 250 ;
 251 entry:
 252   br label %for.body
 253
 254 for.body:
 255   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 256   %tmp = trunc i64 %iv to i32
 257   %conv = sitofp i32 %tmp to double
 258   %call = tail call double @llvm.cos.f64(double %conv)
 259   %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
 260   store double %call, ptr %arrayidx, align 4
 261   %iv.next = add nuw nsw i64 %iv, 1
 262   %exitcond = icmp eq i64 %iv.next, 1000
 263   br i1 %exitcond, label %for.end, label %for.body
 264
 265 for.end:
 266   ret void
 267 }
 268
 ; cos_f32_intrinsic: same loop shape as the libm variant, but the scalar call is
 ; the @llvm.cos.f32 intrinsic. The result for each of the 1000 iterations is
 ; stored to %varray[iv].
 ; Expected vectorized call by forced width: VF2 the generic llvm.cos.v2f32
 ; intrinsic, VF4 amd_vrs4_cosf, VF8 amd_vrs8_cosf, VF16 amd_vrs16_cosf.
 269 define void @cos_f32_intrinsic(ptr nocapture %varray) {
 270 ; CHECK-LABEL: @cos_f32_intrinsic(
 271 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4:%.*]])
 272 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_cosf(<4 x float> [[TMP4:%.*]])
 273 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_cosf(<8 x float> [[TMP4:%.*]])
 274 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_cosf(<16 x float> [[TMP4:%.*]])
 275 ; CHECK:        ret void
 276 ;
 277 entry:
 278   br label %for.body
 279
 280 for.body:
 281   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 282   %tmp = trunc i64 %iv to i32
 283   %conv = sitofp i32 %tmp to float
 284   %call = tail call float @llvm.cos.f32(float %conv)
 285   %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
 286   store float %call, ptr %arrayidx, align 4
 287   %iv.next = add nuw nsw i64 %iv, 1
 288   %exitcond = icmp eq i64 %iv.next, 1000
 289   br i1 %exitcond, label %for.end, label %for.body
 290
 291 for.end:
 292   ret void
 293 }
 294
 ; tan_f64: a 1000-iteration loop that truncates the induction variable to i32,
 ; converts it to double, calls libm @tan on it and stores the result to
 ; %varray[iv].
 ; Expected vectorized call by forced width: VF2 amd_vrd2_tan, VF4 amd_vrd4_tan,
 ; VF8 amd_vrd8_tan, VF16 the generic llvm.tan.v16f64 intrinsic.
 295 define void @tan_f64(ptr nocapture %varray) {
 296 ; CHECK-LABEL: @tan_f64(
 297 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_tan(<2 x double> [[TMP4:%.*]])
 298 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
 299 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
 300 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.tan.v16f64(<16 x double> [[TMP4:%.*]])
 301 ; CHECK:        ret void
 302 ;
 303 entry:
 304   br label %for.body
 305
 306 for.body:
 307   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 308   %tmp = trunc i64 %iv to i32
 309   %conv = sitofp i32 %tmp to double
 310   %call = tail call double @tan(double %conv)
 311   %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
 312   store double %call, ptr %arrayidx, align 4
 313   %iv.next = add nuw nsw i64 %iv, 1
 314   %exitcond = icmp eq i64 %iv.next, 1000
 315   br i1 %exitcond, label %for.end, label %for.body
 316
 317 for.end:
 318   ret void
 319 }
 320
 ; tan_f32: a 1000-iteration loop that truncates the induction variable to i32,
 ; converts it to float, calls libm @tanf on it and stores the result to
 ; %varray[iv].
 ; Expected vectorized call by forced width: VF2 the generic llvm.tan.v2f32
 ; intrinsic, VF4 amd_vrs4_tanf, VF8 amd_vrs8_tanf, VF16 amd_vrs16_tanf.
 321 define void @tan_f32(ptr nocapture %varray) {
 322 ; CHECK-LABEL: @tan_f32(
 323 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP4:%.*]])
 324 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
 325 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanf(<8 x float> [[TMP4:%.*]])
 326 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
 327 ; CHECK:        ret void
 328 ;
 329 entry:
 330   br label %for.body
 331
 332 for.body:
 333   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 334   %tmp = trunc i64 %iv to i32
 335   %conv = sitofp i32 %tmp to float
 336   %call = tail call float @tanf(float %conv)
 337   %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
 338   store float %call, ptr %arrayidx, align 4
 339   %iv.next = add nuw nsw i64 %iv, 1
 340   %exitcond = icmp eq i64 %iv.next, 1000
 341   br i1 %exitcond, label %for.end, label %for.body
 342
 343 for.end:
 344   ret void
 345 }
 346
 ; tan_f64_intrinsic: same loop shape as the libm variant, but the scalar call is
 ; the @llvm.tan.f64 intrinsic. The result for each of the 1000 iterations is
 ; stored to %varray[iv].
 ; Expected vectorized call by forced width: VF2 amd_vrd2_tan, VF4 amd_vrd4_tan,
 ; VF8 amd_vrd8_tan, VF16 the generic llvm.tan.v16f64 intrinsic.
 347 define void @tan_f64_intrinsic(ptr nocapture %varray) {
 348 ; CHECK-LABEL: @tan_f64_intrinsic(
 349 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_tan(<2 x double> [[TMP4:%.*]])
 350 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
 351 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
 352 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.tan.v16f64(<16 x double> [[TMP4:%.*]])
 353 ; CHECK:        ret void
 354 ;
 355 entry:
 356   br label %for.body
 357
 358 for.body:
 359   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 360   %tmp = trunc i64 %iv to i32
 361   %conv = sitofp i32 %tmp to double
 362   %call = tail call double @llvm.tan.f64(double %conv)
 363   %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
 364   store double %call, ptr %arrayidx, align 4
 365   %iv.next = add nuw nsw i64 %iv, 1
 366   %exitcond = icmp eq i64 %iv.next, 1000
 367   br i1 %exitcond, label %for.end, label %for.body
 368
 369 for.end:
 370   ret void
 371 }
 372
 ; tan_f32_intrinsic: same loop shape as the libm variant, but the scalar call is
 ; the @llvm.tan.f32 intrinsic. The result for each of the 1000 iterations is
 ; stored to %varray[iv].
 ; Expected vectorized call by forced width: VF2 the generic llvm.tan.v2f32
 ; intrinsic, VF4 amd_vrs4_tanf, VF8 amd_vrs8_tanf, VF16 amd_vrs16_tanf.
 373 define void @tan_f32_intrinsic(ptr nocapture %varray) {
 374 ; CHECK-LABEL: @tan_f32_intrinsic(
 375 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP4:%.*]])
 376 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
 377 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanf(<8 x float> [[TMP4:%.*]])
 378 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
 379 ; CHECK:        ret void
 380 ;
 381 entry:
 382   br label %for.body
 383
 384 for.body:
 385   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 386   %tmp = trunc i64 %iv to i32
 387   %conv = sitofp i32 %tmp to float
 388   %call = tail call float @llvm.tan.f32(float %conv)
 389   %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
 390   store float %call, ptr %arrayidx, align 4
 391   %iv.next = add nuw nsw i64 %iv, 1
 392   %exitcond = icmp eq i64 %iv.next, 1000
 393   br i1 %exitcond, label %for.end, label %for.body
 394
 395 for.end:
 396   ret void
 397 }
 398
 ; acos_f64: a 1000-iteration loop that truncates the induction variable to i32,
 ; converts it to double, calls libm @acos on it and stores the result to
 ; %varray[iv].
 ; At every forced width (2, 4, 8, 16) the expected call is the generic
 ; llvm.acos.v<N>f64 intrinsic; no amd_* routine is expected for double acos.
 399 define void @acos_f64(ptr nocapture %varray) {
 400 ; CHECK-LABEL: @acos_f64(
 401 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @llvm.acos.v2f64(<2 x double> [[TMP4:%.*]])
 402 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.acos.v4f64(<4 x double> [[TMP4:%.*]])
 403 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.acos.v8f64(<8 x double> [[TMP4:%.*]])
 404 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.acos.v16f64(<16 x double> [[TMP4:%.*]])
 405 ; CHECK:        ret void
 406 ;
 407 entry:
 408   br label %for.body
 409
 410 for.body:
 411   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 412   %tmp = trunc i64 %iv to i32
 413   %conv = sitofp i32 %tmp to double
 414   %call = tail call double @acos(double %conv)
 415   %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
 416   store double %call, ptr %arrayidx, align 4
 417   %iv.next = add nuw nsw i64 %iv, 1
 418   %exitcond = icmp eq i64 %iv.next, 1000
 419   br i1 %exitcond, label %for.end, label %for.body
 420
 421 for.end:
 422   ret void
 423 }
 424
 ; acos_f32: a 1000-iteration loop that truncates the induction variable to i32,
 ; converts it to float, calls libm @acosf on it and stores the result to
 ; %varray[iv].
 ; Expected vectorized call by forced width: VF2 the generic llvm.acos.v2f32
 ; intrinsic, VF4 amd_vrs4_acosf, VF8 amd_vrs8_acosf, VF16 amd_vrs16_acosf.
 425 define void @acos_f32(ptr nocapture %varray) {
 426 ; CHECK-LABEL: @acos_f32(
 427 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4:%.*]])
 428 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_acosf(<4 x float> [[TMP4:%.*]])
 429 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_acosf(<8 x float> [[TMP4:%.*]])
 430 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_acosf(<16 x float> [[TMP4:%.*]])
 431 ; CHECK:        ret void
 432 ;
 433 entry:
 434   br label %for.body
 435
 436 for.body:
 437   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 438   %tmp = trunc i64 %iv to i32
 439   %conv = sitofp i32 %tmp to float
 440   %call = tail call float @acosf(float %conv)
 441   %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
 442   store float %call, ptr %arrayidx, align 4
 443   %iv.next = add nuw nsw i64 %iv, 1
 444   %exitcond = icmp eq i64 %iv.next, 1000
 445   br i1 %exitcond, label %for.end, label %for.body
 446
 447 for.end:
 448   ret void
 449 }
 450
 ; acos_f64_intrinsic: same loop shape as the libm variant, but the scalar call
 ; is the @llvm.acos.f64 intrinsic. The result for each of the 1000 iterations is
 ; stored to %varray[iv].
 ; At every forced width (2, 4, 8, 16) the expected call is the generic
 ; llvm.acos.v<N>f64 intrinsic; no amd_* routine is expected for double acos.
 451 define void @acos_f64_intrinsic(ptr nocapture %varray) {
 452 ; CHECK-LABEL: @acos_f64_intrinsic(
 453 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @llvm.acos.v2f64(<2 x double> [[TMP4:%.*]])
 454 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.acos.v4f64(<4 x double> [[TMP4:%.*]])
 455 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.acos.v8f64(<8 x double> [[TMP4:%.*]])
 456 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.acos.v16f64(<16 x double> [[TMP4:%.*]])
 457 ; CHECK:        ret void
 458 ;
 459 entry:
 460   br label %for.body
 461
 462 for.body:
 463   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 464   %tmp = trunc i64 %iv to i32
 465   %conv = sitofp i32 %tmp to double
 466   %call = tail call double @llvm.acos.f64(double %conv)
 467   %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
 468   store double %call, ptr %arrayidx, align 4
 469   %iv.next = add nuw nsw i64 %iv, 1
 470   %exitcond = icmp eq i64 %iv.next, 1000
 471   br i1 %exitcond, label %for.end, label %for.body
 472
 473 for.end:
 474   ret void
 475 }
 476
 ; acos_f32_intrinsic: same loop shape as the libm variant, but the scalar call
 ; is the @llvm.acos.f32 intrinsic. The result for each of the 1000 iterations is
 ; stored to %varray[iv].
 ; Expected vectorized call by forced width: VF2 the generic llvm.acos.v2f32
 ; intrinsic, VF4 amd_vrs4_acosf, VF8 amd_vrs8_acosf, VF16 amd_vrs16_acosf.
 477 define void @acos_f32_intrinsic(ptr nocapture %varray) {
 478 ; CHECK-LABEL: @acos_f32_intrinsic(
 479 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4:%.*]])
 480 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_acosf(<4 x float> [[TMP4:%.*]])
 481 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_acosf(<8 x float> [[TMP4:%.*]])
 482 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_acosf(<16 x float> [[TMP4:%.*]])
 483 ; CHECK:        ret void
 484 ;
 485 entry:
 486   br label %for.body
 487
 488 for.body:
 489   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 490   %tmp = trunc i64 %iv to i32
 491   %conv = sitofp i32 %tmp to float
 492   %call = tail call float @llvm.acos.f32(float %conv)
 493   %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
 494   store float %call, ptr %arrayidx, align 4
 495   %iv.next = add nuw nsw i64 %iv, 1
 496   %exitcond = icmp eq i64 %iv.next, 1000
 497   br i1 %exitcond, label %for.end, label %for.body
 498
 499 for.end:
 500   ret void
 501 }
 502
 ; asin_f64: a 1000-iteration loop that truncates the induction variable to i32,
 ; converts it to double, calls libm @asin on it and stores the result to
 ; %varray[iv].
 ; Expected vectorized call by forced width: only VF8 expects an amd_* routine
 ; (amd_vrd8_asin); VF2, VF4 and VF16 expect the generic llvm.asin.v<N>f64
 ; intrinsic.
 503 define void @asin_f64(ptr nocapture %varray) {
 504 ; CHECK-LABEL: @asin_f64(
 505 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @llvm.asin.v2f64(<2 x double> [[TMP4:%.*]])
 506 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.asin.v4f64(<4 x double> [[TMP4:%.*]])
 507 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_asin(<8 x double> [[TMP4:%.*]])
 508 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.asin.v16f64(<16 x double> [[TMP4:%.*]])
 509 ; CHECK:        ret void
 510 ;
 511 entry:
 512   br label %for.body
 513
 514 for.body:
 515   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 516   %tmp = trunc i64 %iv to i32
 517   %conv = sitofp i32 %tmp to double
 518   %call = tail call double @asin(double %conv)
 519   %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
 520   store double %call, ptr %arrayidx, align 4
 521   %iv.next = add nuw nsw i64 %iv, 1
 522   %exitcond = icmp eq i64 %iv.next, 1000
 523   br i1 %exitcond, label %for.end, label %for.body
 524
 525 for.end:
 526   ret void
 527 }
 528
 ; asin_f32: a 1000-iteration loop that truncates the induction variable to i32,
 ; converts it to float, calls libm @asinf on it and stores the result to
 ; %varray[iv].
 ; Expected vectorized call by forced width: VF2 the generic llvm.asin.v2f32
 ; intrinsic, VF4 amd_vrs4_asinf, VF8 amd_vrs8_asinf, VF16 amd_vrs16_asinf.
 529 define void @asin_f32(ptr nocapture %varray) {
 530 ; CHECK-LABEL: @asin_f32(
 531 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP4:%.*]])
 532 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_asinf(<4 x float> [[TMP4:%.*]])
 533 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_asinf(<8 x float> [[TMP4:%.*]])
 534 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_asinf(<16 x float> [[TMP4:%.*]])
 535 ; CHECK:        ret void
 536 ;
 537 entry:
 538   br label %for.body
 539
 540 for.body:
 541   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 542   %tmp = trunc i64 %iv to i32
 543   %conv = sitofp i32 %tmp to float
 544   %call = tail call float @asinf(float %conv)
 545   %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
 546   store float %call, ptr %arrayidx, align 4
 547   %iv.next = add nuw nsw i64 %iv, 1
 548   %exitcond = icmp eq i64 %iv.next, 1000
 549   br i1 %exitcond, label %for.end, label %for.body
 550
 551 for.end:
 552   ret void
 553 }
 554
 ; asin_f64_intrinsic: same loop shape as the libm variant, but the scalar call
 ; is the @llvm.asin.f64 intrinsic. The result for each of the 1000 iterations is
 ; stored to %varray[iv].
 ; Expected vectorized call by forced width: only VF8 expects an amd_* routine
 ; (amd_vrd8_asin); VF2, VF4 and VF16 expect the generic llvm.asin.v<N>f64
 ; intrinsic.
 555 define void @asin_f64_intrinsic(ptr nocapture %varray) {
 556 ; CHECK-LABEL: @asin_f64_intrinsic(
 557 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @llvm.asin.v2f64(<2 x double> [[TMP4:%.*]])
 558 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.asin.v4f64(<4 x double> [[TMP4:%.*]])
 559 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_asin(<8 x double> [[TMP4:%.*]])
 560 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.asin.v16f64(<16 x double> [[TMP4:%.*]])
 561 ; CHECK:        ret void
 562 ;
 563 entry:
 564   br label %for.body
 565
 566 for.body:
 567   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 568   %tmp = trunc i64 %iv to i32
 569   %conv = sitofp i32 %tmp to double
 570   %call = tail call double @llvm.asin.f64(double %conv)
 571   %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
 572   store double %call, ptr %arrayidx, align 4
 573   %iv.next = add nuw nsw i64 %iv, 1
 574   %exitcond = icmp eq i64 %iv.next, 1000
 575   br i1 %exitcond, label %for.end, label %for.body
 576
 577 for.end:
 578   ret void
 579 }
 580
 ; asin_f32_intrinsic: same loop shape as the libm variant, but the scalar call
 ; is the @llvm.asin.f32 intrinsic. The result for each of the 1000 iterations is
 ; stored to %varray[iv].
 ; Expected vectorized call by forced width: VF2 the generic llvm.asin.v2f32
 ; intrinsic, VF4 amd_vrs4_asinf, VF8 amd_vrs8_asinf, VF16 amd_vrs16_asinf.
 581 define void @asin_f32_intrinsic(ptr nocapture %varray) {
 582 ; CHECK-LABEL: @asin_f32_intrinsic(
 583 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP4:%.*]])
 584 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_asinf(<4 x float> [[TMP4:%.*]])
 585 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_asinf(<8 x float> [[TMP4:%.*]])
 586 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_asinf(<16 x float> [[TMP4:%.*]])
 587 ; CHECK:        ret void
 588 ;
 589 entry:
 590   br label %for.body
 591
 592 for.body:
 593   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 594   %tmp = trunc i64 %iv to i32
 595   %conv = sitofp i32 %tmp to float
 596   %call = tail call float @llvm.asin.f32(float %conv)
 597   %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
 598   store float %call, ptr %arrayidx, align 4
 599   %iv.next = add nuw nsw i64 %iv, 1
 600   %exitcond = icmp eq i64 %iv.next, 1000
 601   br i1 %exitcond, label %for.end, label %for.body
 602
 603 for.end:
 604   ret void
 605 }
 606
 ; atan_f64: a 1000-iteration loop that truncates the induction variable to i32,
 ; converts it to double, calls libm @atan on it and stores the result to
 ; %varray[iv].
 ; Expected vectorized call by forced width: VF2 amd_vrd2_atan, VF4
 ; amd_vrd4_atan, VF8 amd_vrd8_atan, VF16 the generic llvm.atan.v16f64 intrinsic.
 607 define void @atan_f64(ptr nocapture %varray) {
 608 ; CHECK-LABEL: @atan_f64(
 609 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_atan(<2 x double> [[TMP4:%.*]])
 610 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_atan(<4 x double> [[TMP4:%.*]])
 611 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_atan(<8 x double> [[TMP4:%.*]])
 612 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.atan.v16f64(<16 x double> [[TMP4:%.*]])
 613 ; CHECK:        ret void
 614 ;
 615 entry:
 616   br label %for.body
 617
 618 for.body:
 619   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 620   %tmp = trunc i64 %iv to i32
 621   %conv = sitofp i32 %tmp to double
 622   %call = tail call double @atan(double %conv)
 623   %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
 624   store double %call, ptr %arrayidx, align 4
 625   %iv.next = add nuw nsw i64 %iv, 1
 626   %exitcond = icmp eq i64 %iv.next, 1000
 627   br i1 %exitcond, label %for.end, label %for.body
 628
 629 for.end:
 630   ret void
 631 }
 632
 ; atan_f32: a 1000-iteration loop that truncates the induction variable to i32,
 ; converts it to float, calls libm @atanf on it and stores the result to
 ; %varray[iv].
 ; Expected vectorized call by forced width: VF2 the generic llvm.atan.v2f32
 ; intrinsic, VF4 amd_vrs4_atanf, VF8 amd_vrs8_atanf, VF16 amd_vrs16_atanf.
 633 define void @atan_f32(ptr nocapture %varray) {
 634 ; CHECK-LABEL: @atan_f32(
 635 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP4:%.*]])
 636 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_atanf(<4 x float> [[TMP4:%.*]])
 637 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_atanf(<8 x float> [[TMP4:%.*]])
 638 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_atanf(<16 x float> [[TMP4:%.*]])
 639 ; CHECK:        ret void
 640 ;
 641 entry:
 642   br label %for.body
 643
 644 for.body:
 645   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 646   %tmp = trunc i64 %iv to i32
 647   %conv = sitofp i32 %tmp to float
 648   %call = tail call float @atanf(float %conv)
 649   %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
 650   store float %call, ptr %arrayidx, align 4
 651   %iv.next = add nuw nsw i64 %iv, 1
 652   %exitcond = icmp eq i64 %iv.next, 1000
 653   br i1 %exitcond, label %for.end, label %for.body
 654
 655 for.end:
 656   ret void
 657 }
 658
 ; atan_f64_intrinsic: same loop shape as the libm variant, but the scalar call
 ; is the @llvm.atan.f64 intrinsic. The result for each of the 1000 iterations is
 ; stored to %varray[iv].
 ; Expected vectorized call by forced width: VF2 amd_vrd2_atan, VF4
 ; amd_vrd4_atan, VF8 amd_vrd8_atan, VF16 the generic llvm.atan.v16f64 intrinsic.
 659 define void @atan_f64_intrinsic(ptr nocapture %varray) {
 660 ; CHECK-LABEL: @atan_f64_intrinsic(
 661 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_atan(<2 x double> [[TMP4:%.*]])
 662 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_atan(<4 x double> [[TMP4:%.*]])
 663 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_atan(<8 x double> [[TMP4:%.*]])
 664 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.atan.v16f64(<16 x double> [[TMP4:%.*]])
 665 ; CHECK:        ret void
 666 ;
 667 entry:
 668   br label %for.body
 669
 670 for.body:
 671   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 672   %tmp = trunc i64 %iv to i32
 673   %conv = sitofp i32 %tmp to double
 674   %call = tail call double @llvm.atan.f64(double %conv)
 675   %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
 676   store double %call, ptr %arrayidx, align 4
 677   %iv.next = add nuw nsw i64 %iv, 1
 678   %exitcond = icmp eq i64 %iv.next, 1000
 679   br i1 %exitcond, label %for.end, label %for.body
 680
 681 for.end:
 682   ret void
 683 }
 684
 ; atan_f32_intrinsic: same loop shape as the libm variant, but the scalar call
 ; is the @llvm.atan.f32 intrinsic. The result for each of the 1000 iterations is
 ; stored to %varray[iv].
 ; Expected vectorized call by forced width: VF2 the generic llvm.atan.v2f32
 ; intrinsic, VF4 amd_vrs4_atanf, VF8 amd_vrs8_atanf, VF16 amd_vrs16_atanf.
 685 define void @atan_f32_intrinsic(ptr nocapture %varray) {
 686 ; CHECK-LABEL: @atan_f32_intrinsic(
 687 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP4:%.*]])
 688 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_atanf(<4 x float> [[TMP4:%.*]])
 689 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_atanf(<8 x float> [[TMP4:%.*]])
 690 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_atanf(<16 x float> [[TMP4:%.*]])
 691 ; CHECK:        ret void
 692 ;
 693 entry:
 694   br label %for.body
 695
 696 for.body:
 697   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 698   %tmp = trunc i64 %iv to i32
 699   %conv = sitofp i32 %tmp to float
 700   %call = tail call float @llvm.atan.f32(float %conv)
 701   %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
 702   store float %call, ptr %arrayidx, align 4
 703   %iv.next = add nuw nsw i64 %iv, 1
 704   %exitcond = icmp eq i64 %iv.next, 1000
 705   br i1 %exitcond, label %for.end, label %for.body
 706
 707 for.end:
 708   ret void
 709 }
 710
 ; sinh_f64: a 1000-iteration loop that truncates the induction variable to i32,
 ; converts it to double, calls libm @sinh on it and stores the result to
 ; %varray[iv].
 ; At every forced width (2, 4, 8, 16) the expected call is the generic
 ; llvm.sinh.v<N>f64 intrinsic; no amd_* routine is expected for double sinh.
 711 define void @sinh_f64(ptr nocapture %varray) {
 712 ; CHECK-LABEL: @sinh_f64(
 713 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @llvm.sinh.v2f64(<2 x double> [[TMP4:%.*]])
 714 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.sinh.v4f64(<4 x double> [[TMP4:%.*]])
 715 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.sinh.v8f64(<8 x double> [[TMP4:%.*]])
 716 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.sinh.v16f64(<16 x double> [[TMP4:%.*]])
 717 ; CHECK:        ret void
 718 ;
 719 entry:
 720   br label %for.body
 721
 722 for.body:
 723   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 724   %tmp = trunc i64 %iv to i32
 725   %conv = sitofp i32 %tmp to double
 726   %call = tail call double @sinh(double %conv)
 727   %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
 728   store double %call, ptr %arrayidx, align 4
 729   %iv.next = add nuw nsw i64 %iv, 1
 730   %exitcond = icmp eq i64 %iv.next, 1000
 731   br i1 %exitcond, label %for.end, label %for.body
 732
 733 for.end:
 734   ret void
 735 }
 736
 ; sinh_f32: a 1000-iteration loop that truncates the induction variable to i32,
 ; converts it to float, calls libm @sinhf on it and stores the result to
 ; %varray[iv].
 ; At every forced width (2, 4, 8, 16) the expected call is the generic
 ; llvm.sinh.v<N>f32 intrinsic; no amd_* routine is expected for float sinh.
 737 define void @sinh_f32(ptr nocapture %varray) {
 738 ; CHECK-LABEL: @sinh_f32(
 739 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP4:%.*]])
 740 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @llvm.sinh.v4f32(<4 x float> [[TMP4:%.*]])
 741 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @llvm.sinh.v8f32(<8 x float> [[TMP4:%.*]])
 742 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @llvm.sinh.v16f32(<16 x float> [[TMP4:%.*]])
 743 ; CHECK:        ret void
 744 ;
 745 entry:
 746   br label %for.body
 747
 748 for.body:
 749   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 750   %tmp = trunc i64 %iv to i32
 751   %conv = sitofp i32 %tmp to float
 752   %call = tail call float @sinhf(float %conv)
 753   %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
 754   store float %call, ptr %arrayidx, align 4
 755   %iv.next = add nuw nsw i64 %iv, 1
 756   %exitcond = icmp eq i64 %iv.next, 1000
 757   br i1 %exitcond, label %for.end, label %for.body
 758
 759 for.end:
 760   ret void
 761 }
 762
 ; sinh_f64_intrinsic: same loop shape as the libm variant, but the scalar call
 ; is the @llvm.sinh.f64 intrinsic. The result for each of the 1000 iterations is
 ; stored to %varray[iv].
 ; At every forced width (2, 4, 8, 16) the expected call is the generic
 ; llvm.sinh.v<N>f64 intrinsic; no amd_* routine is expected for double sinh.
 763 define void @sinh_f64_intrinsic(ptr nocapture %varray) {
 764 ; CHECK-LABEL: @sinh_f64_intrinsic(
 765 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @llvm.sinh.v2f64(<2 x double> [[TMP4:%.*]])
 766 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.sinh.v4f64(<4 x double> [[TMP4:%.*]])
 767 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.sinh.v8f64(<8 x double> [[TMP4:%.*]])
 768 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.sinh.v16f64(<16 x double> [[TMP4:%.*]])
 769 ; CHECK:        ret void
 770 ;
 771 entry:
 772   br label %for.body
 773
 774 for.body:
 775   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
 776   %tmp = trunc i64 %iv to i32
 777   %conv = sitofp i32 %tmp to double
 778   %call = tail call double @llvm.sinh.f64(double %conv)
 779   %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
 780   store double %call, ptr %arrayidx, align 4
 781   %iv.next = add nuw nsw i64 %iv, 1
 782   %exitcond = icmp eq i64 %iv.next, 1000
 783   br i1 %exitcond, label %for.end, label %for.body
 784
 785 for.end:
 786   ret void
 787 }
 788
 789 define void @sinh_f32_intrinsic(ptr nocapture %varray) {
 790 ; CHECK-LABEL: @sinh_f32_intrinsic(
 791 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP4:%.*]])
 792 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @llvm.sinh.v4f32(<4 x float> [[TMP4:%.*]])
 793 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @llvm.sinh.v8f32(<8 x float> [[TMP4:%.*]])
 794 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @llvm.sinh.v16f32(<16 x float> [[TMP4:%.*]])
 795 ; CHECK:        ret void
 796 ; All four VFs expect the generic llvm.sinh.* vector intrinsic; no amd_* routine is matched.
 797 start:
 798   br label %loop
 799
 800 loop:  ; i = 0..999: varray[i] = llvm.sinh.f32((float)(i32)i)
 801   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
 802   %out = getelementptr inbounds float, ptr %varray, i64 %idx
 803   %idx.i32 = trunc i64 %idx to i32
 804   %x = sitofp i32 %idx.i32 to float
 805   %y = tail call float @llvm.sinh.f32(float %x)
 806   store float %y, ptr %out, align 4
 807   %idx.next = add nuw nsw i64 %idx, 1
 808   %last = icmp eq i64 %idx.next, 1000
 809   br i1 %last, label %done, label %loop
 810
 811 done:
 812   ret void
 813 }
 814
 815 define void @cosh_f64(ptr nocapture %varray) {
 816 ; CHECK-LABEL: @cosh_f64(
 817 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
 818 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.cosh.v4f64(<4 x double> [[TMP4:%.*]])
 819 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.cosh.v8f64(<8 x double> [[TMP4:%.*]])
 820 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.cosh.v16f64(<16 x double> [[TMP4:%.*]])
 821 ; CHECK:        ret void
 822 ; VF2 expects @amd_vrd2_cosh; VF4/8/16 expect the generic llvm.cosh.* vector intrinsic.
 823 start:
 824   br label %loop
 825
 826 loop:  ; i = 0..999: varray[i] = cosh((double)(i32)i)
 827   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
 828   %out = getelementptr inbounds double, ptr %varray, i64 %idx
 829   %idx.i32 = trunc i64 %idx to i32
 830   %x = sitofp i32 %idx.i32 to double
 831   %y = tail call double @cosh(double %x)
 832   store double %y, ptr %out, align 4
 833   %idx.next = add nuw nsw i64 %idx, 1
 834   %last = icmp eq i64 %idx.next, 1000
 835   br i1 %last, label %done, label %loop
 836
 837 done:
 838   ret void
 839 }
 840
 841 define void @cosh_f32(ptr nocapture %varray) {
 842 ; CHECK-LABEL: @cosh_f32(
 843 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP4:%.*]])
 844 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
 845 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_coshf(<8 x float> [[TMP4:%.*]])
 846 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @llvm.cosh.v16f32(<16 x float> [[TMP4:%.*]])
 847 ; CHECK:        ret void
 848 ; VF4/8 expect @amd_vrs4_coshf/@amd_vrs8_coshf; VF2 and VF16 expect the generic llvm.cosh.* intrinsic.
 849 start:
 850   br label %loop
 851
 852 loop:  ; i = 0..999: varray[i] = coshf((float)(i32)i)
 853   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
 854   %out = getelementptr inbounds float, ptr %varray, i64 %idx
 855   %idx.i32 = trunc i64 %idx to i32
 856   %x = sitofp i32 %idx.i32 to float
 857   %y = tail call float @coshf(float %x)
 858   store float %y, ptr %out, align 4
 859   %idx.next = add nuw nsw i64 %idx, 1
 860   %last = icmp eq i64 %idx.next, 1000
 861   br i1 %last, label %done, label %loop
 862
 863 done:
 864   ret void
 865 }
 866
 867 define void @cosh_f64_intrinsic(ptr nocapture %varray) {
 868 ; CHECK-LABEL: @cosh_f64_intrinsic(
 869 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
 870 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.cosh.v4f64(<4 x double> [[TMP4:%.*]])
 871 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.cosh.v8f64(<8 x double> [[TMP4:%.*]])
 872 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.cosh.v16f64(<16 x double> [[TMP4:%.*]])
 873 ; CHECK:        ret void
 874 ; VF2 expects @amd_vrd2_cosh; VF4/8/16 expect the generic llvm.cosh.* vector intrinsic.
 875 start:
 876   br label %loop
 877
 878 loop:  ; i = 0..999: varray[i] = llvm.cosh.f64((double)(i32)i)
 879   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
 880   %out = getelementptr inbounds double, ptr %varray, i64 %idx
 881   %idx.i32 = trunc i64 %idx to i32
 882   %x = sitofp i32 %idx.i32 to double
 883   %y = tail call double @llvm.cosh.f64(double %x)
 884   store double %y, ptr %out, align 4
 885   %idx.next = add nuw nsw i64 %idx, 1
 886   %last = icmp eq i64 %idx.next, 1000
 887   br i1 %last, label %done, label %loop
 888
 889 done:
 890   ret void
 891 }
 892
 893 define void @cosh_f32_intrinsic(ptr nocapture %varray) {
 894 ; CHECK-LABEL: @cosh_f32_intrinsic(
 895 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP4:%.*]])
 896 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
 897 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_coshf(<8 x float> [[TMP4:%.*]])
 898 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @llvm.cosh.v16f32(<16 x float> [[TMP4:%.*]])
 899 ; CHECK:        ret void
 900 ; VF4/8 expect @amd_vrs4_coshf/@amd_vrs8_coshf; VF2 and VF16 expect the generic llvm.cosh.* intrinsic.
 901 start:
 902   br label %loop
 903
 904 loop:  ; i = 0..999: varray[i] = llvm.cosh.f32((float)(i32)i)
 905   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
 906   %out = getelementptr inbounds float, ptr %varray, i64 %idx
 907   %idx.i32 = trunc i64 %idx to i32
 908   %x = sitofp i32 %idx.i32 to float
 909   %y = tail call float @llvm.cosh.f32(float %x)
 910   store float %y, ptr %out, align 4
 911   %idx.next = add nuw nsw i64 %idx, 1
 912   %last = icmp eq i64 %idx.next, 1000
 913   br i1 %last, label %done, label %loop
 914
 915 done:
 916   ret void
 917 }
 918
 919 define void @tanh_f64(ptr nocapture %varray) {
 920 ; CHECK-LABEL: @tanh_f64(
 921 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @llvm.tanh.v2f64(<2 x double> [[TMP4:%.*]])
 922 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.tanh.v4f64(<4 x double> [[TMP4:%.*]])
 923 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.tanh.v8f64(<8 x double> [[TMP4:%.*]])
 924 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.tanh.v16f64(<16 x double> [[TMP4:%.*]])
 925 ; CHECK:        ret void
 926 ; All four VFs expect the generic llvm.tanh.* vector intrinsic; no amd_* routine is matched.
 927 start:
 928   br label %loop
 929
 930 loop:  ; i = 0..999: varray[i] = tanh((double)(i32)i)
 931   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
 932   %out = getelementptr inbounds double, ptr %varray, i64 %idx
 933   %idx.i32 = trunc i64 %idx to i32
 934   %x = sitofp i32 %idx.i32 to double
 935   %y = tail call double @tanh(double %x)
 936   store double %y, ptr %out, align 4
 937   %idx.next = add nuw nsw i64 %idx, 1
 938   %last = icmp eq i64 %idx.next, 1000
 939   br i1 %last, label %done, label %loop
 940
 941 done:
 942   ret void
 943 }
 944
 945 define void @tanh_f32(ptr nocapture %varray) {
 946 ; CHECK-LABEL: @tanh_f32(
 947 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4:%.*]])
 948 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
 949 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanhf(<8 x float> [[TMP4:%.*]])
 950 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])
 951 ; CHECK:        ret void
 952 ; VF4/8/16 expect @amd_vrs4_tanhf/@amd_vrs8_tanhf/@amd_vrs16_tanhf; VF2 expects llvm.tanh.v2f32.
 953 start:
 954   br label %loop
 955
 956 loop:  ; i = 0..999: varray[i] = tanhf((float)(i32)i)
 957   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
 958   %out = getelementptr inbounds float, ptr %varray, i64 %idx
 959   %idx.i32 = trunc i64 %idx to i32
 960   %x = sitofp i32 %idx.i32 to float
 961   %y = tail call float @tanhf(float %x)
 962   store float %y, ptr %out, align 4
 963   %idx.next = add nuw nsw i64 %idx, 1
 964   %last = icmp eq i64 %idx.next, 1000
 965   br i1 %last, label %done, label %loop
 966
 967 done:
 968   ret void
 969 }
 970
 971 define void @tanh_f64_intrinsic(ptr nocapture %varray) {
 972 ; CHECK-LABEL: @tanh_f64_intrinsic(
 973 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @llvm.tanh.v2f64(<2 x double> [[TMP4:%.*]])
 974 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.tanh.v4f64(<4 x double> [[TMP4:%.*]])
 975 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.tanh.v8f64(<8 x double> [[TMP4:%.*]])
 976 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.tanh.v16f64(<16 x double> [[TMP4:%.*]])
 977 ; CHECK:        ret void
 978 ; All four VFs expect the generic llvm.tanh.* vector intrinsic; no amd_* routine is matched.
 979 start:
 980   br label %loop
 981
 982 loop:  ; i = 0..999: varray[i] = llvm.tanh.f64((double)(i32)i)
 983   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
 984   %out = getelementptr inbounds double, ptr %varray, i64 %idx
 985   %idx.i32 = trunc i64 %idx to i32
 986   %x = sitofp i32 %idx.i32 to double
 987   %y = tail call double @llvm.tanh.f64(double %x)
 988   store double %y, ptr %out, align 4
 989   %idx.next = add nuw nsw i64 %idx, 1
 990   %last = icmp eq i64 %idx.next, 1000
 991   br i1 %last, label %done, label %loop
 992
 993 done:
 994   ret void
 995 }
 996
 997 define void @tanh_f32_intrinsic(ptr nocapture %varray) {
 998 ; CHECK-LABEL: @tanh_f32_intrinsic(
 999 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4:%.*]])
1000 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
1001 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanhf(<8 x float> [[TMP4:%.*]])
1002 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])
1003 ; CHECK:        ret void
1004 ; VF4/8/16 expect @amd_vrs4_tanhf/@amd_vrs8_tanhf/@amd_vrs16_tanhf; VF2 expects llvm.tanh.v2f32.
1005 start:
1006   br label %loop
1007
1008 loop:  ; i = 0..999: varray[i] = llvm.tanh.f32((float)(i32)i)
1009   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1010   %out = getelementptr inbounds float, ptr %varray, i64 %idx
1011   %idx.i32 = trunc i64 %idx to i32
1012   %x = sitofp i32 %idx.i32 to float
1013   %y = tail call float @llvm.tanh.f32(float %x)
1014   store float %y, ptr %out, align 4
1015   %idx.next = add nuw nsw i64 %idx, 1
1016   %last = icmp eq i64 %idx.next, 1000
1017   br i1 %last, label %done, label %loop
1018
1019 done:
1020   ret void
1021 }
1022
1023 define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
1024 ; CHECK-LABEL: @pow_f64(
1025 ; CHECK-VF2:    [[TMP8:%.*]] = call <2 x double> @amd_vrd2_pow(<2 x double> [[TMP4:%.*]], <2 x double> [[WIDE_LOAD:%.*]])
1026 ; CHECK-VF4:    [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
1027 ; CHECK-VF8:    [[TMP8:%.*]] = call <8 x double> @amd_vrd8_pow(<8 x double> [[TMP4:%.*]], <8 x double> [[WIDE_LOAD:%.*]])
1028 ; CHECK-VF16:   [[TMP8:%.*]] = call <16 x double> @llvm.pow.v16f64(<16 x double> [[TMP4:%.*]], <16 x double> [[WIDE_LOAD:%.*]])
1029 ; CHECK:        ret void
1030 ; VF2/4/8 expect @amd_vrd2_pow/@amd_vrd4_pow/@amd_vrd8_pow; VF16 expects the generic llvm.pow.v16f64.
1031 start:
1032   br label %loop
1033
1034 loop:  ; i = 0..999: varray[i] = pow((double)(i32)i, exp[i])
1035   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1036   %exp.addr = getelementptr inbounds double, ptr %exp, i64 %idx
1037   %power = load double, ptr %exp.addr, align 4
1038   %idx.i32 = trunc i64 %idx to i32
1039   %base = sitofp i32 %idx.i32 to double
1040   %y = tail call double @pow(double %base, double %power)
1041   %out = getelementptr inbounds double, ptr %varray, i64 %idx
1042   store double %y, ptr %out, align 4
1043   %idx.next = add nuw nsw i64 %idx, 1
1044   %last = icmp eq i64 %idx.next, 1000
1045   br i1 %last, label %done, label %loop
1046
1047 done:
1048   ret void
1049 }
1050
1051 define void @pow_f64_intrinsic(ptr nocapture %varray, ptr nocapture readonly %exp) {
1052 ; CHECK-LABEL: @pow_f64_intrinsic(
1053 ; CHECK-VF2:    [[TMP8:%.*]] = call <2 x double> @amd_vrd2_pow(<2 x double> [[TMP4:%.*]], <2 x double> [[WIDE_LOAD:%.*]])
1054 ; CHECK-VF4:    [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
1055 ; CHECK-VF8:    [[TMP8:%.*]] = call <8 x double> @amd_vrd8_pow(<8 x double> [[TMP4:%.*]], <8 x double> [[WIDE_LOAD:%.*]])
1056 ; CHECK-VF16:   [[TMP8:%.*]] = call <16 x double> @llvm.pow.v16f64(<16 x double> [[TMP4:%.*]], <16 x double> [[WIDE_LOAD:%.*]])
1057 ; CHECK:        ret void
1058 ; VF2/4/8 expect @amd_vrd2_pow/@amd_vrd4_pow/@amd_vrd8_pow; VF16 expects the generic llvm.pow.v16f64.
1059 start:
1060   br label %loop
1061
1062 loop:  ; i = 0..999: varray[i] = llvm.pow.f64((double)(i32)i, exp[i])
1063   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1064   %exp.addr = getelementptr inbounds double, ptr %exp, i64 %idx
1065   %power = load double, ptr %exp.addr, align 4
1066   %idx.i32 = trunc i64 %idx to i32
1067   %base = sitofp i32 %idx.i32 to double
1068   %y = tail call double @llvm.pow.f64(double %base, double %power)
1069   %out = getelementptr inbounds double, ptr %varray, i64 %idx
1070   store double %y, ptr %out, align 4
1071   %idx.next = add nuw nsw i64 %idx, 1
1072   %last = icmp eq i64 %idx.next, 1000
1073   br i1 %last, label %done, label %loop
1074
1075 done:
1076   ret void
1077 }
1078
1079 define void @pow_f32(ptr nocapture %varray, ptr nocapture readonly %exp) {
1080 ; CHECK-LABEL: @pow_f32(
1081 ; CHECK-VF2:    [[TMP8:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[TMP4:%.*]], <2 x float> [[WIDE_LOAD:%.*]])
1082 ; CHECK-VF4:    [[TMP8:%.*]] = call <4 x float> @amd_vrs4_powf(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
1083 ; CHECK-VF8:    [[TMP8:%.*]] = call <8 x float> @amd_vrs8_powf(<8 x float> [[TMP4:%.*]], <8 x float> [[WIDE_LOAD:%.*]])
1084 ; CHECK-VF16:   [[TMP8:%.*]] = call <16 x float> @amd_vrs16_powf(<16 x float> [[TMP4:%.*]], <16 x float> [[WIDE_LOAD:%.*]])
1085 ; CHECK:        ret void
1086 ; VF4/8/16 expect @amd_vrs4_powf/@amd_vrs8_powf/@amd_vrs16_powf; VF2 expects the generic llvm.pow.v2f32.
1087 start:
1088   br label %loop
1089
1090 loop:  ; i = 0..999: varray[i] = powf((float)(i32)i, exp[i])
1091   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1092   %exp.addr = getelementptr inbounds float, ptr %exp, i64 %idx
1093   %power = load float, ptr %exp.addr, align 4
1094   %idx.i32 = trunc i64 %idx to i32
1095   %base = sitofp i32 %idx.i32 to float
1096   %y = tail call float @powf(float %base, float %power)
1097   %out = getelementptr inbounds float, ptr %varray, i64 %idx
1098   store float %y, ptr %out, align 4
1099   %idx.next = add nuw nsw i64 %idx, 1
1100   %last = icmp eq i64 %idx.next, 1000
1101   br i1 %last, label %done, label %loop
1102
1103 done:
1104   ret void
1105 }
1106
1107 define void @pow_f32_intrinsic(ptr nocapture %varray, ptr nocapture readonly %exp) {
1108 ; CHECK-LABEL: @pow_f32_intrinsic(
1109 ; CHECK-VF2:    [[TMP8:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[TMP4:%.*]], <2 x float> [[WIDE_LOAD:%.*]])
1110 ; CHECK-VF4:    [[TMP8:%.*]] = call <4 x float> @amd_vrs4_powf(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
1111 ; CHECK-VF8:    [[TMP8:%.*]] = call <8 x float> @amd_vrs8_powf(<8 x float> [[TMP4:%.*]], <8 x float> [[WIDE_LOAD:%.*]])
1112 ; CHECK-VF16:   [[TMP8:%.*]] = call <16 x float> @amd_vrs16_powf(<16 x float> [[TMP4:%.*]], <16 x float> [[WIDE_LOAD:%.*]])
1113 ; CHECK:        ret void
1114 ; VF4/8/16 expect @amd_vrs4_powf/@amd_vrs8_powf/@amd_vrs16_powf; VF2 expects the generic llvm.pow.v2f32.
1115 start:
1116   br label %loop
1117
1118 loop:  ; i = 0..999: varray[i] = llvm.pow.f32((float)(i32)i, exp[i])
1119   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1120   %exp.addr = getelementptr inbounds float, ptr %exp, i64 %idx
1121   %power = load float, ptr %exp.addr, align 4
1122   %idx.i32 = trunc i64 %idx to i32
1123   %base = sitofp i32 %idx.i32 to float
1124   %y = tail call float @llvm.pow.f32(float %base, float %power)
1125   %out = getelementptr inbounds float, ptr %varray, i64 %idx
1126   store float %y, ptr %out, align 4
1127   %idx.next = add nuw nsw i64 %idx, 1
1128   %last = icmp eq i64 %idx.next, 1000
1129   br i1 %last, label %done, label %loop
1130
1131 done:
1132   ret void
1133 }
1134
1135 define void @exp_f64(ptr nocapture %varray) {
1136 ; CHECK-LABEL: @exp_f64(
1137 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp(<2 x double> [[TMP4:%.*]])
1138 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp(<4 x double> [[TMP4:%.*]])
1139 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp(<8 x double> [[TMP4:%.*]])
1140 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.exp.v16f64(<16 x double> [[TMP4:%.*]])
1141 ; CHECK:        ret void
1142 ; VF2/4/8 expect @amd_vrd2_exp/@amd_vrd4_exp/@amd_vrd8_exp; VF16 expects the generic llvm.exp.v16f64.
1143 start:
1144   br label %loop
1145
1146 loop:  ; i = 0..999: varray[i] = exp((double)(i32)i)
1147   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1148   %out = getelementptr inbounds double, ptr %varray, i64 %idx
1149   %idx.i32 = trunc i64 %idx to i32
1150   %x = sitofp i32 %idx.i32 to double
1151   %y = tail call double @exp(double %x)
1152   store double %y, ptr %out, align 4
1153   %idx.next = add nuw nsw i64 %idx, 1
1154   %last = icmp eq i64 %idx.next, 1000
1155   br i1 %last, label %done, label %loop
1156
1157 done:
1158   ret void
1159 }
1160
1161 define void @exp_f32(ptr nocapture %varray) {
1162 ; CHECK-LABEL: @exp_f32(
1163 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4:%.*]])
1164 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_expf(<4 x float> [[TMP4:%.*]])
1165 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_expf(<8 x float> [[TMP4:%.*]])
1166 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_expf(<16 x float> [[TMP4:%.*]])
1167 ; CHECK:        ret void
1168 ; VF4/8/16 expect @amd_vrs4_expf/@amd_vrs8_expf/@amd_vrs16_expf; VF2 expects the generic llvm.exp.v2f32.
1169 start:
1170   br label %loop
1171
1172 loop:  ; i = 0..999: varray[i] = expf((float)(i32)i)
1173   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1174   %out = getelementptr inbounds float, ptr %varray, i64 %idx
1175   %idx.i32 = trunc i64 %idx to i32
1176   %x = sitofp i32 %idx.i32 to float
1177   %y = tail call float @expf(float %x)
1178   store float %y, ptr %out, align 4
1179   %idx.next = add nuw nsw i64 %idx, 1
1180   %last = icmp eq i64 %idx.next, 1000
1181   br i1 %last, label %done, label %loop
1182
1183 done:
1184   ret void
1185 }
1186
1187 define void @exp_f64_intrinsic(ptr nocapture %varray) {
1188 ; CHECK-LABEL: @exp_f64_intrinsic(
1189 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp(<2 x double> [[TMP4:%.*]])
1190 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp(<4 x double> [[TMP4:%.*]])
1191 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp(<8 x double> [[TMP4:%.*]])
1192 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.exp.v16f64(<16 x double> [[TMP4:%.*]])
1193 ; CHECK:        ret void
1194 ; VF2/4/8 expect @amd_vrd2_exp/@amd_vrd4_exp/@amd_vrd8_exp; VF16 expects the generic llvm.exp.v16f64.
1195 start:
1196   br label %loop
1197
1198 loop:  ; i = 0..999: varray[i] = llvm.exp.f64((double)(i32)i)
1199   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1200   %out = getelementptr inbounds double, ptr %varray, i64 %idx
1201   %idx.i32 = trunc i64 %idx to i32
1202   %x = sitofp i32 %idx.i32 to double
1203   %y = tail call double @llvm.exp.f64(double %x)
1204   store double %y, ptr %out, align 4
1205   %idx.next = add nuw nsw i64 %idx, 1
1206   %last = icmp eq i64 %idx.next, 1000
1207   br i1 %last, label %done, label %loop
1208
1209 done:
1210   ret void
1211 }
1212
1213 define void @exp_f32_intrinsic(ptr nocapture %varray) {
1214 ; CHECK-LABEL: @exp_f32_intrinsic(
1215 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4:%.*]])
1216 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_expf(<4 x float> [[TMP4:%.*]])
1217 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_expf(<8 x float> [[TMP4:%.*]])
1218 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_expf(<16 x float> [[TMP4:%.*]])
1219 ; CHECK:        ret void
1220 ; VF4/8/16 expect @amd_vrs4_expf/@amd_vrs8_expf/@amd_vrs16_expf; VF2 expects the generic llvm.exp.v2f32.
1221 start:
1222   br label %loop
1223
1224 loop:  ; i = 0..999: varray[i] = llvm.exp.f32((float)(i32)i)
1225   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1226   %out = getelementptr inbounds float, ptr %varray, i64 %idx
1227   %idx.i32 = trunc i64 %idx to i32
1228   %x = sitofp i32 %idx.i32 to float
1229   %y = tail call float @llvm.exp.f32(float %x)
1230   store float %y, ptr %out, align 4
1231   %idx.next = add nuw nsw i64 %idx, 1
1232   %last = icmp eq i64 %idx.next, 1000
1233   br i1 %last, label %done, label %loop
1234
1235 done:
1236   ret void
1237 }
1238
1239 define void @log_f64(ptr nocapture %varray) {
1240 ; CHECK-LABEL: @log_f64(
1241 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log(<2 x double> [[TMP4:%.*]])
1242 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log(<4 x double> [[TMP4:%.*]])
1243 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log(<8 x double> [[TMP4:%.*]])
1244 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.log.v16f64(<16 x double> [[TMP4:%.*]])
1245 ; CHECK:        ret void
1246 ; VF2/4/8 expect @amd_vrd2_log/@amd_vrd4_log/@amd_vrd8_log; VF16 expects the generic llvm.log.v16f64.
1247 start:
1248   br label %loop
1249
1250 loop:  ; i = 0..999: varray[i] = log((double)(i32)i)
1251   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1252   %out = getelementptr inbounds double, ptr %varray, i64 %idx
1253   %idx.i32 = trunc i64 %idx to i32
1254   %x = sitofp i32 %idx.i32 to double
1255   %y = tail call double @log(double %x)
1256   store double %y, ptr %out, align 4
1257   %idx.next = add nuw nsw i64 %idx, 1
1258   %last = icmp eq i64 %idx.next, 1000
1259   br i1 %last, label %done, label %loop
1260
1261 done:
1262   ret void
1263 }
1264
1265 define void @log_f32(ptr nocapture %varray) {
1266 ; CHECK-LABEL: @log_f32(
1267 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.log.v2f32(<2 x float> [[TMP4:%.*]])
1268 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_logf(<4 x float> [[TMP4:%.*]])
1269 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_logf(<8 x float> [[TMP4:%.*]])
1270 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_logf(<16 x float> [[TMP4:%.*]])
1271 ; CHECK:        ret void
1272 ; VF4/8/16 expect @amd_vrs4_logf/@amd_vrs8_logf/@amd_vrs16_logf; VF2 expects the generic llvm.log.v2f32.
1273 start:
1274   br label %loop
1275
1276 loop:  ; i = 0..999: varray[i] = logf((float)(i32)i)
1277   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1278   %out = getelementptr inbounds float, ptr %varray, i64 %idx
1279   %idx.i32 = trunc i64 %idx to i32
1280   %x = sitofp i32 %idx.i32 to float
1281   %y = tail call float @logf(float %x)
1282   store float %y, ptr %out, align 4
1283   %idx.next = add nuw nsw i64 %idx, 1
1284   %last = icmp eq i64 %idx.next, 1000
1285   br i1 %last, label %done, label %loop
1286
1287 done:
1288   ret void
1289 }
1290
1291 define void @log_f64_intrinsic(ptr nocapture %varray) {
1292 ; CHECK-LABEL: @log_f64_intrinsic(
1293 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log(<2 x double> [[TMP4:%.*]])
1294 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log(<4 x double> [[TMP4:%.*]])
1295 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log(<8 x double> [[TMP4:%.*]])
1296 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.log.v16f64(<16 x double> [[TMP4:%.*]])
1297 ; CHECK:        ret void
1298 ; VF2/4/8 expect @amd_vrd2_log/@amd_vrd4_log/@amd_vrd8_log; VF16 expects the generic llvm.log.v16f64.
1299 start:
1300   br label %loop
1301
1302 loop:  ; i = 0..999: varray[i] = llvm.log.f64((double)(i32)i)
1303   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1304   %out = getelementptr inbounds double, ptr %varray, i64 %idx
1305   %idx.i32 = trunc i64 %idx to i32
1306   %x = sitofp i32 %idx.i32 to double
1307   %y = tail call double @llvm.log.f64(double %x)
1308   store double %y, ptr %out, align 4
1309   %idx.next = add nuw nsw i64 %idx, 1
1310   %last = icmp eq i64 %idx.next, 1000
1311   br i1 %last, label %done, label %loop
1312
1313 done:
1314   ret void
1315 }
1316
1317 define void @log_f32_intrinsic(ptr nocapture %varray) {
1318 ; CHECK-LABEL: @log_f32_intrinsic(
1319 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.log.v2f32(<2 x float> [[TMP4:%.*]])
1320 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_logf(<4 x float> [[TMP4:%.*]])
1321 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_logf(<8 x float> [[TMP4:%.*]])
1322 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_logf(<16 x float> [[TMP4:%.*]])
1323 ; CHECK:        ret void
1324 ; VF4/8/16 expect @amd_vrs4_logf/@amd_vrs8_logf/@amd_vrs16_logf; VF2 expects the generic llvm.log.v2f32.
1325 start:
1326   br label %loop
1327
1328 loop:  ; i = 0..999: varray[i] = llvm.log.f32((float)(i32)i)
1329   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1330   %out = getelementptr inbounds float, ptr %varray, i64 %idx
1331   %idx.i32 = trunc i64 %idx to i32
1332   %x = sitofp i32 %idx.i32 to float
1333   %y = tail call float @llvm.log.f32(float %x)
1334   store float %y, ptr %out, align 4
1335   %idx.next = add nuw nsw i64 %idx, 1
1336   %last = icmp eq i64 %idx.next, 1000
1337   br i1 %last, label %done, label %loop
1338
1339 done:
1340   ret void
1341 }
1342
1343 define void @log2_f64(ptr nocapture %varray) {
1344 ; CHECK-LABEL: @log2_f64(
1345 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log2(<2 x double> [[TMP4:%.*]])
1346 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log2(<4 x double> [[TMP4:%.*]])
1347 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log2(<8 x double> [[TMP4:%.*]])
1348 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.log2.v16f64(<16 x double> [[TMP4:%.*]])
1349 ; CHECK:        ret void
1350 ; VF2/4/8 expect @amd_vrd2_log2/@amd_vrd4_log2/@amd_vrd8_log2; VF16 expects the generic llvm.log2.v16f64.
1351 start:
1352   br label %loop
1353
1354 loop:  ; i = 0..999: varray[i] = log2((double)(i32)i)
1355   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1356   %out = getelementptr inbounds double, ptr %varray, i64 %idx
1357   %idx.i32 = trunc i64 %idx to i32
1358   %x = sitofp i32 %idx.i32 to double
1359   %y = tail call double @log2(double %x)
1360   store double %y, ptr %out, align 4
1361   %idx.next = add nuw nsw i64 %idx, 1
1362   %last = icmp eq i64 %idx.next, 1000
1363   br i1 %last, label %done, label %loop
1364
1365 done:
1366   ret void
1367 }
1368
1369 define void @log2_f32(ptr nocapture %varray) {
1370 ; CHECK-LABEL: @log2_f32(
1371 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.log2.v2f32(<2 x float> [[TMP4:%.*]])
1372 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log2f(<4 x float> [[TMP4:%.*]])
1373 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_log2f(<8 x float> [[TMP4:%.*]])
1374 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log2f(<16 x float> [[TMP4:%.*]])
1375 ; CHECK:        ret void
1376 ; VF4/8/16 expect @amd_vrs4_log2f/@amd_vrs8_log2f/@amd_vrs16_log2f; VF2 expects the generic llvm.log2.v2f32.
1377 start:
1378   br label %loop
1379
1380 loop:  ; i = 0..999: varray[i] = log2f((float)(i32)i)
1381   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1382   %out = getelementptr inbounds float, ptr %varray, i64 %idx
1383   %idx.i32 = trunc i64 %idx to i32
1384   %x = sitofp i32 %idx.i32 to float
1385   %y = tail call float @log2f(float %x)
1386   store float %y, ptr %out, align 4
1387   %idx.next = add nuw nsw i64 %idx, 1
1388   %last = icmp eq i64 %idx.next, 1000
1389   br i1 %last, label %done, label %loop
1390
1391 done:
1392   ret void
1393 }
1394
1395 define void @log2_f64_intrinsic(ptr nocapture %varray) {
1396 ; CHECK-LABEL: @log2_f64_intrinsic(
1397 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log2(<2 x double> [[TMP4:%.*]])
1398 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log2(<4 x double> [[TMP4:%.*]])
1399 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log2(<8 x double> [[TMP4:%.*]])
1400 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.log2.v16f64(<16 x double> [[TMP4:%.*]])
1401 ; CHECK:        ret void
1402 ; VF2/4/8 expect @amd_vrd2_log2/@amd_vrd4_log2/@amd_vrd8_log2; VF16 expects the generic llvm.log2.v16f64.
1403 start:
1404   br label %loop
1405
1406 loop:  ; i = 0..999: varray[i] = llvm.log2.f64((double)(i32)i)
1407   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1408   %out = getelementptr inbounds double, ptr %varray, i64 %idx
1409   %idx.i32 = trunc i64 %idx to i32
1410   %x = sitofp i32 %idx.i32 to double
1411   %y = tail call double @llvm.log2.f64(double %x)
1412   store double %y, ptr %out, align 4
1413   %idx.next = add nuw nsw i64 %idx, 1
1414   %last = icmp eq i64 %idx.next, 1000
1415   br i1 %last, label %done, label %loop
1416
1417 done:
1418   ret void
1419 }
1420
1421 define void @log2_f32_intrinsic(ptr nocapture %varray) {
1422 ; CHECK-LABEL: @log2_f32_intrinsic(
1423 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.log2.v2f32(<2 x float> [[TMP4:%.*]])
1424 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log2f(<4 x float> [[TMP4:%.*]])
1425 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_log2f(<8 x float> [[TMP4:%.*]])
1426 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log2f(<16 x float> [[TMP4:%.*]])
1427 ; CHECK:        ret void
1428 ; VF4/8/16 expect @amd_vrs4_log2f/@amd_vrs8_log2f/@amd_vrs16_log2f; VF2 expects the generic llvm.log2.v2f32.
1429 start:
1430   br label %loop
1431
1432 loop:  ; i = 0..999: varray[i] = llvm.log2.f32((float)(i32)i)
1433   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1434   %out = getelementptr inbounds float, ptr %varray, i64 %idx
1435   %idx.i32 = trunc i64 %idx to i32
1436   %x = sitofp i32 %idx.i32 to float
1437   %y = tail call float @llvm.log2.f32(float %x)
1438   store float %y, ptr %out, align 4
1439   %idx.next = add nuw nsw i64 %idx, 1
1440   %last = icmp eq i64 %idx.next, 1000
1441   br i1 %last, label %done, label %loop
1442
1443 done:
1444   ret void
1445 }
1446
1447 define void @log10_f64(ptr nocapture %varray) {
1448 ; CHECK-LABEL: @log10_f64(
1449 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log10(<2 x double> [[TMP4:%.*]])
1450 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.log10.v4f64(<4 x double> [[TMP4:%.*]])
1451 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.log10.v8f64(<8 x double> [[TMP4:%.*]])
1452 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.log10.v16f64(<16 x double> [[TMP4:%.*]])
1453 ; CHECK:        ret void
1454 ; VF2 expects @amd_vrd2_log10; VF4/8/16 expect the generic llvm.log10.* vector intrinsic.
1455 start:
1456   br label %loop
1457
1458 loop:  ; i = 0..999: varray[i] = log10((double)(i32)i)
1459   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1460   %out = getelementptr inbounds double, ptr %varray, i64 %idx
1461   %idx.i32 = trunc i64 %idx to i32
1462   %x = sitofp i32 %idx.i32 to double
1463   %y = tail call double @log10(double %x)
1464   store double %y, ptr %out, align 4
1465   %idx.next = add nuw nsw i64 %idx, 1
1466   %last = icmp eq i64 %idx.next, 1000
1467   br i1 %last, label %done, label %loop
1468
1469 done:
1470   ret void
1471 }
1472
1473 define void @log10_f32(ptr nocapture %varray) {
1474 ; CHECK-LABEL: @log10_f32(
1475 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.log10.v2f32(<2 x float> [[TMP4:%.*]])
1476 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log10f(<4 x float> [[TMP4:%.*]])
1477 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_log10f(<8 x float> [[TMP4:%.*]])
1478 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log10f(<16 x float> [[TMP4:%.*]])
1479 ; CHECK:        ret void
1480 ; VF4/8/16 expect @amd_vrs4_log10f/@amd_vrs8_log10f/@amd_vrs16_log10f; VF2 expects llvm.log10.v2f32.
1481 start:
1482   br label %loop
1483
1484 loop:  ; i = 0..999: varray[i] = log10f((float)(i32)i)
1485   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1486   %out = getelementptr inbounds float, ptr %varray, i64 %idx
1487   %idx.i32 = trunc i64 %idx to i32
1488   %x = sitofp i32 %idx.i32 to float
1489   %y = tail call float @log10f(float %x)
1490   store float %y, ptr %out, align 4
1491   %idx.next = add nuw nsw i64 %idx, 1
1492   %last = icmp eq i64 %idx.next, 1000
1493   br i1 %last, label %done, label %loop
1494
1495 done:
1496   ret void
1497 }
1498
1499 define void @log10_f64_intrinsic(ptr nocapture %varray) {
1500 ; CHECK-LABEL: @log10_f64_intrinsic(
1501 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log10(<2 x double> [[TMP4:%.*]])
1502 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.log10.v4f64(<4 x double> [[TMP4:%.*]])
1503 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.log10.v8f64(<8 x double> [[TMP4:%.*]])
1504 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.log10.v16f64(<16 x double> [[TMP4:%.*]])
1505 ; CHECK:        ret void
1506 ; VF2 expects @amd_vrd2_log10; VF4/8/16 expect the generic llvm.log10.* vector intrinsic.
1507 start:
1508   br label %loop
1509
1510 loop:  ; i = 0..999: varray[i] = llvm.log10.f64((double)(i32)i)
1511   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1512   %out = getelementptr inbounds double, ptr %varray, i64 %idx
1513   %idx.i32 = trunc i64 %idx to i32
1514   %x = sitofp i32 %idx.i32 to double
1515   %y = tail call double @llvm.log10.f64(double %x)
1516   store double %y, ptr %out, align 4
1517   %idx.next = add nuw nsw i64 %idx, 1
1518   %last = icmp eq i64 %idx.next, 1000
1519   br i1 %last, label %done, label %loop
1520
1521 done:
1522   ret void
1523 }
1524
1525 define void @log10_f32_intrinsic(ptr nocapture %varray) {
1526 ; CHECK-LABEL: @log10_f32_intrinsic(
1527 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.log10.v2f32(<2 x float> [[TMP4:%.*]])
1528 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log10f(<4 x float> [[TMP4:%.*]])
1529 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_log10f(<8 x float> [[TMP4:%.*]])
1530 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log10f(<16 x float> [[TMP4:%.*]])
1531 ; CHECK:        ret void
1532 ; VF4/8/16 expect @amd_vrs4_log10f/@amd_vrs8_log10f/@amd_vrs16_log10f; VF2 expects llvm.log10.v2f32.
1533 start:
1534   br label %loop
1535
1536 loop:  ; i = 0..999: varray[i] = llvm.log10.f32((float)(i32)i)
1537   %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
1538   %out = getelementptr inbounds float, ptr %varray, i64 %idx
1539   %idx.i32 = trunc i64 %idx to i32
1540   %x = sitofp i32 %idx.i32 to float
1541   %y = tail call float @llvm.log10.f32(float %x)
1542   store float %y, ptr %out, align 4
1543   %idx.next = add nuw nsw i64 %idx, 1
1544   %last = icmp eq i64 %idx.next, 1000
1545   br i1 %last, label %done, label %loop
1546
1547 done:
1548   ret void
1549 }
1550
1551 define void @exp2_f64(ptr nocapture %varray) { ; varray[i] = exp2((double)(i32)i) for i in 0..999
1552 ; CHECK-LABEL: @exp2_f64(
1553 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp2(<2 x double> [[TMP4:%.*]])
1554 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp2(<4 x double> [[TMP4:%.*]])
1555 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp2(<8 x double> [[TMP4:%.*]])
1556 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.exp2.v16f64(<16 x double> [[TMP4:%.*]])
1557 ; CHECK:        ret void
1558 ; Expectation above: amd_vrd{2,4,8}_exp2 calls at VF=2, 4 and 8, the generic vector intrinsic at VF=16.
1559 entry:
1560   br label %for.body
1561
1562 for.body:
1563   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; loop counter, starts at 0
1564   %tmp = trunc i64 %iv to i32
1565   %conv = sitofp i32 %tmp to double ; counter converted to the call's argument type
1566   %call = tail call double @exp2(double %conv) ; scalar library call under test
1567   %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
1568   store double %call, ptr %arrayidx, align 4 ; result written to varray[iv]
1569   %iv.next = add nuw nsw i64 %iv, 1
1570   %exitcond = icmp eq i64 %iv.next, 1000 ; leave the loop after 1000 iterations
1571   br i1 %exitcond, label %for.end, label %for.body
1572
1573 for.end:
1574   ret void
1575 }
1576
1577 define void @exp2_f32(ptr nocapture %varray) { ; varray[i] = exp2f((float)(i32)i) for i in 0..999
1578 ; CHECK-LABEL: @exp2_f32(
1579 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[TMP4:%.*]])
1580 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp2f(<4 x float> [[TMP4:%.*]])
1581 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_exp2f(<8 x float> [[TMP4:%.*]])
1582 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_exp2f(<16 x float> [[TMP4:%.*]])
1583 ; CHECK:        ret void
1584 ; Expectation above: the generic vector intrinsic at VF=2, amd_vrs{4,8,16}_exp2f calls at VF=4, 8 and 16.
1585 entry:
1586   br label %for.body
1587
1588 for.body:
1589   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; loop counter, starts at 0
1590   %tmp = trunc i64 %iv to i32
1591   %conv = sitofp i32 %tmp to float ; counter converted to the call's argument type
1592   %call = tail call float @exp2f(float %conv) ; scalar library call under test
1593   %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
1594   store float %call, ptr %arrayidx, align 4 ; result written to varray[iv]
1595   %iv.next = add nuw nsw i64 %iv, 1
1596   %exitcond = icmp eq i64 %iv.next, 1000 ; leave the loop after 1000 iterations
1597   br i1 %exitcond, label %for.end, label %for.body
1598
1599 for.end:
1600   ret void
1601 }
1602
1603 define void @exp2_f64_intrinsic(ptr nocapture %varray) { ; varray[i] = llvm.exp2.f64((double)(i32)i) for i in 0..999
1604 ; CHECK-LABEL: @exp2_f64_intrinsic(
1605 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp2(<2 x double> [[TMP4:%.*]])
1606 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp2(<4 x double> [[TMP4:%.*]])
1607 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp2(<8 x double> [[TMP4:%.*]])
1608 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.exp2.v16f64(<16 x double> [[TMP4:%.*]])
1609 ; CHECK:        ret void
1610 ; Expectation above: amd_vrd{2,4,8}_exp2 calls at VF=2, 4 and 8, the generic vector intrinsic at VF=16.
1611 entry:
1612   br label %for.body
1613
1614 for.body:
1615   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; loop counter, starts at 0
1616   %tmp = trunc i64 %iv to i32
1617   %conv = sitofp i32 %tmp to double ; counter converted to the call's argument type
1618   %call = tail call double @llvm.exp2.f64(double %conv) ; scalar intrinsic call under test
1619   %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
1620   store double %call, ptr %arrayidx, align 4 ; result written to varray[iv]
1621   %iv.next = add nuw nsw i64 %iv, 1
1622   %exitcond = icmp eq i64 %iv.next, 1000 ; leave the loop after 1000 iterations
1623   br i1 %exitcond, label %for.end, label %for.body
1624
1625 for.end:
1626   ret void
1627 }
1628
1629 define void @exp2_f32_intrinsic(ptr nocapture %varray) { ; varray[i] = llvm.exp2.f32((float)(i32)i) for i in 0..999
1630 ; CHECK-LABEL: @exp2_f32_intrinsic(
1631 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[TMP4:%.*]])
1632 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp2f(<4 x float> [[TMP4:%.*]])
1633 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_exp2f(<8 x float> [[TMP4:%.*]])
1634 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_exp2f(<16 x float> [[TMP4:%.*]])
1635 ; CHECK:        ret void
1636 ; Expectation above: the generic vector intrinsic at VF=2, amd_vrs{4,8,16}_exp2f calls at VF=4, 8 and 16.
1637 entry:
1638   br label %for.body
1639
1640 for.body:
1641   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; loop counter, starts at 0
1642   %tmp = trunc i64 %iv to i32
1643   %conv = sitofp i32 %tmp to float ; counter converted to the call's argument type
1644   %call = tail call float @llvm.exp2.f32(float %conv) ; scalar intrinsic call under test
1645   %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
1646   store float %call, ptr %arrayidx, align 4 ; result written to varray[iv]
1647   %iv.next = add nuw nsw i64 %iv, 1
1648   %exitcond = icmp eq i64 %iv.next, 1000 ; leave the loop after 1000 iterations
1649   br i1 %exitcond, label %for.end, label %for.body
1650
1651 for.end:
1652   ret void
1653 }
1654
1655 define void @exp10_f64(ptr nocapture %varray) { ; varray[i] = exp10((double)(i32)i) for i in 0..999
1656 ; CHECK-LABEL: @exp10_f64(
1657 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp10(<2 x double> [[TMP4:%.*]])
1658 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.exp10.v4f64(<4 x double> [[TMP4:%.*]])
1659 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.exp10.v8f64(<8 x double> [[TMP4:%.*]])
1660 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.exp10.v16f64(<16 x double> [[TMP4:%.*]])
1661 ; CHECK:        ret void
1662 ; Expectation above: an amd_vrd2_exp10 call at VF=2, the generic vector intrinsic at VF=4, 8 and 16.
1663 entry:
1664   br label %for.body
1665
1666 for.body:
1667   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; loop counter, starts at 0
1668   %tmp = trunc i64 %iv to i32
1669   %conv = sitofp i32 %tmp to double ; counter converted to the call's argument type
1670   %call = tail call double @exp10(double %conv) ; scalar library call under test
1671   %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
1672   store double %call, ptr %arrayidx, align 4 ; result written to varray[iv]
1673   %iv.next = add nuw nsw i64 %iv, 1
1674   %exitcond = icmp eq i64 %iv.next, 1000 ; leave the loop after 1000 iterations
1675   br i1 %exitcond, label %for.end, label %for.body
1676
1677 for.end:
1678   ret void
1679 }
1680
1681 define void @exp10_f32(ptr nocapture %varray) { ; varray[i] = exp10f((float)(i32)i) for i in 0..999
1682 ; CHECK-LABEL: @exp10_f32(
1683 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> [[TMP4:%.*]])
1684 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp10f(<4 x float> [[TMP4:%.*]])
1685 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @llvm.exp10.v8f32(<8 x float> [[TMP4:%.*]])
1686 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @llvm.exp10.v16f32(<16 x float> [[TMP4:%.*]])
1687 ; CHECK:        ret void
1688 ; Expectation above: an amd_vrs4_exp10f call at VF=4 only, the generic vector intrinsic at VF=2, 8 and 16.
1689 entry:
1690   br label %for.body
1691
1692 for.body:
1693   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; loop counter, starts at 0
1694   %tmp = trunc i64 %iv to i32
1695   %conv = sitofp i32 %tmp to float ; counter converted to the call's argument type
1696   %call = tail call float @exp10f(float %conv) ; scalar library call under test
1697   %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
1698   store float %call, ptr %arrayidx, align 4 ; result written to varray[iv]
1699   %iv.next = add nuw nsw i64 %iv, 1
1700   %exitcond = icmp eq i64 %iv.next, 1000 ; leave the loop after 1000 iterations
1701   br i1 %exitcond, label %for.end, label %for.body
1702
1703 for.end:
1704   ret void
1705 }
1706
1707 define void @exp10_f64_intrinsic(ptr nocapture %varray) { ; varray[i] = llvm.exp10.f64((double)(i32)i) for i in 0..999
1708 ; CHECK-LABEL: @exp10_f64_intrinsic(
1709 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp10(<2 x double> [[TMP4:%.*]])
1710 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.exp10.v4f64(<4 x double> [[TMP4:%.*]])
1711 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.exp10.v8f64(<8 x double> [[TMP4:%.*]])
1712 ; CHECK-VF16:    [[TMP5:%.*]] = call <16 x double> @llvm.exp10.v16f64(<16 x double> [[TMP4:%.*]])
1713 ; CHECK:        ret void
1714 ; Expectation above: an amd_vrd2_exp10 call at VF=2, the generic vector intrinsic at VF=4, 8 and 16.
1715 entry:
1716   br label %for.body
1717
1718 for.body:
1719   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; loop counter, starts at 0
1720   %tmp = trunc i64 %iv to i32
1721   %conv = sitofp i32 %tmp to double ; counter converted to the call's argument type
1722   %call = tail call double @llvm.exp10.f64(double %conv) ; scalar intrinsic call under test
1723   %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
1724   store double %call, ptr %arrayidx, align 4 ; result written to varray[iv]
1725   %iv.next = add nuw nsw i64 %iv, 1
1726   %exitcond = icmp eq i64 %iv.next, 1000 ; leave the loop after 1000 iterations
1727   br i1 %exitcond, label %for.end, label %for.body
1728
1729 for.end:
1730   ret void
1731 }
1732
1733 define void @exp10_f32_intrinsic(ptr nocapture %varray) { ; varray[i] = llvm.exp10.f32((float)(i32)i) for i in 0..999
1734 ; CHECK-LABEL: @exp10_f32_intrinsic(
1735 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> [[TMP4:%.*]])
1736 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp10f(<4 x float> [[TMP4:%.*]])
1737 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @llvm.exp10.v8f32(<8 x float> [[TMP4:%.*]])
1738 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @llvm.exp10.v16f32(<16 x float> [[TMP4:%.*]])
1739 ; CHECK:        ret void
1740 ; Expectation above: an amd_vrs4_exp10f call at VF=4 only, the generic vector intrinsic at VF=2, 8 and 16.
1741 entry:
1742   br label %for.body
1743
1744 for.body:
1745   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; loop counter, starts at 0
1746   %tmp = trunc i64 %iv to i32
1747   %conv = sitofp i32 %tmp to float ; counter converted to the call's argument type
1748   %call = tail call float @llvm.exp10.f32(float %conv) ; scalar intrinsic call under test
1749   %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
1750   store float %call, ptr %arrayidx, align 4 ; result written to varray[iv]
1751   %iv.next = add nuw nsw i64 %iv, 1
1752   %exitcond = icmp eq i64 %iv.next, 1000 ; leave the loop after 1000 iterations
1753   br i1 %exitcond, label %for.end, label %for.body
1754
1755 for.end:
1756   ret void
1757 }
1758
1759
1760 define void @sincos_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; sincos(a[i], &b[i], &c[i]) for i in 0..999
1761 ; CHECK-LABEL: define void @sincos_f64
1762 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]])
1763 ; CHECK-VF2-NOT:    call void @amd_vrd2_sincos(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
1764 ; CHECK-VF4-NOT:    call void @amd_vrd4_sincos(<4 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
1765 ; CHECK-VF8-NOT:    call void @amd_vrd8_sincos(<8 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
1766 ; CHECK:        ret void
1767 ; Negative test: the directives above require that no amd_vrd{2,4,8}_sincos call appears before the first ret void.
1768 entry:
1769   br label %for.body
1770
1771 for.body:
1772   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop counter, starts at 0
1773   %gepa = getelementptr double, ptr %a, i64 %indvars.iv
1774   %num = load double, ptr %gepa, align 8 ; input value a[i]
1775   %gepb = getelementptr double, ptr %b, i64 %indvars.iv ; first output slot, &b[i]
1776   %gepc = getelementptr double, ptr %c, i64 %indvars.iv ; second output slot, &c[i]
1777   call void @sincos(double %num, ptr %gepb, ptr %gepc) ; scalar call under test; results are returned through the two pointers
1778   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1779   %exitcond = icmp eq i64 %indvars.iv.next, 1000 ; leave the loop after 1000 iterations
1780   br i1 %exitcond, label %for.cond.cleanup, label %for.body
1781
1782 for.cond.cleanup:
1783   ret void
1784 }
1785
1786 define void @sincos_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; sincosf(a[i], &b[i], &c[i]) for i in 0..999
1787 ; CHECK-LABEL: define void @sincos_f32
1788 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]])
1789 ; CHECK-VF4-NOT:    call void @amd_vrs4_sincosf(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
1790 ; CHECK-VF8-NOT:    call void @amd_vrs8_sincosf(<8 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
1791 ; CHECK-VF16-NOT:    call void @amd_vrs16_sincosf(<16 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
1792 ; CHECK:        ret void
1793 ; Negative test: the directives above require that no amd_vrs{4,8,16}_sincosf call appears before the first ret void.
1794 entry:
1795   br label %for.body
1796
1797 for.body:
1798   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop counter, starts at 0
1799   %gepa = getelementptr float, ptr %a, i64 %indvars.iv
1800   %num = load float, ptr %gepa, align 4 ; input value a[i]; elements are 4 bytes apart, so 4 is the largest alignment valid on every iteration
1801   %gepb = getelementptr float, ptr %b, i64 %indvars.iv ; first output slot, &b[i]
1802   %gepc = getelementptr float, ptr %c, i64 %indvars.iv ; second output slot, &c[i]
1803   call void @sincosf(float %num, ptr %gepb, ptr %gepc) ; scalar call under test; results are returned through the two pointers
1804   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1805   %exitcond = icmp eq i64 %indvars.iv.next, 1000 ; leave the loop after 1000 iterations
1806   br i1 %exitcond, label %for.cond.cleanup, label %for.body
1807
1808 for.cond.cleanup:
1809   ret void
1810 }
1811
1812 attributes #0 = { nounwind readnone } ; attribute group attached as #0 to the math declarations in this file
1813
1814 declare double @exp10(double) #0 ; scalar callee of @exp10_f64
1815 declare float @exp10f(float) #0 ; scalar callee of @exp10_f32
1816 declare double @llvm.exp10.f64(double) #0 ; scalar callee of @exp10_f64_intrinsic
1817 declare float @llvm.exp10.f32(float) #0 ; scalar callee of @exp10_f32_intrinsic
1818 declare void @sincos(double, ptr, ptr) ; scalar callee of @sincos_f64; declared without group #0
1819 declare void @sincosf(float, ptr, ptr) ; scalar callee of @sincos_f32; declared without group #0