; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s
; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -mattr=+avx512f -S < %s | FileCheck %s --check-prefix=CHECK-AVX512-VF8
; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=16 -force-vector-interleave=1 -mattr=+avx512f -S < %s | FileCheck %s --check-prefix=CHECK-AVX512-VF16
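
; Check that the loop vectorizer, with TLI mappings for the AMD math library
; (AMDLIBM) injected, replaces scalar libm calls and the matching LLVM math
; intrinsics with the library's vector variants: 4-wide amd_vrd4_*/amd_vrs4_*
; calls with AVX, and 8-wide double (amd_vrd8_*) / 16-wide float (amd_vrs16_*)
; calls with AVX512.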

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

declare double @sin(double) #0
declare float @sinf(float) #0
declare double @llvm.sin.f64(double) #0
declare float @llvm.sin.f32(float) #0

declare double @cos(double) #0
declare float @cosf(float) #0
declare double @llvm.cos.f64(double) #0
declare float @llvm.cos.f32(float) #0

declare double @tan(double) #0
declare float @tanf(float) #0
declare double @llvm.tan.f64(double) #0
declare float @llvm.tan.f32(float) #0

declare double @pow(double, double) #0
declare float @powf(float, float) #0
declare double @llvm.pow.f64(double, double) #0
declare float @llvm.pow.f32(float, float) #0

declare double @exp(double) #0
declare float @expf(float) #0
declare double @llvm.exp.f64(double) #0
declare float @llvm.exp.f32(float) #0

declare double @log(double) #0
declare float @logf(float) #0
declare double @llvm.log.f64(double) #0
declare float @llvm.log.f32(float) #0

declare double @log2(double) #0
declare float @log2f(float) #0
declare double @llvm.log2.f64(double) #0
declare float @llvm.log2.f32(float) #0

declare double @log10(double) #0
declare float @log10f(float) #0
declare double @llvm.log10.f64(double) #0
declare float @llvm.log10.f32(float) #0

declare double @sqrt(double) #0
declare float @sqrtf(float) #0

declare double @exp2(double) #0
declare float @exp2f(float) #0
declare double @llvm.exp2.f64(double) #0
declare float @llvm.exp2.f32(float) #0

define void @sin_f64(ptr nocapture %varray) {
; CHECK-LABEL: @sin_f64(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_sin(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @sin_f64(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_sin(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @sin(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @sin_f32(ptr nocapture %varray) {
; CHECK-LABEL: @sin_f32(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_sinf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @sin_f32(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_sinf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @sinf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @sin_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @sin_f64_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_sin(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @sin_f64_intrinsic(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_sin(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.sin.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @sin_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @sin_f32_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_sinf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @sin_f32_intrinsic(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_sinf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.sin.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @cos_f64(ptr nocapture %varray) {
; CHECK-LABEL: @cos_f64(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_cos(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @cos_f64(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_cos(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @cos(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @cos_f32(ptr nocapture %varray) {
; CHECK-LABEL: @cos_f32(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_cosf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @cos_f32(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_cosf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @cosf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @cos_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @cos_f64_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_cos(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @cos_f64_intrinsic(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_cos(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.cos.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @cos_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @cos_f32_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_cosf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @cos_f32_intrinsic(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_cosf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.cos.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @tan_f64(ptr nocapture %varray) {
; CHECK-LABEL: @tan_f64(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @tan_f64(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @tan(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @tan_f32(ptr nocapture %varray) {
; CHECK-LABEL: @tan_f32(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @tan_f32(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @tanf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @tan_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @tan_f64_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @tan_f64_intrinsic(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.tan.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @tan_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @tan_f32_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @tan_f32_intrinsic(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.tan.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f64(
; CHECK: [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @pow_f64(
; CHECK-AVX512-VF8: [[TMP8:%.*]] = call <8 x double> @amd_vrd8_pow(<8 x double> [[TMP4:%.*]], <8 x double> [[WIDE_LOAD:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %arrayidx = getelementptr inbounds double, ptr %exp, i64 %iv
  %tmp1 = load double, ptr %arrayidx, align 4
  %tmp2 = tail call double @pow(double %conv, double %tmp1)
  %arrayidx2 = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %tmp2, ptr %arrayidx2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @pow_f64_intrinsic(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f64_intrinsic(
; CHECK: [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @pow_f64_intrinsic(
; CHECK-AVX512-VF8: [[TMP8:%.*]] = call <8 x double> @amd_vrd8_pow(<8 x double> [[TMP4:%.*]], <8 x double> [[WIDE_LOAD:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %arrayidx = getelementptr inbounds double, ptr %exp, i64 %iv
  %tmp1 = load double, ptr %arrayidx, align 4
  %tmp2 = tail call double @llvm.pow.f64(double %conv, double %tmp1)
  %arrayidx2 = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %tmp2, ptr %arrayidx2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @pow_f32(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f32(
; CHECK: [[TMP8:%.*]] = call <4 x float> @amd_vrs4_powf(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @pow_f32(
; CHECK-AVX512-VF16: [[TMP8:%.*]] = call <16 x float> @amd_vrs16_powf(<16 x float> [[TMP4:%.*]], <16 x float> [[WIDE_LOAD:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %arrayidx = getelementptr inbounds float, ptr %exp, i64 %iv
  %tmp1 = load float, ptr %arrayidx, align 4
  %tmp2 = tail call float @powf(float %conv, float %tmp1)
  %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %tmp2, ptr %arrayidx2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @pow_f32_intrinsic(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f32_intrinsic(
; CHECK: [[TMP8:%.*]] = call <4 x float> @amd_vrs4_powf(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @pow_f32_intrinsic(
; CHECK-AVX512-VF16: [[TMP8:%.*]] = call <16 x float> @amd_vrs16_powf(<16 x float> [[TMP4:%.*]], <16 x float> [[WIDE_LOAD:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %arrayidx = getelementptr inbounds float, ptr %exp, i64 %iv
  %tmp1 = load float, ptr %arrayidx, align 4
  %tmp2 = tail call float @llvm.pow.f32(float %conv, float %tmp1)
  %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %tmp2, ptr %arrayidx2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @exp_f64(ptr nocapture %varray) {
; CHECK-LABEL: @exp_f64(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @exp_f64(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @exp(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @exp_f32(ptr nocapture %varray) {
; CHECK-LABEL: @exp_f32(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_expf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @exp_f32(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_expf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @expf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @exp_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @exp_f64_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @exp_f64_intrinsic(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.exp.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @exp_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @exp_f32_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_expf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @exp_f32_intrinsic(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_expf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.exp.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log_f64(ptr nocapture %varray) {
; CHECK-LABEL: @log_f64(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @log_f64(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @log(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log_f32(ptr nocapture %varray) {
; CHECK-LABEL: @log_f32(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_logf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @log_f32(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_logf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @logf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log_f64_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @log_f64_intrinsic(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.log.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log_f32_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_logf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @log_f32_intrinsic(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_logf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.log.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log2_f64(ptr nocapture %varray) {
; CHECK-LABEL: @log2_f64(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log2(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @log2_f64(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log2(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @log2(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log2_f32(ptr nocapture %varray) {
; CHECK-LABEL: @log2_f32(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log2f(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @log2_f32(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log2f(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @log2f(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log2_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log2_f64_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log2(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @log2_f64_intrinsic(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log2(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.log2.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log2_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log2_f32_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log2f(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @log2_f32_intrinsic(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log2f(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.log2.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log10_f32(ptr nocapture %varray) {
; CHECK-LABEL: @log10_f32(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log10f(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @log10_f32(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log10f(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @log10f(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @log10_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log10_f32_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log10f(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @log10_f32_intrinsic(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log10f(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.log10.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @exp2_f64(ptr nocapture %varray) {
; CHECK-LABEL: @exp2_f64(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp2(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @exp2_f64(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp2(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @exp2(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @exp2_f32(ptr nocapture %varray) {
; CHECK-LABEL: @exp2_f32(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp2f(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @exp2_f32(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_exp2f(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @exp2f(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @exp2_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @exp2_f64_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp2(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @exp2_f64_intrinsic(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp2(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.exp2.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

define void @exp2_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @exp2_f32_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp2f(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @exp2_f32_intrinsic(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_exp2f(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.exp2.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

attributes #0 = { nounwind readnone }