llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
   2 ; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -mtriple=aarch64--linux-gnu < %s | FileCheck %s
   3 ; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -mtriple=aarch64--linux-gnu -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=FP16
   4 ; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -mtriple=aarch64--linux-gnu -mattr=+bf16 < %s | FileCheck %s --check-prefix=BF16
     ;
     ; The three invocations above print the cost model for the same IR under
     ; three AArch64 configurations: no extra attributes (default check prefix),
     ; +fullfp16 (prefix FP16) and +bf16 (prefix BF16).
   5
   6 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
   7
   8 define void @strict_fp_reductions() {
     ; Costs of llvm.vector.reduce.fadd calls that carry no fast-math flags,
     ; over half, float, double, bfloat and fp128 vectors. All vector operands
     ; are undef. Every start value is 0.0 except the fp128 one, which is undef.
     ; NOTE(review): without fast-math flags this is presumably the ordered
     ; (sequential) form of the reduction, as the function name suggests --
     ; confirm against the LangRef.
     ;
     ; In the current assertions, the FP16 block differs from the default block
     ; only in the half rows, and the BF16 block only in the bfloat row; the
     ; float, double and fp128 rows are identical in all three blocks.
   9 ; CHECK-LABEL: 'strict_fp_reductions'
  10 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
  11 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
  12 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
  13 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
  14 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
  15 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
  16 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
  17 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
  18 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
  19 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
  20 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
  21 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  22 ;
  23 ; FP16-LABEL: 'strict_fp_reductions'
  24 ; FP16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
  25 ; FP16-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
  26 ; FP16-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
  27 ; FP16-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
  28 ; FP16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
  29 ; FP16-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
  30 ; FP16-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
  31 ; FP16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
  32 ; FP16-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
  33 ; FP16-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
  34 ; FP16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
  35 ; FP16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  36 ;
  37 ; BF16-LABEL: 'strict_fp_reductions'
  38 ; BF16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
  39 ; BF16-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
  40 ; BF16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
  41 ; BF16-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
  42 ; BF16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
  43 ; BF16-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
  44 ; BF16-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
  45 ; BF16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
  46 ; BF16-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
  47 ; BF16-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
  48 ; BF16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
  49 ; BF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  50 ;
  51   %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
  52   %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
  53   %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
  54   %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
  55   %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
  56   %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
  57   %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
  58   %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
  59   %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
     ; NOTE(review): the "v4f8" suffix below is attached to a <4 x bfloat>
     ; reduction; the printed IR in the assertions above shows the callee with
     ; the suffix ".v4bf16" instead, while the value name keeps "v4f8".
  60   %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4f8(bfloat 0.0, <4 x bfloat> undef)
     ; The fp128 reduction is the only one here whose start value is undef.
  61   %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
  62
  63   ret void
  64 }
  65
  66
  67 define void @fast_fp_reductions() {
     ; Costs of llvm.vector.reduce.fadd calls marked `fast` or `reassoc`, over
     ; half, float, double, bfloat and fp128 vectors with undef vector operands.
     ; Besides power-of-two element counts, the half, float and double groups
     ; also cover odd counts (11 and 13; 13 and 5; 7 and 9 respectively).
     ;
     ; In the current assertions, a `fast` call and the `reassoc` call on the
     ; same vector type are always given the same cost. The FP16 block differs
     ; from the default block only in the half rows, and the BF16 block only in
     ; the bfloat row.
  68 ; CHECK-LABEL: 'fast_fp_reductions'
  69 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
  70 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
  71 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
  72 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
  73 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
  74 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
  75 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
  76 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
  77 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
  78 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
  79 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f32 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
  80 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
  81 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
  82 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
  83 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
  84 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
  85 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v13f32 = call fast float @llvm.vector.reduce.fadd.v13f32(float 0.000000e+00, <13 x float> undef)
  86 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v5f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v5f32(float 0.000000e+00, <5 x float> undef)
  87 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
  88 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
  89 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
  90 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
  91 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.000000e+00, <7 x double> undef)
  92 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.000000e+00, <9 x double> undef)
  93 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
  94 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
  95 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  96 ;
  97 ; FP16-LABEL: 'fast_fp_reductions'
  98 ; FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
  99 ; FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
 100 ; FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
 101 ; FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
 102 ; FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
 103 ; FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
 104 ; FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
 105 ; FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
 106 ; FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
 107 ; FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
 108 ; FP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f32 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
 109 ; FP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
 110 ; FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
 111 ; FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
 112 ; FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
 113 ; FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
 114 ; FP16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v13f32 = call fast float @llvm.vector.reduce.fadd.v13f32(float 0.000000e+00, <13 x float> undef)
 115 ; FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v5f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v5f32(float 0.000000e+00, <5 x float> undef)
 116 ; FP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
 117 ; FP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
 118 ; FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
 119 ; FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
 120 ; FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.000000e+00, <7 x double> undef)
 121 ; FP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.000000e+00, <9 x double> undef)
 122 ; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
 123 ; FP16-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
 124 ; FP16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 125 ;
 126 ; BF16-LABEL: 'fast_fp_reductions'
 127 ; BF16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
 128 ; BF16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
 129 ; BF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
 130 ; BF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
 131 ; BF16-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
 132 ; BF16-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
 133 ; BF16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
 134 ; BF16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
 135 ; BF16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
 136 ; BF16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
 137 ; BF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f32 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
 138 ; BF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
 139 ; BF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
 140 ; BF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
 141 ; BF16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
 142 ; BF16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
 143 ; BF16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v13f32 = call fast float @llvm.vector.reduce.fadd.v13f32(float 0.000000e+00, <13 x float> undef)
 144 ; BF16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v5f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v5f32(float 0.000000e+00, <5 x float> undef)
 145 ; BF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
 146 ; BF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
 147 ; BF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
 148 ; BF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
 149 ; BF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.000000e+00, <7 x double> undef)
 150 ; BF16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.000000e+00, <9 x double> undef)
 151 ; BF16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
 152 ; BF16-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
 153 ; BF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 154 ;
     ; half: v2, v4, v8 and v16 each appear as a fast/reassoc pair; the odd
     ; sizes v11 (fast) and v13 (reassoc) appear once each.
 155   %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
 156   %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
 157
 158   %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
 159   %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
 160
 161   %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
 162   %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
 163
 164   %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
 165   %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
 166
 167   %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0.0, <11 x half> undef)
 168   %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0.0, <13 x half> undef)
 169
     ; float: v2, v4 and v8 pairs, then the odd sizes v13 (fast) and v5 (reassoc).
 170   %fadd_v2f32 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
 171   %fadd_v2f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
 172
 173   %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
 174   %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
 175
 176   %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
 177   %fadd_v8f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
 178
 179   %fadd_v13f32 = call fast float @llvm.vector.reduce.fadd.v13f32(float 0.0, <13 x float> undef)
 180   %fadd_v5f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v5f32(float 0.0, <5 x float> undef)
 181
     ; double: v2 and v4 pairs, then the odd sizes v7 (fast) and v9 (reassoc).
 182   %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
 183   %fadd_v2f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
 184
 185   %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
 186   %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
 187
 188   %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.0, <7 x double> undef)
 189   %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.0, <9 x double> undef)
 190
     ; bfloat and fp128 are exercised with `reassoc` only. The bfloat start value
     ; is -0.0 (printed as 0xR8000 in the assertions); the fp128 one is undef.
     ; NOTE(review): the "v4f8" suffix below is attached to a <4 x bfloat>
     ; reduction; the printed IR in the assertions shows the callee with the
     ; suffix ".v4bf16" instead, while the value name keeps "v4f8".
 191   %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4f8(bfloat -0.0, <4 x bfloat> undef)
 192   %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
 193
 194   ret void
 195 }
 196
 197 declare bfloat @llvm.vector.reduce.fadd.v4f8(bfloat, <4 x bfloat>)
     ; NOTE(review): the declaration above carries the suffix "v4f8" although its
     ; vector operand is <4 x bfloat>; the autogenerated assertions in this file
     ; print the same callee with the suffix ".v4bf16".
 198 declare fp128 @llvm.vector.reduce.fadd.v4f128(fp128, <4 x fp128>)
 199
     ; half reductions (2, 4, 8, 16, 11 and 13 elements).
 200 declare half @llvm.vector.reduce.fadd.v2f16(half, <2 x half>)
 201 declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)
 202 declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>)
 203 declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>)
 204 declare half @llvm.vector.reduce.fadd.v11f16(half, <11 x half>)
 205 declare half @llvm.vector.reduce.fadd.v13f16(half, <13 x half>)
 206
     ; float reductions (2, 4, 8, 13 and 5 elements).
 207 declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
 208 declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
 209 declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
 210 declare float @llvm.vector.reduce.fadd.v13f32(float, <13 x float>)
 211 declare float @llvm.vector.reduce.fadd.v5f32(float, <5 x float>)
 212
     ; double reductions (2, 4, 7 and 9 elements).
 213 declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
 214 declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
 215 declare double @llvm.vector.reduce.fadd.v7f64(double, <7 x double>)
 216 declare double @llvm.vector.reduce.fadd.v9f64(double, <9 x double>)