llvm/test/Analysis/CostModel/ARM/reduce-fp.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
   2 ; RUN: opt < %s -mtriple=armv8a-linux-gnueabihf -mattr=+fp64 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-V8
   3 ; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEFP
   4 ; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEI
   5
   ; Cost-model test for the llvm.vector.reduce.fadd / llvm.vector.reduce.fmul
   ; intrinsics. Each RUN line above selects one target configuration and the
   ; FileCheck prefix (CHECK-V8, CHECK-MVEFP, CHECK-MVEI) used for its assertions.
   6 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
   7
   ; fadd_strict: llvm.vector.reduce.fadd called without fast-math flags (the
   ; sequential, in-order form per the LLVM LangRef) on half, float, double and
   ; fp128 vectors. The CHECK-V8 / CHECK-MVEFP / CHECK-MVEI groups below pin the
   ; estimated cost printed for every call under the matching RUN configuration.
   8 define void @fadd_strict() {
   9 ; CHECK-V8-LABEL: 'fadd_strict'
  10 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
  11 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
  12 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
  13 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
  14 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
  15 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
  16 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
  17 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
  18 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
  19 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
  20 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  21 ;
  22 ; CHECK-MVEFP-LABEL: 'fadd_strict'
  23 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
  24 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
  25 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
  26 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
  27 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
  28 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
  29 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
  30 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
  31 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
  32 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
  33 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  34 ;
  35 ; CHECK-MVEI-LABEL: 'fadd_strict'
  36 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
  37 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
  38 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
  39 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
  40 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
  41 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
  42 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
  43 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
  44 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
  45 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
  46 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  47 ;
  48   %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
  49   %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
  50   %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
  51   %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
  52   %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
  53   %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
  54   %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
  55   %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
  56   %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
  57   %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
  58   ret void
  59 }
  60
  61
  ; fadd_unordered: the same llvm.vector.reduce.fadd calls as fadd_strict, but
  ; each carries the `reassoc` fast-math flag (which, per the LLVM LangRef, lets
  ; the reduction be evaluated in any order). The CHECK groups below pin the
  ; estimated cost printed for every call under the matching RUN configuration.
  62 define void @fadd_unordered() {
  63 ; CHECK-V8-LABEL: 'fadd_unordered'
  64 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
  65 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
  66 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
  67 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
  68 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
  69 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
  70 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
  71 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
  72 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
  73 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
  74 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  75 ;
  76 ; CHECK-MVEFP-LABEL: 'fadd_unordered'
  77 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
  78 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
  79 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
  80 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
  81 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
  82 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
  83 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
  84 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
  85 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
  86 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
  87 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  88 ;
  89 ; CHECK-MVEI-LABEL: 'fadd_unordered'
  90 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
  91 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
  92 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 169 for instruction: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
  93 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 225 for instruction: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
  94 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
  95 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
  96 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 85 for instruction: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
  97 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
  98 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
  99 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
 100 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 101 ;
 102   %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
 103   %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
 104   %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
 105   %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
 106   %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
 107   %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
 108   %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
 109   %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
 110   %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
 111   %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
 112   ret void
 113 }
 114
 ; fmul_strict: llvm.vector.reduce.fmul called without fast-math flags (the
 ; sequential, in-order form per the LLVM LangRef) on half, float, double and
 ; fp128 vectors. The CHECK groups below pin the estimated cost printed for
 ; every call under the matching RUN configuration.
 ; NOTE(review): the half/float/double calls pass 0.0 as the start value, which
 ; is not the multiplicative identity; the RUN lines only print the cost model
 ; (-disable-output), so the computed value is never observed by this test.
 115 define void @fmul_strict() {
 116 ; CHECK-V8-LABEL: 'fmul_strict'
 117 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
 118 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
 119 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
 120 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
 121 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
 122 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
 123 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
 124 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
 125 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
 126 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
 127 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 128 ;
 129 ; CHECK-MVEFP-LABEL: 'fmul_strict'
 130 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
 131 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
 132 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
 133 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
 134 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
 135 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
 136 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
 137 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
 138 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
 139 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
 140 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 141 ;
 142 ; CHECK-MVEI-LABEL: 'fmul_strict'
 143 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
 144 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
 145 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
 146 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
 147 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
 148 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
 149 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
 150 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
 151 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
 152 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
 153 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 154 ;
 155   %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0.0, <2 x half> undef)
 156   %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0.0, <4 x half> undef)
 157   %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0.0, <8 x half> undef)
 158   %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0.0, <16 x half> undef)
 159   %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.0, <2 x float> undef)
 160   %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
 161   %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
 162   %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
 163   %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
 164   %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
 165   ret void
 166 }
 167
 168
 ; fmul_unordered: the same llvm.vector.reduce.fmul calls as fmul_strict, but
 ; each carries the `reassoc` fast-math flag (which, per the LLVM LangRef, lets
 ; the reduction be evaluated in any order). The CHECK groups below pin the
 ; estimated cost printed for every call under the matching RUN configuration.
 ; NOTE(review): the half/float/double calls pass 0.0 as the start value, which
 ; is not the multiplicative identity; the RUN lines only print the cost model
 ; (-disable-output), so the computed value is never observed by this test.
 169 define void @fmul_unordered() {
 170 ; CHECK-V8-LABEL: 'fmul_unordered'
 171 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
 172 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
 173 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
 174 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
 175 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
 176 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
 177 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
 178 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
 179 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
 180 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
 181 ; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 182 ;
 183 ; CHECK-MVEFP-LABEL: 'fmul_unordered'
 184 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
 185 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
 186 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
 187 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
 188 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
 189 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
 190 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
 191 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
 192 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
 193 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
 194 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 195 ;
 196 ; CHECK-MVEI-LABEL: 'fmul_unordered'
 197 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
 198 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
 199 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 169 for instruction: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
 200 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 225 for instruction: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
 201 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
 202 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
 203 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 85 for instruction: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
 204 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
 205 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
 206 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
 207 ; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 208 ;
 209   %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0.0, <2 x half> undef)
 210   %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0.0, <4 x half> undef)
 211   %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0.0, <8 x half> undef)
 212   %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0.0, <16 x half> undef)
 213   %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.0, <2 x float> undef)
 214   %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
 215   %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
 216   %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
 217   %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
 218   %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
 219   ret void
 220 }
 221
 222
 ; Declarations of the llvm.vector.reduce.fadd overloads called by @fadd_strict
 ; and @fadd_unordered, one per tested vector type (start value, vector operand).
 223 declare half @llvm.vector.reduce.fadd.v2f16(half, <2 x half>)
 224 declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)
 225 declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>)
 226 declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>)
 227 declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
 228 declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
 229 declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
 230 declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
 231 declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
 232 declare fp128 @llvm.vector.reduce.fadd.v4f128(fp128, <4 x fp128>)
 233
 234
 ; Declarations of the llvm.vector.reduce.fmul overloads called by @fmul_strict
 ; and @fmul_unordered, one per tested vector type (start value, vector operand).
 235 declare half @llvm.vector.reduce.fmul.v2f16(half, <2 x half>)
 236 declare half @llvm.vector.reduce.fmul.v4f16(half, <4 x half>)
 237 declare half @llvm.vector.reduce.fmul.v8f16(half, <8 x half>)
 238 declare half @llvm.vector.reduce.fmul.v16f16(half, <16 x half>)
 239 declare float @llvm.vector.reduce.fmul.v2f32(float, <2 x float>)
 240 declare float @llvm.vector.reduce.fmul.v4f32(float, <4 x float>)
 241 declare float @llvm.vector.reduce.fmul.v8f32(float, <8 x float>)
 242 declare double @llvm.vector.reduce.fmul.v2f64(double, <2 x double>)
 243 declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>)
 244 declare fp128 @llvm.vector.reduce.fmul.v4f128(fp128, <4 x fp128>)