llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
   2 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
   3 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
   4 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s  --check-prefix=SIZE
   5
   6 define void @reduce_fmul_bfloat() {
   7 ; FP-REDUCE-LABEL: 'reduce_fmul_bfloat'
   8 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
   9 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2 = call fast bfloat @llvm.vector.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
  10 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call fast bfloat @llvm.vector.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
  11 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8 = call fast bfloat @llvm.vector.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
  12 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %V16 = call fast bfloat @llvm.vector.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
  13 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 211 for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
  14 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
  15 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
  16 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
  17 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
  18 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
  19 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
  20 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
  21 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
  22 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  23 ;
  24 ; SIZE-LABEL: 'reduce_fmul_bfloat'
  25 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
  26 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call fast bfloat @llvm.vector.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
  27 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call fast bfloat @llvm.vector.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
  28 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8 = call fast bfloat @llvm.vector.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
  29 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16 = call fast bfloat @llvm.vector.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
  30 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
  31 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
  32 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
  33 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
  34 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
  35 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
  36 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
  37 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
  38 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
  39 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  40 ;
  41   %V1 = call fast bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef)
  42   %V2 = call fast bfloat @llvm.vector.reduce.fmul.v2bf16(bfloat 0.0, <2 x bfloat> undef)
  43   %V4 = call fast bfloat @llvm.vector.reduce.fmul.v4bf16(bfloat 0.0, <4 x bfloat> undef)
  44   %V8 = call fast bfloat @llvm.vector.reduce.fmul.v8bf16(bfloat 0.0, <8 x bfloat> undef)
  45   %V16 = call fast bfloat @llvm.vector.reduce.fmul.v16bf16(bfloat 0.0, <16 x bfloat> undef)
  46   %v32 = call fast bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef)
  47   %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef)
  48   %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef)
  49   %NXV1 = call fast bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0.0, <vscale x 1 x bfloat> undef)
  50   %NXV2 = call fast bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0.0, <vscale x 2 x bfloat> undef)
  51   %NXV4 = call fast bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0.0, <vscale x 4 x bfloat> undef)
  52   %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef)
  53   %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef)
  54   %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef)
  55   ret void
  56 }
  57
  58 define void @reduce_fmul_half() {
  59 ; FP-REDUCE-ZVFH-LABEL: 'reduce_fmul_half'
  60 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef)
  61 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call fast half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
  62 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4 = call fast half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
  63 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8 = call fast half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
  64 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V16 = call fast half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
  65 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 151 for instruction: %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
  66 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
  67 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
  68 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
  69 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
  70 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
  71 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
  72 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
  73 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
  74 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  75 ;
  76 ; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fmul_half'
  77 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef)
  78 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2 = call fast half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
  79 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call fast half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
  80 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8 = call fast half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
  81 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %V16 = call fast half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
  82 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 211 for instruction: %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
  83 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
  84 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
  85 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
  86 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
  87 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
  88 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
  89 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
  90 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
  91 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  92 ;
  93 ; SIZE-LABEL: 'reduce_fmul_half'
  94 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef)
  95 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call fast half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
  96 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call fast half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
  97 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8 = call fast half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
  98 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16 = call fast half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
  99 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
 100 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
 101 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
 102 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
 103 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
 104 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
 105 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
 106 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
 107 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
 108 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 109 ;
 110   %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef)
 111   %V2 = call fast half @llvm.vector.reduce.fmul.v2f16(half 0.0, <2 x half> undef)
 112   %V4 = call fast half @llvm.vector.reduce.fmul.v4f16(half 0.0, <4 x half> undef)
 113   %V8 = call fast half @llvm.vector.reduce.fmul.v8f16(half 0.0, <8 x half> undef)
 114   %V16 = call fast half @llvm.vector.reduce.fmul.v16f16(half 0.0, <16 x half> undef)
 115   %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0.0, <32 x half> undef)
 116   %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0.0, <64 x half> undef)
 117   %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0.0, <128 x half> undef)
 118   %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0.0, <vscale x 1 x half> undef)
 119   %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0.0, <vscale x 2 x half> undef)
 120   %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0.0, <vscale x 4 x half> undef)
 121   %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0.0, <vscale x 8 x half> undef)
 122   %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0.0, <vscale x 16 x half> undef)
 123   %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0.0, <vscale x 32 x half> undef)
 124   ret void
 125 }
 126
 127 define void @reduce_fmul_float() {
 128 ; FP-REDUCE-LABEL: 'reduce_fmul_float'
 129 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast float @llvm.vector.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef)
 130 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call fast float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
 131 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
 132 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V8 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
 133 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 121 for instruction: %V16 = call fast float @llvm.vector.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef)
 134 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 451 for instruction: %v32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
 135 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 483 for instruction: %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
 136 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 547 for instruction: %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
 137 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
 138 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
 139 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
 140 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
 141 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
 142 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 143 ;
 144 ; SIZE-LABEL: 'reduce_fmul_float'
 145 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast float @llvm.vector.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef)
 146 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call fast float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
 147 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
 148 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
 149 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16 = call fast float @llvm.vector.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef)
 150 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
 151 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
 152 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
 153 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
 154 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
 155 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
 156 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
 157 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
 158 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 159 ;
 160   %V1 = call fast float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef)
 161   %V2 = call fast float @llvm.vector.reduce.fmul.v2f32(float 0.0, <2 x float> undef)
 162   %V4 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
 163   %V8 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
 164   %V16 = call fast float @llvm.vector.reduce.fmul.v16f32(float 0.0, <16 x float> undef)
 165   %v32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.0, <32 x float> undef)
 166   %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.0, <64 x float> undef)
 167   %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.0, <128 x float> undef)
 168   %NXV1 = call fast float @llvm.vector.reduce.fmul.nxv1f32(float 0.0, <vscale x 1 x float> undef)
 169   %NXV2 = call fast float @llvm.vector.reduce.fmul.nxv2f32(float 0.0, <vscale x 2 x float> undef)
 170   %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.0, <vscale x 4 x float> undef)
 171   %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.0, <vscale x 8 x float> undef)
 172   %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.0, <vscale x 16 x float> undef)
 173   ret void
 174 }
 175
 176 define void @reduce_fmul_double() {
 177 ; FP-REDUCE-LABEL: 'reduce_fmul_double'
 178 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast double @llvm.vector.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef)
 179 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
 180 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V4 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
 181 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %V8 = call fast double @llvm.vector.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef)
 182 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 361 for instruction: %V16 = call fast double @llvm.vector.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef)
 183 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 393 for instruction: %v32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
 184 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 457 for instruction: %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
 185 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 585 for instruction: %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
 186 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
 187 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
 188 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
 189 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
 190 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 191 ;
 192 ; SIZE-LABEL: 'reduce_fmul_double'
 193 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast double @llvm.vector.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef)
 194 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
 195 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
 196 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8 = call fast double @llvm.vector.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef)
 197 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16 = call fast double @llvm.vector.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef)
 198 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
 199 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
 200 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
 201 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
 202 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
 203 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
 204 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
 205 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 206 ;
 207   %V1 = call fast double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef)
 208   %V2 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
 209   %V4 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
 210   %V8 = call fast double @llvm.vector.reduce.fmul.v8f64(double 0.0, <8 x double> undef)
 211   %V16 = call fast double @llvm.vector.reduce.fmul.v16f64(double 0.0, <16 x double> undef)
 212   %v32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.0, <32 x double> undef)
 213   %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.0, <64 x double> undef)
 214   %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.0, <128 x double> undef)
 215   %NXV1 = call fast double @llvm.vector.reduce.fmul.nxv1f64(double 0.0, <vscale x 1 x double> undef)
 216   %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.0, <vscale x 2 x double> undef)
 217   %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.0, <vscale x 4 x double> undef)
 218   %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.0, <vscale x 8 x double> undef)
 219   ret void
 220 }
 221
 222 define void @reduce_ordered_fmul_bfloat() {
 223 ; FP-REDUCE-LABEL: 'reduce_ordered_fmul_bfloat'
 224 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
 225 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call bfloat @llvm.vector.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
 226 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call bfloat @llvm.vector.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
 227 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8 = call bfloat @llvm.vector.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
 228 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16 = call bfloat @llvm.vector.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
 229 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 230 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 231 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
 232 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
 233 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
 234 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
 235 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
 236 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
 237 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
 238 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 239 ;
 240 ; SIZE-LABEL: 'reduce_ordered_fmul_bfloat'
 241 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
 242 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2 = call bfloat @llvm.vector.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
 243 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call bfloat @llvm.vector.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
 244 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8 = call bfloat @llvm.vector.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
 245 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16 = call bfloat @llvm.vector.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
 246 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 247 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 191 for instruction: %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 248 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 382 for instruction: %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
 249 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
 250 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
 251 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
 252 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
 253 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
 254 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
 255 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 256 ;
 257   %V1 = call bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef)
 258   %V2 = call bfloat @llvm.vector.reduce.fmul.v2bf16(bfloat 0.0, <2 x bfloat> undef)
 259   %V4 = call bfloat @llvm.vector.reduce.fmul.v4bf16(bfloat 0.0, <4 x bfloat> undef)
 260   %V8 = call bfloat @llvm.vector.reduce.fmul.v8bf16(bfloat 0.0, <8 x bfloat> undef)
 261   %V16 = call bfloat @llvm.vector.reduce.fmul.v16bf16(bfloat 0.0, <16 x bfloat> undef)
 262   %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef)
 263   %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef)
 264   %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef)
 265   %NXV1 = call bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0.0, <vscale x 1 x bfloat> undef)
 266   %NXV2 = call bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0.0, <vscale x 2 x bfloat> undef)
 267   %NXV4 = call bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0.0, <vscale x 4 x bfloat> undef)
 268   %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef)
 269   %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef)
 270   %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef)
 271   ret void
 272 }
 273
 274 define void @reduce_ordered_fmul_half() {
 275 ; FP-REDUCE-LABEL: 'reduce_ordered_fmul_half'
 276 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef)
 277 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
 278 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
 279 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
 280 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
 281 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
 282 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
 283 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
 284 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
 285 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
 286 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
 287 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
 288 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
 289 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
 290 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 291 ;
 292 ; SIZE-LABEL: 'reduce_ordered_fmul_half'
 293 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call half @llvm.vector.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef)
 294 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
 295 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
 296 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
 297 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
 298 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
 299 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 191 for instruction: %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
 300 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 382 for instruction: %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
 301 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
 302 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
 303 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
 304 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
 305 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
 306 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
 307 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 308 ;
 309   %V1 = call half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef)
 310   %V2 = call half @llvm.vector.reduce.fmul.v2f16(half 0.0, <2 x half> undef)
 311   %V4 = call half @llvm.vector.reduce.fmul.v4f16(half 0.0, <4 x half> undef)
 312   %V8 = call half @llvm.vector.reduce.fmul.v8f16(half 0.0, <8 x half> undef)
 313   %V16 = call half @llvm.vector.reduce.fmul.v16f16(half 0.0, <16 x half> undef)
 314   %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0.0, <32 x half> undef)
 315   %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0.0, <64 x half> undef)
 316   %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0.0, <128 x half> undef)
 317   %NXV1 = call half @llvm.vector.reduce.fmul.nxv1f16(half 0.0, <vscale x 1 x half> undef)
 318   %NXV2 = call half @llvm.vector.reduce.fmul.nxv2f16(half 0.0, <vscale x 2 x half> undef)
 319   %NXV4 = call half @llvm.vector.reduce.fmul.nxv4f16(half 0.0, <vscale x 4 x half> undef)
 320   %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0.0, <vscale x 8 x half> undef)
 321   %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0.0, <vscale x 16 x half> undef)
 322   %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0.0, <vscale x 32 x half> undef)
 323   ret void
 324 }
 325
 326 define void @reduce_ordered_fmul_float() {
 327 ; FP-REDUCE-LABEL: 'reduce_ordered_fmul_float'
 328 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call float @llvm.vector.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef)
 329 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
 330 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
 331 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
 332 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16 = call float @llvm.vector.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef)
 333 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
 334 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
 335 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 508 for instruction: %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
 336 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
 337 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
 338 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
 339 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
 340 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
 341 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 342 ;
 343 ; SIZE-LABEL: 'reduce_ordered_fmul_float'
 344 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call float @llvm.vector.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef)
 345 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
 346 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
 347 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
 348 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16 = call float @llvm.vector.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef)
 349 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
 350 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
 351 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 380 for instruction: %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
 352 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
 353 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
 354 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
 355 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
 356 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
 357 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 358 ;
 359   %V1 = call float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef)
 360   %V2 = call float @llvm.vector.reduce.fmul.v2f32(float 0.0, <2 x float> undef)
 361   %V4 = call float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
 362   %V8 = call float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
 363   %V16 = call float @llvm.vector.reduce.fmul.v16f32(float 0.0, <16 x float> undef)
 364   %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.0, <32 x float> undef)
 365   %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.0, <64 x float> undef)
 366   %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.0, <128 x float> undef)
 367   %NXV1 = call float @llvm.vector.reduce.fmul.nxv1f32(float 0.0, <vscale x 1 x float> undef)
 368   %NXV2 = call float @llvm.vector.reduce.fmul.nxv2f32(float 0.0, <vscale x 2 x float> undef)
 369   %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.0, <vscale x 4 x float> undef)
 370   %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.0, <vscale x 8 x float> undef)
 371   %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.0, <vscale x 16 x float> undef)
 372   ret void
 373 }
 374
 375 define void @reduce_ordered_fmul_double() {
 376 ; FP-REDUCE-LABEL: 'reduce_ordered_fmul_double'
 377 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call double @llvm.vector.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef)
 378 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
 379 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
 380 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8 = call double @llvm.vector.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef)
 381 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16 = call double @llvm.vector.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef)
 382 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
 383 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 252 for instruction: %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
 384 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 504 for instruction: %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
 385 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
 386 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
 387 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
 388 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
 389 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 390 ;
 391 ; SIZE-LABEL: 'reduce_ordered_fmul_double'
 392 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call double @llvm.vector.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef)
 393 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
 394 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
 395 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8 = call double @llvm.vector.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef)
 396 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16 = call double @llvm.vector.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef)
 397 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
 398 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 188 for instruction: %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
 399 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 376 for instruction: %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
 400 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
 401 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
 402 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
 403 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
 404 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 405 ;
 406   %V1 = call double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef)
 407   %V2 = call double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
 408   %V4 = call double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
 409   %V8 = call double @llvm.vector.reduce.fmul.v8f64(double 0.0, <8 x double> undef)
 410   %V16 = call double @llvm.vector.reduce.fmul.v16f64(double 0.0, <16 x double> undef)
 411   %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.0, <32 x double> undef)
 412   %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.0, <64 x double> undef)
 413   %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.0, <128 x double> undef)
 414   %NXV1 = call double @llvm.vector.reduce.fmul.nxv1f64(double 0.0, <vscale x 1 x double> undef)
 415   %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.0, <vscale x 2 x double> undef)
 416   %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.0, <vscale x 4 x double> undef)
 417   %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.0, <vscale x 8 x double> undef)
 418   ret void
 419 }