llvm/test/Analysis/CostModel/AMDGPU/arith-ssat.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
   2 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST %s
   3 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST %s
   4 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST %s
   5 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s
   6 ; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST-SIZE %s
   7 ; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST-SIZE %s
   8 ; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST-SIZE %s
   9 ; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW-SIZE %s
  10 ; END.
  11
  12 declare i64        @llvm.sadd.sat.i64(i64, i64)
  13 declare <2 x i64>  @llvm.sadd.sat.v2i64(<2 x i64>, <2 x i64>)
  14 declare <4 x i64>  @llvm.sadd.sat.v4i64(<4 x i64>, <4 x i64>)
  15 declare <5 x i64>  @llvm.sadd.sat.v5i64(<5 x i64>, <5 x i64>)
  16 declare <8 x i64>  @llvm.sadd.sat.v8i64(<8 x i64>, <8 x i64>)
  17
  18 declare i32        @llvm.sadd.sat.i32(i32, i32)
  19 declare <2 x i32>  @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>)
  20 declare <4 x i32>  @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)
  21 declare <8 x i32>  @llvm.sadd.sat.v8i32(<8 x i32>, <8 x i32>)
  22 declare <9 x i32>  @llvm.sadd.sat.v9i32(<9 x i32>, <9 x i32>)
  23 declare <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32>, <16 x i32>)
  24
  25 declare i16        @llvm.sadd.sat.i16(i16, i16)
  26 declare <2 x i16>  @llvm.sadd.sat.v2i16(<2 x i16>, <2 x i16>)
  27 declare <4 x i16>  @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>)
  28 declare <8 x i16>  @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)
  29 declare <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16>, <16 x i16>)
  30 declare <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16>, <17 x i16>)
  31 declare <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16>, <32 x i16>)
  32
  33 declare i8         @llvm.sadd.sat.i8(i8,  i8)
  34 declare <2 x i8>   @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>)
  35 declare <4 x i8>   @llvm.sadd.sat.v4i8(<4 x i8>, <4 x i8>)
  36 declare <8 x i8>   @llvm.sadd.sat.v8i8(<8 x i8>, <8 x i8>)
  37 declare <16 x i8>  @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>)
  38 declare <32 x i8>  @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>)
  39 declare <33 x i8>  @llvm.sadd.sat.v33i8(<33 x i8>, <33 x i8>)
  40 declare <64 x i8>  @llvm.sadd.sat.v64i8(<64 x i8>, <64 x i8>)
  41
  42 define i32 @add(i32 %arg) {
  43 ; FAST-LABEL: 'add'
  44 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
  45 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
  46 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
  47 ; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V5I64 = call <5 x i64> @llvm.sadd.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
  48 ; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
  49 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
  50 ; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
  51 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
  52 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
  53 ; FAST-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V9I32 = call <9 x i32> @llvm.sadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
  54 ; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
  55 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
  56 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
  57 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
  58 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
  59 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
  60 ; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
  61 ; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
  62 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
  63 ; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
  64 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
  65 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
  66 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
  67 ; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
  68 ; FAST-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V33I8 = call <33 x i8> @llvm.sadd.sat.v33i8(<33 x i8> undef, <33 x i8> undef)
  69 ; FAST-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
  70 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
  71 ;
  72 ; SLOW-LABEL: 'add'
  73 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
  74 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
  75 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
  76 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V5I64 = call <5 x i64> @llvm.sadd.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
  77 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
  78 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
  79 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
  80 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
  81 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
  82 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V9I32 = call <9 x i32> @llvm.sadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
  83 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
  84 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
  85 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
  86 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
  87 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
  88 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
  89 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
  90 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
  91 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
  92 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
  93 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
  94 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
  95 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
  96 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
  97 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V33I8 = call <33 x i8> @llvm.sadd.sat.v33i8(<33 x i8> undef, <33 x i8> undef)
  98 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
  99 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 100 ;
 101 ; FAST-SIZE-LABEL: 'add'
 102 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
 103 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
 104 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
 105 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V5I64 = call <5 x i64> @llvm.sadd.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
 106 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
 107 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
 108 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
 109 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
 110 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
 111 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V9I32 = call <9 x i32> @llvm.sadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
 112 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
 113 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
 114 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 115 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
 116 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
 117 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
 118 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
 119 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 120 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
 121 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 122 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
 123 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
 124 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
 125 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
 126 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V33I8 = call <33 x i8> @llvm.sadd.sat.v33i8(<33 x i8> undef, <33 x i8> undef)
 127 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
 128 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 129 ;
 130 ; SLOW-SIZE-LABEL: 'add'
 131 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
 132 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
 133 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
 134 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V5I64 = call <5 x i64> @llvm.sadd.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
 135 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
 136 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
 137 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
 138 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
 139 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
 140 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V9I32 = call <9 x i32> @llvm.sadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
 141 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
 142 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
 143 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 144 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
 145 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
 146 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
 147 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
 148 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 149 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
 150 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 151 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
 152 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
 153 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
 154 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
 155 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V33I8 = call <33 x i8> @llvm.sadd.sat.v33i8(<33 x i8> undef, <33 x i8> undef)
 156 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
 157 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 158 ;
 159   %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
 160   %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
 161   %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
 162   %V5I64 = call <5 x i64> @llvm.sadd.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
 163   %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
 164
 165   %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
 166   %V2I32  = call <2 x i32>  @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
 167   %V4I32  = call <4 x i32>  @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
 168   %V8I32  = call <8 x i32>  @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
 169   %V9I32  = call <9 x i32>  @llvm.sadd.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
 170   %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
 171
 172   %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
 173   %V2I16  = call <2 x i16>  @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 174   %V4I16  = call <4 x i16>  @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
 175   %V8I16  = call <8 x i16>  @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
 176   %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
 177   %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
 178   %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 179
 180   %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
 181   %V2I8  = call <2 x i8>  @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 182   %V4I8  = call <4 x i8>  @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
 183   %V8I8  = call <8 x i8>  @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
 184   %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
 185   %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
 186   %V33I8 = call <33 x i8> @llvm.sadd.sat.v33i8(<33 x i8> undef, <33 x i8> undef)
 187   %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
 188
 189   ret i32 undef
 190 }
 191
 192 declare i64        @llvm.ssub.sat.i64(i64, i64)
 193 declare <2 x i64>  @llvm.ssub.sat.v2i64(<2 x i64>, <2 x i64>)
 194 declare <4 x i64>  @llvm.ssub.sat.v4i64(<4 x i64>, <4 x i64>)
 195 declare <5 x i64>  @llvm.ssub.sat.v5i64(<5 x i64>, <5 x i64>)
 196 declare <8 x i64>  @llvm.ssub.sat.v8i64(<8 x i64>, <8 x i64>)
 197
 198 declare i32        @llvm.ssub.sat.i32(i32, i32)
 199 declare <2 x i32>  @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>)
 200 declare <4 x i32>  @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
 201 declare <8 x i32>  @llvm.ssub.sat.v8i32(<8 x i32>, <8 x i32>)
 202 declare <9 x i32>  @llvm.ssub.sat.v9i32(<9 x i32>, <9 x i32>)
 203 declare <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32>, <16 x i32>)
 204
 205 declare i16        @llvm.ssub.sat.i16(i16, i16)
 206 declare <2 x i16>  @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>)
 207 declare <4 x i16>  @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>)
 208 declare <8 x i16>  @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)
 209 declare <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16>, <16 x i16>)
 210 declare <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16>, <17 x i16>)
 211 declare <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16>, <32 x i16>)
 212
 213 declare i8         @llvm.ssub.sat.i8(i8,  i8)
 214 declare <2 x i8>   @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>)
 215 declare <4 x i8>   @llvm.ssub.sat.v4i8(<4 x i8>, <4 x i8>)
 216 declare <8 x i8>   @llvm.ssub.sat.v8i8(<8 x i8>, <8 x i8>)
 217 declare <16 x i8>  @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)
 218 declare <32 x i8>  @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>)
 219 declare <33 x i8>  @llvm.ssub.sat.v33i8(<33 x i8>, <33 x i8>)
 220 declare <64 x i8>  @llvm.ssub.sat.v64i8(<64 x i8>, <64 x i8>)
 221
 222 define i32 @sub(i32 %arg) {
 223 ; FAST-LABEL: 'sub'
 224 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
 225 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
 226 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
 227 ; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V5I64 = call <5 x i64> @llvm.ssub.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
 228 ; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
 229 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
 230 ; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
 231 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
 232 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
 233 ; FAST-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V9I32 = call <9 x i32> @llvm.ssub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
 234 ; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
 235 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
 236 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 237 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
 238 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
 239 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
 240 ; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
 241 ; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 242 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
 243 ; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 244 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
 245 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
 246 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
 247 ; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
 248 ; FAST-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V33I8 = call <33 x i8> @llvm.ssub.sat.v33i8(<33 x i8> undef, <33 x i8> undef)
 249 ; FAST-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
 250 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 251 ;
 252 ; SLOW-LABEL: 'sub'
 253 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
 254 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
 255 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
 256 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V5I64 = call <5 x i64> @llvm.ssub.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
 257 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
 258 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
 259 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
 260 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
 261 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
 262 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V9I32 = call <9 x i32> @llvm.ssub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
 263 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
 264 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
 265 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 266 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
 267 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
 268 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
 269 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
 270 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 271 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
 272 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 273 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
 274 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
 275 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
 276 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
 277 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V33I8 = call <33 x i8> @llvm.ssub.sat.v33i8(<33 x i8> undef, <33 x i8> undef)
 278 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
 279 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 280 ;
 281 ; FAST-SIZE-LABEL: 'sub'
 282 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
 283 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
 284 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
 285 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V5I64 = call <5 x i64> @llvm.ssub.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
 286 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
 287 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
 288 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
 289 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
 290 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
 291 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V9I32 = call <9 x i32> @llvm.ssub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
 292 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
 293 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
 294 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 295 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
 296 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
 297 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
 298 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
 299 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 300 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
 301 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 302 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
 303 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
 304 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
 305 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
 306 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V33I8 = call <33 x i8> @llvm.ssub.sat.v33i8(<33 x i8> undef, <33 x i8> undef)
 307 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
 308 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 309 ;
 310 ; SLOW-SIZE-LABEL: 'sub'
 311 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
 312 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
 313 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
 314 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V5I64 = call <5 x i64> @llvm.ssub.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
 315 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
 316 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
 317 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
 318 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
 319 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
 320 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V9I32 = call <9 x i32> @llvm.ssub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
 321 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
 322 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
 323 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 324 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
 325 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
 326 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
 327 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
 328 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 329 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
 330 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 331 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
 332 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
 333 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
 334 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
 335 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V33I8 = call <33 x i8> @llvm.ssub.sat.v33i8(<33 x i8> undef, <33 x i8> undef)
 336 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
 337 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 338 ;
 339   %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
 340   %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
 341   %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
 342   %V5I64 = call <5 x i64> @llvm.ssub.sat.v5i64(<5 x i64> undef, <5 x i64> undef)
 343   %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
 344
 345   %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
 346   %V2I32  = call <2 x i32>  @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
 347   %V4I32  = call <4 x i32>  @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
 348   %V8I32  = call <8 x i32>  @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
 349   %V9I32  = call <9 x i32>  @llvm.ssub.sat.v9i32(<9 x i32> undef, <9 x i32> undef)
 350   %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
 351
 352   %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
 353   %V2I16  = call <2 x i16>  @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 354   %V4I16  = call <4 x i16>  @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
 355   %V8I16  = call <8 x i16>  @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
 356   %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
 357   %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
 358   %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 359
 360   %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
 361   %V2I8  = call <2 x i8>  @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 362   %V4I8  = call <4 x i8>  @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
 363   %V8I8  = call <8 x i8>  @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
 364   %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
 365   %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
 366   %V33I8 = call <33 x i8> @llvm.ssub.sat.v33i8(<33 x i8> undef, <33 x i8> undef)
 367   %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
 368
 369   ret i32 undef
 370 }