test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll

   1 ; RUN: opt < %s -instcombine -S | FileCheck %s
   2
   3 ; Pattern under test: (V * C1) * C2 => V * (C1 * C2), where C1 and C2 are constant vectors.
   4 ; Verify this doesn't fold when no fast-math-flags are specified: both fmuls must remain in the output.
   5 define <4 x float> @test_fmul(<4 x float> %V) {
   6 ; CHECK-LABEL: @test_fmul(
   7 ; CHECK-NEXT:     [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
   8 ; CHECK-NEXT:     [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
   9 ; CHECK-NEXT:     ret <4 x float> [[TMP2]]
  10         %Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; Y = V * C1
  11         %Z = fmul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 > ; Z = Y * C2
  12         ret <4 x float> %Z
  13 }
  14
  15 ; Pattern under test: (V * C1) * C2 => V * (C1 * C2), where C1 and C2 are constant vectors.
  16 ; Verify this folds with 'fast': a single fmul by the element-wise product <1.0, 4.0e+05, -9.0, 16.0> is expected.
  17 define <4 x float> @test_fmul_fast(<4 x float> %V) {
  18 ; CHECK-LABEL: @test_fmul_fast(
  19 ; CHECK-NEXT:     [[TMP1:%.*]] = fmul fast <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
  20 ; CHECK-NEXT:     ret <4 x float> [[TMP1]]
  21         %Y = fmul fast <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; Y = V * C1
  22         %Z = fmul fast <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 > ; Z = Y * C2
  23         ret <4 x float> %Z
  24 }
  25
  26 ; Pattern under test: (V * C1) * C2 => V * (C1 * C2), where C1 and C2 are constant vectors.
  27 ; Verify this folds to a single fmul with 'reassoc' and 'nsz' ('nsz' not technically required)
  28 define <4 x float> @test_fmul_reassoc_nsz(<4 x float> %V) {
  29 ; CHECK-LABEL: @test_fmul_reassoc_nsz(
  30 ; CHECK-NEXT:     [[TMP1:%.*]] = fmul reassoc nsz <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
  31 ; CHECK-NEXT:     ret <4 x float> [[TMP1]]
  32         %Y = fmul reassoc nsz <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; Y = V * C1
  33         %Z = fmul reassoc nsz <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 > ; Z = Y * C2
  34         ret <4 x float> %Z
  35 }
  36
  37 ; Pattern under test: (V * C1) * C2 => V * (C1 * C2), with only 'reassoc' set on both fmuls.
  38 ; TODO: This doesn't require 'nsz'.  It should fold to V * { 1.0, 4.0e+05, -9.0, 16.0 }; the expected output below currently keeps both fmuls.
  39 define <4 x float> @test_fmul_reassoc(<4 x float> %V) {
  40 ; CHECK-LABEL: @test_fmul_reassoc(
  41 ; CHECK-NEXT:     [[TMP1:%.*]] = fmul reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  42 ; CHECK-NEXT:     [[TMP2:%.*]] = fmul reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
  43 ; CHECK-NEXT:     ret <4 x float> [[TMP2]]
  44         %Y = fmul reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; Y = V * C1
  45         %Z = fmul reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 > ; Z = Y * C2
  46         ret <4 x float> %Z
  47 }
  48
  49 ; Pattern under test: (V + C1) + C2 => V + (C1 + C2), where C1 and C2 are constant vectors.
  50 ; Verify this doesn't fold when no fast-math-flags are specified: both fadds must remain in the output.
  51 define <4 x float> @test_fadd(<4 x float> %V) {
  52 ; CHECK-LABEL: @test_fadd(
  53 ; CHECK-NEXT:     [[TMP1:%.*]] = fadd <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  54 ; CHECK-NEXT:     [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
  55 ; CHECK-NEXT:     ret <4 x float> [[TMP2]]
  56         %Y = fadd <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; Y = V + C1
  57         %Z = fadd <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 > ; Z = Y + C2
  58         ret <4 x float> %Z
  59 }
  60
  61 ; Pattern under test: (V + C1) + C2 => V + (C1 + C2), where C1 and C2 are constant vectors.
  62 ; Verify this folds with 'fast': a single fadd of the element-wise sum <2.0, 4.0, 0.0, 8.0> is expected.
  63 define <4 x float> @test_fadd_fast(<4 x float> %V) {
  64 ; CHECK-LABEL: @test_fadd_fast(
  65 ; CHECK-NEXT:     [[TMP1:%.*]] = fadd fast <4 x float> [[V:%.*]], <float 2.000000e+00, float 4.000000e+00, float 0.000000e+00, float 8.000000e+00>
  66 ; CHECK-NEXT:     ret <4 x float> [[TMP1]]
  67         %Y = fadd fast <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; Y = V + C1
  68         %Z = fadd fast <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 > ; Z = Y + C2
  69         ret <4 x float> %Z
  70 }
  71
  72 ; Pattern under test: (V + C1) + C2 => V + (C1 + C2), where C1 and C2 are constant vectors.
  73 ; Verify this folds to a single fadd with 'reassoc' and 'nsz' ('nsz' not technically required)
  74 define <4 x float> @test_fadd_reassoc_nsz(<4 x float> %V) {
  75 ; CHECK-LABEL: @test_fadd_reassoc_nsz(
  76 ; CHECK-NEXT:     [[TMP1:%.*]] = fadd reassoc nsz <4 x float> [[V:%.*]], <float 2.000000e+00, float 4.000000e+00, float 0.000000e+00, float 8.000000e+00>
  77 ; CHECK-NEXT:     ret <4 x float> [[TMP1]]
  78         %Y = fadd reassoc nsz <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; Y = V + C1
  79         %Z = fadd reassoc nsz <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 > ; Z = Y + C2
  80         ret <4 x float> %Z
  81 }
  82
  83 ; Pattern under test: (V + C1) + C2 => V + (C1 + C2), with only 'reassoc' set on both fadds.
  84 ; TODO: This doesn't require 'nsz'.  It should fold to V + { 2.0, 4.0, 0.0, 8.0 }; the expected output below currently keeps both fadds.
  85 define <4 x float> @test_fadd_reassoc(<4 x float> %V) {
  86 ; CHECK-LABEL: @test_fadd_reassoc(
  87 ; CHECK-NEXT:     [[TMP1:%.*]] = fadd reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  88 ; CHECK-NEXT:     [[TMP2:%.*]] = fadd reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
  89 ; CHECK-NEXT:     ret <4 x float> [[TMP2]]
  90         %Y = fadd reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; Y = V + C1
  91         %Z = fadd reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 > ; Z = Y + C2
  92         ret <4 x float> %Z
  93 }
  94
  95 ; Pattern under test: ( A + C1 ) + ( B + -C1 ), where the second constant vector is the negation of the first.
  96 ; Verify this doesn't fold when no fast-math-flags are specified: all three fadds must remain in the output.
  97 define <4 x float> @test_fadds_cancel_(<4 x float> %A, <4 x float> %B) {
  98 ; CHECK-LABEL: @test_fadds_cancel_(
  99 ; CHECK-NEXT:     [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
 100 ; CHECK-NEXT:     [[TMP2:%.*]] = fadd <4 x float> [[B:%.*]], <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
 101 ; CHECK-NEXT:     [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
 102 ; CHECK-NEXT:     ret <4 x float> [[TMP3]]
 103         %X = fadd <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; X = A + C1
 104         %Y = fadd <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 > ; Y = B + -C1
 105         %Z = fadd <4 x float> %X, %Y ; Z = X + Y
 106         ret <4 x float> %Z
 107 }
 108
 109 ; Pattern under test: ( A + C1 ) + ( B + -C1 ), where the second constant vector is the negation of the first.
 110 ; Verify this folds to 'A + B' with 'fast': the two constants cancel and a single fadd is expected.
 111 define <4 x float> @test_fadds_cancel_fast(<4 x float> %A, <4 x float> %B) {
 112 ; CHECK-LABEL: @test_fadds_cancel_fast(
 113 ; CHECK-NEXT:     [[TMP1:%.*]] = fadd fast <4 x float> [[A:%.*]], [[B:%.*]]
 114 ; CHECK-NEXT:     ret <4 x float> [[TMP1]]
 115         %X = fadd fast <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; X = A + C1
 116         %Y = fadd fast <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 > ; Y = B + -C1
 117         %Z = fadd fast <4 x float> %X, %Y ; Z = X + Y
 118         ret <4 x float> %Z
 119 }
 120
 121 ; Pattern under test: ( A + C1 ) + ( B + -C1 ), where the second constant vector is the negation of the first.
 122 ; Verify this folds to a single 'A + B' fadd with 'reassoc' and 'nsz' ('nsz' is required)
 123 define <4 x float> @test_fadds_cancel_reassoc_nsz(<4 x float> %A, <4 x float> %B) {
 124 ; CHECK-LABEL: @test_fadds_cancel_reassoc_nsz(
 125 ; CHECK-NEXT:     [[TMP1:%.*]] = fadd reassoc nsz <4 x float> [[A:%.*]], [[B:%.*]]
 126 ; CHECK-NEXT:     ret <4 x float> [[TMP1]]
 127         %X = fadd reassoc nsz <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; X = A + C1
 128         %Y = fadd reassoc nsz <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 > ; Y = B + -C1
 129         %Z = fadd reassoc nsz <4 x float> %X, %Y ; Z = X + Y
 130         ret <4 x float> %Z
 131 }
 132
 133 ; Pattern under test: ( A + C1 ) + ( B + -C1 ), with only 'reassoc' set on all three fadds.
 134 ; Verify the fold is not done with only 'reassoc' ('nsz' is required). NOTE(review): presumably because dropping the leftover '+ 0.0' needs 'nsz' - confirm.
 135 define <4 x float> @test_fadds_cancel_reassoc(<4 x float> %A, <4 x float> %B) {
 136 ; CHECK-LABEL: @test_fadds_cancel_reassoc(
 137 ; CHECK-NEXT:     [[TMP1:%.*]] = fadd reassoc <4 x float> [[A:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
 138 ; CHECK-NEXT:     [[TMP2:%.*]] = fadd reassoc <4 x float> [[B:%.*]], <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
 139 ; CHECK-NEXT:     [[TMP3:%.*]] = fadd reassoc <4 x float> [[TMP1]], [[TMP2]]
 140 ; CHECK-NEXT:     ret <4 x float> [[TMP3]]
 141         %X = fadd reassoc <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; X = A + C1
 142         %Y = fadd reassoc <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 > ; Y = B + -C1
 143         %Z = fadd reassoc <4 x float> %X, %Y ; Z = X + Y
 144         ret <4 x float> %Z
 145 }