test/Transforms/SLPVectorizer/X86/cmp_commute.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64--- -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE
   3 ; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64--- -mattr=+avx  | FileCheck %s --check-prefixes=CHECK,AVX
   4
   5 ;
   6 ; Check that we can commute operands when the predicate is symmetric (eq, ne, oeq, uno).
   7 ;
   8
   9 define <4 x i32> @icmp_eq_v4i32(<4 x i32> %a, i32* %b) {
  10 ; CHECK-LABEL: @icmp_eq_v4i32(
  11 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
  12 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
  13 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i32> [[TMP2]], [[A:%.*]]
  14 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
  15 ; CHECK-NEXT:    ret <4 x i32> [[R]]
  16 ;
       ; Input: four scalar `icmp eq` compares between the elements of %a and
       ; four consecutive i32 values loaded from %b, rebuilt into a <4 x i1>
       ; and sign-extended to <4 x i32>.
       ; The CHECK lines above expect a single <4 x i32> load of %b and a
       ; single vector `icmp eq` against %a.
  17   %a0 = extractelement <4 x i32> %a, i32 0
  18   %a1 = extractelement <4 x i32> %a, i32 1
  19   %a2 = extractelement <4 x i32> %a, i32 2
  20   %a3 = extractelement <4 x i32> %a, i32 3
  21   %p0 = getelementptr inbounds i32, i32* %b, i32 0
  22   %p1 = getelementptr inbounds i32, i32* %b, i32 1
  23   %p2 = getelementptr inbounds i32, i32* %b, i32 2
  24   %p3 = getelementptr inbounds i32, i32* %b, i32 3
  25   %b0 = load i32, i32* %p0, align 4
  26   %b1 = load i32, i32* %p1, align 4
  27   %b2 = load i32, i32* %p2, align 4
  28   %b3 = load i32, i32* %p3, align 4
       ; Lanes 0 and 3 compare (a, b); lanes 1 and 2 compare (b, a). `eq` is
       ; symmetric, so the mixed operand order does not change any lane's
       ; result and the predicate stays the same after commuting.
  29   %c0 = icmp eq i32 %a0, %b0
  30   %c1 = icmp eq i32 %b1, %a1
  31   %c2 = icmp eq i32 %b2, %a2
  32   %c3 = icmp eq i32 %a3, %b3
  33   %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
  34   %d1 = insertelement <4 x i1>   %d0, i1 %c1, i32 1
  35   %d2 = insertelement <4 x i1>   %d1, i1 %c2, i32 2
  36   %d3 = insertelement <4 x i1>   %d2, i1 %c3, i32 3
  37   %r = sext <4 x i1> %d3 to <4 x i32>
  38   ret <4 x i32> %r
  39 }
  40
  41 define <4 x i32> @icmp_ne_v4i32(<4 x i32> %a, i32* %b) {
  42 ; CHECK-LABEL: @icmp_ne_v4i32(
  43 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
  44 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
  45 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], [[A:%.*]]
  46 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
  47 ; CHECK-NEXT:    ret <4 x i32> [[R]]
  48 ;
       ; Input: four scalar `icmp ne` compares between the elements of %a and
       ; four consecutive i32 values loaded from %b, rebuilt into a <4 x i1>
       ; and sign-extended to <4 x i32>.
       ; The CHECK lines above expect a single <4 x i32> load of %b and a
       ; single vector `icmp ne` against %a.
  49   %a0 = extractelement <4 x i32> %a, i32 0
  50   %a1 = extractelement <4 x i32> %a, i32 1
  51   %a2 = extractelement <4 x i32> %a, i32 2
  52   %a3 = extractelement <4 x i32> %a, i32 3
  53   %p0 = getelementptr inbounds i32, i32* %b, i32 0
  54   %p1 = getelementptr inbounds i32, i32* %b, i32 1
  55   %p2 = getelementptr inbounds i32, i32* %b, i32 2
  56   %p3 = getelementptr inbounds i32, i32* %b, i32 3
  57   %b0 = load i32, i32* %p0, align 4
  58   %b1 = load i32, i32* %p1, align 4
  59   %b2 = load i32, i32* %p2, align 4
  60   %b3 = load i32, i32* %p3, align 4
       ; Lanes 0 and 3 compare (a, b); lanes 1 and 2 compare (b, a). `ne` is
       ; symmetric, so the mixed operand order does not change any lane's
       ; result and the predicate stays the same after commuting.
  61   %c0 = icmp ne i32 %a0, %b0
  62   %c1 = icmp ne i32 %b1, %a1
  63   %c2 = icmp ne i32 %b2, %a2
  64   %c3 = icmp ne i32 %a3, %b3
  65   %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
  66   %d1 = insertelement <4 x i1>   %d0, i1 %c1, i32 1
  67   %d2 = insertelement <4 x i1>   %d1, i1 %c2, i32 2
  68   %d3 = insertelement <4 x i1>   %d2, i1 %c3, i32 3
  69   %r = sext <4 x i1> %d3 to <4 x i32>
  70   ret <4 x i32> %r
  71 }
  72
  73 define <4 x i32> @fcmp_oeq_v4i32(<4 x float> %a, float* %b) {
  74 ; CHECK-LABEL: @fcmp_oeq_v4i32(
  75 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
  76 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
  77 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp oeq <4 x float> [[TMP2]], [[A:%.*]]
  78 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
  79 ; CHECK-NEXT:    ret <4 x i32> [[R]]
  80 ;
       ; Input: four scalar `fcmp oeq` compares between the elements of %a and
       ; four consecutive floats loaded from %b, rebuilt into a <4 x i1> and
       ; sign-extended to <4 x i32> (hence the v4i32 suffix on a float test).
       ; The CHECK lines above expect a single <4 x float> load of %b and a
       ; single vector `fcmp oeq` against %a.
  81   %a0 = extractelement <4 x float> %a, i32 0
  82   %a1 = extractelement <4 x float> %a, i32 1
  83   %a2 = extractelement <4 x float> %a, i32 2
  84   %a3 = extractelement <4 x float> %a, i32 3
  85   %p0 = getelementptr inbounds float, float* %b, i32 0
  86   %p1 = getelementptr inbounds float, float* %b, i32 1
  87   %p2 = getelementptr inbounds float, float* %b, i32 2
  88   %p3 = getelementptr inbounds float, float* %b, i32 3
  89   %b0 = load float, float* %p0, align 4
  90   %b1 = load float, float* %p1, align 4
  91   %b2 = load float, float* %p2, align 4
  92   %b3 = load float, float* %p3, align 4
       ; Lanes 0 and 3 compare (a, b); lanes 1 and 2 compare (b, a). `oeq` is
       ; symmetric, so the mixed operand order does not change any lane's
       ; result and the predicate stays the same after commuting.
  93   %c0 = fcmp oeq float %a0, %b0
  94   %c1 = fcmp oeq float %b1, %a1
  95   %c2 = fcmp oeq float %b2, %a2
  96   %c3 = fcmp oeq float %a3, %b3
  97   %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
  98   %d1 = insertelement <4 x i1>   %d0, i1 %c1, i32 1
  99   %d2 = insertelement <4 x i1>   %d1, i1 %c2, i32 2
 100   %d3 = insertelement <4 x i1>   %d2, i1 %c3, i32 3
 101   %r = sext <4 x i1> %d3 to <4 x i32>
 102   ret <4 x i32> %r
 103 }
 104
 105 define <4 x i32> @fcmp_uno_v4i32(<4 x float> %a, float* %b) {
 106 ; CHECK-LABEL: @fcmp_uno_v4i32(
 107 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
 108 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
 109 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp uno <4 x float> [[TMP2]], [[A:%.*]]
 110 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
 111 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 112 ;
       ; Input: four scalar `fcmp uno` compares between the elements of %a and
       ; four consecutive floats loaded from %b, rebuilt into a <4 x i1> and
       ; sign-extended to <4 x i32>.
       ; The CHECK lines above expect a single <4 x float> load of %b and a
       ; single vector `fcmp uno` against %a.
 113   %a0 = extractelement <4 x float> %a, i32 0
 114   %a1 = extractelement <4 x float> %a, i32 1
 115   %a2 = extractelement <4 x float> %a, i32 2
 116   %a3 = extractelement <4 x float> %a, i32 3
 117   %p0 = getelementptr inbounds float, float* %b, i32 0
 118   %p1 = getelementptr inbounds float, float* %b, i32 1
 119   %p2 = getelementptr inbounds float, float* %b, i32 2
 120   %p3 = getelementptr inbounds float, float* %b, i32 3
 121   %b0 = load float, float* %p0, align 4
 122   %b1 = load float, float* %p1, align 4
 123   %b2 = load float, float* %p2, align 4
 124   %b3 = load float, float* %p3, align 4
       ; Lanes 0 and 3 compare (a, b); lanes 1 and 2 compare (b, a). `uno`
       ; (true if either operand is a NaN) is symmetric, so the mixed operand
       ; order does not change any lane's result.
 125   %c0 = fcmp uno float %a0, %b0
 126   %c1 = fcmp uno float %b1, %a1
 127   %c2 = fcmp uno float %b2, %a2
 128   %c3 = fcmp uno float %a3, %b3
 129   %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
 130   %d1 = insertelement <4 x i1>   %d0, i1 %c1, i32 1
 131   %d2 = insertelement <4 x i1>   %d1, i1 %c2, i32 2
 132   %d3 = insertelement <4 x i1>   %d2, i1 %c3, i32 3
 133   %r = sext <4 x i1> %d3 to <4 x i32>
 134   ret <4 x i32> %r
 135 }
 136
 137 ;
 138 ; Check that we can commute operands by swapping the predicate (e.g. sgt <-> slt).
 139 ;
 140
 141 define <4 x i32> @icmp_sgt_slt_v4i32(<4 x i32> %a, i32* %b) {
 142 ; CHECK-LABEL: @icmp_sgt_slt_v4i32(
 143 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
 144 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
 145 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[TMP2]], [[A:%.*]]
 146 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
 147 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 148 ;
       ; Input: four scalar signed compares between the elements of %a and four
       ; consecutive i32 values loaded from %b, using two different predicates
       ; (sgt and slt), rebuilt into a <4 x i1> and sign-extended.
       ; The CHECK lines above expect a single <4 x i32> load of %b and a
       ; single vector `icmp slt` with the loaded %b on the left and %a on the
       ; right.
 149   %a0 = extractelement <4 x i32> %a, i32 0
 150   %a1 = extractelement <4 x i32> %a, i32 1
 151   %a2 = extractelement <4 x i32> %a, i32 2
 152   %a3 = extractelement <4 x i32> %a, i32 3
 153   %p0 = getelementptr inbounds i32, i32* %b, i32 0
 154   %p1 = getelementptr inbounds i32, i32* %b, i32 1
 155   %p2 = getelementptr inbounds i32, i32* %b, i32 2
 156   %p3 = getelementptr inbounds i32, i32* %b, i32 3
 157   %b0 = load i32, i32* %p0, align 4
 158   %b1 = load i32, i32* %p1, align 4
 159   %b2 = load i32, i32* %p2, align 4
 160   %b3 = load i32, i32* %p3, align 4
       ; Lanes 0 and 3 are `sgt (a, b)`; lanes 1 and 2 are `slt (b, a)`. These
       ; are the same relation written with swapped operands, so every lane
       ; can be expressed as `slt (b, a)`.
 161   %c0 = icmp sgt i32 %a0, %b0
 162   %c1 = icmp slt i32 %b1, %a1
 163   %c2 = icmp slt i32 %b2, %a2
 164   %c3 = icmp sgt i32 %a3, %b3
 165   %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
 166   %d1 = insertelement <4 x i1>   %d0, i1 %c1, i32 1
 167   %d2 = insertelement <4 x i1>   %d1, i1 %c2, i32 2
 168   %d3 = insertelement <4 x i1>   %d2, i1 %c3, i32 3
 169   %r = sext <4 x i1> %d3 to <4 x i32>
 170   ret <4 x i32> %r
 171 }
 172
 173 define <4 x i32> @icmp_uge_ule_v4i32(<4 x i32> %a, i32* %b) {
 174 ; CHECK-LABEL: @icmp_uge_ule_v4i32(
 175 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
 176 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
 177 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ule <4 x i32> [[TMP2]], [[A:%.*]]
 178 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
 179 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 180 ;
       ; Input: four scalar unsigned compares between the elements of %a and
       ; four consecutive i32 values loaded from %b, using two different
       ; predicates (uge and ule), rebuilt into a <4 x i1> and sign-extended.
       ; The CHECK lines above expect a single <4 x i32> load of %b and a
       ; single vector `icmp ule` with the loaded %b on the left and %a on the
       ; right.
 181   %a0 = extractelement <4 x i32> %a, i32 0
 182   %a1 = extractelement <4 x i32> %a, i32 1
 183   %a2 = extractelement <4 x i32> %a, i32 2
 184   %a3 = extractelement <4 x i32> %a, i32 3
 185   %p0 = getelementptr inbounds i32, i32* %b, i32 0
 186   %p1 = getelementptr inbounds i32, i32* %b, i32 1
 187   %p2 = getelementptr inbounds i32, i32* %b, i32 2
 188   %p3 = getelementptr inbounds i32, i32* %b, i32 3
 189   %b0 = load i32, i32* %p0, align 4
 190   %b1 = load i32, i32* %p1, align 4
 191   %b2 = load i32, i32* %p2, align 4
 192   %b3 = load i32, i32* %p3, align 4
       ; Lanes 0 and 3 are `uge (a, b)`; lanes 1 and 2 are `ule (b, a)`. These
       ; are the same relation written with swapped operands, so every lane
       ; can be expressed as `ule (b, a)`.
 193   %c0 = icmp uge i32 %a0, %b0
 194   %c1 = icmp ule i32 %b1, %a1
 195   %c2 = icmp ule i32 %b2, %a2
 196   %c3 = icmp uge i32 %a3, %b3
 197   %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
 198   %d1 = insertelement <4 x i1>   %d0, i1 %c1, i32 1
 199   %d2 = insertelement <4 x i1>   %d1, i1 %c2, i32 2
 200   %d3 = insertelement <4 x i1>   %d2, i1 %c3, i32 3
 201   %r = sext <4 x i1> %d3 to <4 x i32>
 202   ret <4 x i32> %r
 203 }
 204
 205 define <4 x i32> @fcmp_ogt_olt_v4i32(<4 x float> %a, float* %b) {
 206 ; CHECK-LABEL: @fcmp_ogt_olt_v4i32(
 207 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
 208 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
 209 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <4 x float> [[TMP2]], [[A:%.*]]
 210 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
 211 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 212 ;
       ; Input: four scalar ordered float compares between the elements of %a
       ; and four consecutive floats loaded from %b, using two different
       ; predicates (ogt and olt), rebuilt into a <4 x i1> and sign-extended.
       ; The CHECK lines above expect a single <4 x float> load of %b and a
       ; single vector `fcmp olt` with the loaded %b on the left and %a on the
       ; right.
 213   %a0 = extractelement <4 x float> %a, i32 0
 214   %a1 = extractelement <4 x float> %a, i32 1
 215   %a2 = extractelement <4 x float> %a, i32 2
 216   %a3 = extractelement <4 x float> %a, i32 3
 217   %p0 = getelementptr inbounds float, float* %b, i32 0
 218   %p1 = getelementptr inbounds float, float* %b, i32 1
 219   %p2 = getelementptr inbounds float, float* %b, i32 2
 220   %p3 = getelementptr inbounds float, float* %b, i32 3
 221   %b0 = load float, float* %p0, align 4
 222   %b1 = load float, float* %p1, align 4
 223   %b2 = load float, float* %p2, align 4
 224   %b3 = load float, float* %p3, align 4
       ; Lanes 0 and 3 are `ogt (a, b)`; lanes 1 and 2 are `olt (b, a)`. These
       ; are the same relation written with swapped operands, so every lane
       ; can be expressed as `olt (b, a)`.
 225   %c0 = fcmp ogt float %a0, %b0
 226   %c1 = fcmp olt float %b1, %a1
 227   %c2 = fcmp olt float %b2, %a2
 228   %c3 = fcmp ogt float %a3, %b3
 229   %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
 230   %d1 = insertelement <4 x i1>   %d0, i1 %c1, i32 1
 231   %d2 = insertelement <4 x i1>   %d1, i1 %c2, i32 2
 232   %d3 = insertelement <4 x i1>   %d2, i1 %c3, i32 3
 233   %r = sext <4 x i1> %d3 to <4 x i32>
 234   ret <4 x i32> %r
 235 }
 236
 237 define <4 x i32> @fcmp_ord_uno_v4i32(<4 x float> %a, float* %b) {
 238 ; CHECK-LABEL: @fcmp_ord_uno_v4i32(
 239 ; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
 240 ; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
 241 ; CHECK-NEXT:    [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
 242 ; CHECK-NEXT:    [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
 243 ; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
 244 ; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
 245 ; CHECK-NEXT:    [[P3:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
 246 ; CHECK-NEXT:    [[B0:%.*]] = load float, float* [[B]], align 4
 247 ; CHECK-NEXT:    [[B1:%.*]] = load float, float* [[P1]], align 4
 248 ; CHECK-NEXT:    [[B2:%.*]] = load float, float* [[P2]], align 4
 249 ; CHECK-NEXT:    [[B3:%.*]] = load float, float* [[P3]], align 4
 250 ; CHECK-NEXT:    [[C0:%.*]] = fcmp ord float [[A0]], [[B0]]
 251 ; CHECK-NEXT:    [[C1:%.*]] = fcmp uno float [[B1]], [[A1]]
 252 ; CHECK-NEXT:    [[C2:%.*]] = fcmp uno float [[B2]], [[A2]]
 253 ; CHECK-NEXT:    [[C3:%.*]] = fcmp ord float [[A3]], [[B3]]
 254 ; CHECK-NEXT:    [[D0:%.*]] = insertelement <4 x i1> undef, i1 [[C0]], i32 0
 255 ; CHECK-NEXT:    [[D1:%.*]] = insertelement <4 x i1> [[D0]], i1 [[C1]], i32 1
 256 ; CHECK-NEXT:    [[D2:%.*]] = insertelement <4 x i1> [[D1]], i1 [[C2]], i32 2
 257 ; CHECK-NEXT:    [[D3:%.*]] = insertelement <4 x i1> [[D2]], i1 [[C3]], i32 3
 258 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[D3]] to <4 x i32>
 259 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 260 ;
       ; Negative test: the CHECK lines above expect the four scalar loads, the
       ; four scalar fcmps and the insertelement chain to remain; no vector
       ; load or vector fcmp is expected.
       ; The only differences from the input in the expected output are that
       ; the zero-offset GEP (%p0) is gone and the remaining GEP indices are
       ; i64 (presumably canonicalization by -instcombine; not the subject of
       ; this test).
 261   %a0 = extractelement <4 x float> %a, i32 0
 262   %a1 = extractelement <4 x float> %a, i32 1
 263   %a2 = extractelement <4 x float> %a, i32 2
 264   %a3 = extractelement <4 x float> %a, i32 3
 265   %p0 = getelementptr inbounds float, float* %b, i32 0
 266   %p1 = getelementptr inbounds float, float* %b, i32 1
 267   %p2 = getelementptr inbounds float, float* %b, i32 2
 268   %p3 = getelementptr inbounds float, float* %b, i32 3
 269   %b0 = load float, float* %p0, align 4
 270   %b1 = load float, float* %p1, align 4
 271   %b2 = load float, float* %p2, align 4
 272   %b3 = load float, float* %p3, align 4
       ; Lanes 0 and 3 are `ord (a, b)`; lanes 1 and 2 are `uno (b, a)`. Unlike
       ; sgt/slt or ogt/olt, `ord` and `uno` are logical inverses, not
       ; operand-swapped forms of each other (each is symmetric on its own),
       ; so commuting operands cannot give all four lanes a common predicate.
 273   %c0 = fcmp ord float %a0, %b0
 274   %c1 = fcmp uno float %b1, %a1
 275   %c2 = fcmp uno float %b2, %a2
 276   %c3 = fcmp ord float %a3, %b3
 277   %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
 278   %d1 = insertelement <4 x i1>   %d0, i1 %c1, i32 1
 279   %d2 = insertelement <4 x i1>   %d1, i1 %c2, i32 2
 280   %d3 = insertelement <4 x i1>   %d2, i1 %c3, i32 3
 281   %r = sext <4 x i1> %d3 to <4 x i32>
 282   ret <4 x i32> %r
 283 }