llvm/test/CodeGen/X86/combine-or.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s -check-prefixes=CHECK,CHECK-LV
   3 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -early-live-intervals | FileCheck %s -check-prefixes=CHECK,CHECK-LIS
   4
   5 define i32 @or_self(i32 %x) {
   6 ; CHECK-LABEL: or_self:
   7 ; CHECK:       # %bb.0:
   8 ; CHECK-NEXT:    movl %edi, %eax
   9 ; CHECK-NEXT:    retq
       ; OR of a scalar with itself: the expected output only copies the argument
       ; register into the return register, with no or instruction.
  10   %or = or i32 %x, %x
  11   ret i32 %or
  12 }
  13
  14 define <4 x i32> @or_self_vec(<4 x i32> %x) {
  15 ; CHECK-LABEL: or_self_vec:
  16 ; CHECK:       # %bb.0:
  17 ; CHECK-NEXT:    retq
       ; Vector form of the self-OR case: the expected output is a bare return.
  18   %or = or <4 x i32> %x, %x
  19   ret <4 x i32> %or
  20 }
  21
  22 ; Verify that each of the following test cases is folded into a single
  23 ; instruction which performs a blend operation.
  24
  25 define <2 x i64> @test1(<2 x i64> %a, <2 x i64> %b) {
  26 ; CHECK-LABEL: test1:
  27 ; CHECK:       # %bb.0:
  28 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
  29 ; CHECK-NEXT:    retq
       ; %shuf1 keeps lane 0 of %a and %shuf2 keeps lane 1 of %b; the remaining
       ; lane of each shuffle is taken from the zero vector.
  30   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  31   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
  32   %or = or <2 x i64> %shuf1, %shuf2
  33   ret <2 x i64> %or
  34 }
  35
  36
  37 define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {
  38 ; CHECK-LABEL: test2:
  39 ; CHECK:       # %bb.0:
  40 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
  41 ; CHECK-NEXT:    retq
       ; %shuf1 keeps lanes 2-3 of %a and %shuf2 keeps lanes 0-1 of %b; all other
       ; lanes are taken from the zero vector.
  42   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
  43   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
  44   %or = or <4 x i32> %shuf1, %shuf2
  45   ret <4 x i32> %or
  46 }
  47
  48
  49 define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) {
  50 ; CHECK-LABEL: test3:
  51 ; CHECK:       # %bb.0:
  52 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
  53 ; CHECK-NEXT:    retq
       ; %shuf1 keeps lane 1 of %a and %shuf2 keeps lane 0 of %b; the remaining
       ; lane of each shuffle is taken from the zero vector.
  54   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
  55   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  56   %or = or <2 x i64> %shuf1, %shuf2
  57   ret <2 x i64> %or
  58 }
  59
  60
  61 define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
  62 ; CHECK-LABEL: test4:
  63 ; CHECK:       # %bb.0:
  64 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
  65 ; CHECK-NEXT:    retq
       ; %shuf1 keeps lane 0 of %a and %shuf2 keeps lanes 1-3 of %b; all other
       ; lanes are taken from the zero vector.
  66   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
  67   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
  68   %or = or <4 x i32> %shuf1, %shuf2
  69   ret <4 x i32> %or
  70 }
  71
  72
  73 define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
  74 ; CHECK-LABEL: test5:
  75 ; CHECK:       # %bb.0:
  76 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
  77 ; CHECK-NEXT:    retq
       ; %shuf1 keeps lanes 1-3 of %a and %shuf2 keeps lane 0 of %b; all other
       ; lanes are taken from the zero vector.
  78   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
  79   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
  80   %or = or <4 x i32> %shuf1, %shuf2
  81   ret <4 x i32> %or
  82 }
  83
  84
  85 define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
  86 ; CHECK-LABEL: test6:
  87 ; CHECK:       # %bb.0:
  88 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
  89 ; CHECK-NEXT:    retq
       ; %shuf1 keeps lanes 0-1 of %a and %shuf2 keeps lanes 2-3 of %b; all other
       ; lanes are taken from the zero vector.
  90   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
  91   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
  92   %or = or <4 x i32> %shuf1, %shuf2
  93   ret <4 x i32> %or
  94 }
  95
  96
  97 define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
  98 ; CHECK-LABEL: test7:
  99 ; CHECK:       # %bb.0:
 100 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
 101 ; CHECK-NEXT:    retq
       ; Lane selection written with all-ones/zero AND masks instead of shuffles:
       ; lanes 0-1 come from %a and lanes 2-3 come from %b.
 102   %and1 = and <4 x i32> %a, <i32 -1, i32 -1, i32 0, i32 0>
 103   %and2 = and <4 x i32> %b, <i32 0, i32 0, i32 -1, i32 -1>
 104   %or = or <4 x i32> %and1, %and2
 105   ret <4 x i32> %or
 106 }
 107
 108
 109 define <2 x i64> @test8(<2 x i64> %a, <2 x i64> %b) {
 110 ; CHECK-LABEL: test8:
 111 ; CHECK:       # %bb.0:
 112 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
 113 ; CHECK-NEXT:    retq
       ; AND-mask form on i64 lanes: lane 0 comes from %a and lane 1 from %b.
 114   %and1 = and <2 x i64> %a, <i64 -1, i64 0>
 115   %and2 = and <2 x i64> %b, <i64 0, i64 -1>
 116   %or = or <2 x i64> %and1, %and2
 117   ret <2 x i64> %or
 118 }
 119
 120
 121 define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
 122 ; CHECK-LABEL: test9:
 123 ; CHECK:       # %bb.0:
 124 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 125 ; CHECK-NEXT:    retq
       ; AND-mask form: lanes 2-3 come from %a and lanes 0-1 come from %b.
 126   %and1 = and <4 x i32> %a, <i32 0, i32 0, i32 -1, i32 -1>
 127   %and2 = and <4 x i32> %b, <i32 -1, i32 -1, i32 0, i32 0>
 128   %or = or <4 x i32> %and1, %and2
 129   ret <4 x i32> %or
 130 }
 131
 132
 133 define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) {
 134 ; CHECK-LABEL: test10:
 135 ; CHECK:       # %bb.0:
 136 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 137 ; CHECK-NEXT:    retq
       ; AND-mask form on i64 lanes: lane 1 comes from %a and lane 0 from %b.
 138   %and1 = and <2 x i64> %a, <i64 0, i64 -1>
 139   %and2 = and <2 x i64> %b, <i64 -1, i64 0>
 140   %or = or <2 x i64> %and1, %and2
 141   ret <2 x i64> %or
 142 }
 143
 144
 145 define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
 146 ; CHECK-LABEL: test11:
 147 ; CHECK:       # %bb.0:
 148 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 149 ; CHECK-NEXT:    retq
       ; AND-mask form: lane 0 comes from %a and lanes 1-3 come from %b.
 150   %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
 151   %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1>
 152   %or = or <4 x i32> %and1, %and2
 153   ret <4 x i32> %or
 154 }
 155
 156
 157 define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) {
 158 ; CHECK-LABEL: test12:
 159 ; CHECK:       # %bb.0:
 160 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
 161 ; CHECK-NEXT:    retq
       ; AND-mask form: lanes 1-3 come from %a and lane 0 comes from %b.
 162   %and1 = and <4 x i32> %a, <i32 0, i32 -1, i32 -1, i32 -1>
 163   %and2 = and <4 x i32> %b, <i32 -1, i32 0, i32 0, i32 0>
 164   %or = or <4 x i32> %and1, %and2
 165   ret <4 x i32> %or
 166 }
 167
 168
 169 ; Verify that the following test cases are folded into single shuffles.
 170
 171 define <4 x i32> @test13(<4 x i32> %a, <4 x i32> %b) {
 172 ; CHECK-LABEL: test13:
 173 ; CHECK:       # %bb.0:
 174 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
 175 ; CHECK-NEXT:    retq
       ; %shuf1 places element 1 of %a in lanes 0-1 and %shuf2 keeps lanes 2-3 of
       ; %b; the non-zero lanes of the two shuffles do not overlap.
 176   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 1, i32 1, i32 4, i32 4>
 177   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
 178   %or = or <4 x i32> %shuf1, %shuf2
 179   ret <4 x i32> %or
 180 }
 181
 182
 183 define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) {
 184 ; CHECK-LABEL: test14:
 185 ; CHECK:       # %bb.0:
 186 ; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 187 ; CHECK-NEXT:    retq
       ; %shuf1 keeps element 0 of %a in lane 0; %shuf2 moves element 0 of %b
       ; into lane 1. The other lane of each shuffle is zero.
 188   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
 189   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
 190   %or = or <2 x i64> %shuf1, %shuf2
 191   ret <2 x i64> %or
 192 }
 193
 194
 195 define <4 x i32> @test15(<4 x i32> %a, <4 x i32> %b) {
 196 ; CHECK-LABEL: test15:
 197 ; CHECK:       # %bb.0:
 198 ; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,1],xmm0[2,1]
 199 ; CHECK-NEXT:    movaps %xmm1, %xmm0
 200 ; CHECK-NEXT:    retq
       ; %shuf1 places elements 2,1 of %a in lanes 2-3 and %shuf2 places elements
       ; 2,1 of %b in lanes 0-1. The expected output is one shufps followed by a
       ; register copy into the return register.
 201   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 1>
 202   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 2, i32 1, i32 4, i32 4>
 203   %or = or <4 x i32> %shuf1, %shuf2
 204   ret <4 x i32> %or
 205 }
 206
 207
 208 define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
 209 ; CHECK-LABEL: test16:
 210 ; CHECK:       # %bb.0:
 211 ; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 212 ; CHECK-NEXT:    movaps %xmm1, %xmm0
 213 ; CHECK-NEXT:    retq
       ; %shuf1 moves element 0 of %a into lane 1; %shuf2 keeps element 0 of %b
       ; in lane 0. The expected output is one movlhps plus a register copy.
 214   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
 215   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
 216   %or = or <2 x i64> %shuf1, %shuf2
 217   ret <2 x i64> %or
 218 }
 219
 220
 221 ; Verify that the dag-combiner does not fold an OR of two shuffles into a single
 222 ; shuffle instruction when the shuffle indices are not compatible.
 223
 224 define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
 225 ; CHECK-LABEL: test17:
 226 ; CHECK:       # %bb.0:
 227 ; CHECK-NEXT:    psllq $32, %xmm0
 228 ; CHECK-NEXT:    movq {{.*#+}} xmm1 = xmm1[0],zero
 229 ; CHECK-NEXT:    por %xmm1, %xmm0
 230 ; CHECK-NEXT:    retq
       ; %shuf1 is {0, a0, 0, a2} and %shuf2 is {b0, b1, 0, 0}: lane 1 is taken
       ; from a non-zero source in both, and the expected output keeps a por.
 231   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2>
 232   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
 233   %or = or <4 x i32> %shuf1, %shuf2
 234   ret <4 x i32> %or
 235 }
 236
 237
 238 define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
 239 ; CHECK-LV-LABEL: test18:
 240 ; CHECK-LV:       # %bb.0:
 241 ; CHECK-LV-NEXT:    pxor %xmm2, %xmm2
 242 ; CHECK-LV-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
 243 ; CHECK-LV-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
 244 ; CHECK-LV-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3,4,5,6,7]
 245 ; CHECK-LV-NEXT:    por %xmm0, %xmm2
 246 ; CHECK-LV-NEXT:    movdqa %xmm2, %xmm0
 247 ; CHECK-LV-NEXT:    retq
 248 ;
 249 ; CHECK-LIS-LABEL: test18:
 250 ; CHECK-LIS:       # %bb.0:
 251 ; CHECK-LIS-NEXT:    pxor %xmm2, %xmm2
 252 ; CHECK-LIS-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
 253 ; CHECK-LIS-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
 254 ; CHECK-LIS-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3,4,5,6,7]
 255 ; CHECK-LIS-NEXT:    por %xmm0, %xmm2
 256 ; CHECK-LIS-NEXT:    movdqa %xmm2, %xmm0
 257 ; CHECK-LIS-NEXT:    retq
       ; %shuf1 is {0, a0, 0, 0} and %shuf2 is {b0, 0, 0, 0}. The expected output
       ; keeps the two inputs as separate blend/shuffle sequences joined by a por.
       ; NOTE(review): the LV and LIS expected bodies above are textually
       ; identical; the update script normally emits identical output under the
       ; shared prefix, so one of them may be stale - regenerate to confirm.
 258   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 4>
 259   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
 260   %or = or <4 x i32> %shuf1, %shuf2
 261   ret <4 x i32> %or
 262 }
 263
 264
 265 define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
 266 ; CHECK-LABEL: test19:
 267 ; CHECK:       # %bb.0:
 268 ; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,3]
 269 ; CHECK-NEXT:    pxor %xmm3, %xmm3
 270 ; CHECK-NEXT:    pblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
 271 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,2,2]
 272 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5,6,7]
 273 ; CHECK-NEXT:    por %xmm2, %xmm0
 274 ; CHECK-NEXT:    retq
       ; %shuf1 is {0, a0, 0, a3} and %shuf2 is {b0, 0, b2, b2}: lane 3 is taken
       ; from a non-zero source in both, and the expected output keeps a por.
 275   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 3>
 276   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 2, i32 2>
 277   %or = or <4 x i32> %shuf1, %shuf2
 278   ret <4 x i32> %or
 279 }
 280
 281
 282 define <2 x i64> @test20(<2 x i64> %a, <2 x i64> %b) {
 283 ; CHECK-LABEL: test20:
 284 ; CHECK:       # %bb.0:
 285 ; CHECK-NEXT:    por %xmm1, %xmm0
 286 ; CHECK-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
 287 ; CHECK-NEXT:    retq
       ; Both shuffles use the same mask (keep lane 0, zero lane 1). The expected
       ; output ORs the inputs first and applies the zeroing move once.
 288   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
 289   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
 290   %or = or <2 x i64> %shuf1, %shuf2
 291   ret <2 x i64> %or
 292 }
 293
 294
 295 define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
 296 ; CHECK-LABEL: test21:
 297 ; CHECK:       # %bb.0:
 298 ; CHECK-NEXT:    por %xmm1, %xmm0
 299 ; CHECK-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
 300 ; CHECK-NEXT:    retq
       ; Both shuffles use the same mask (zero lane 0, element 0 moved to lane 1).
       ; The expected output ORs the inputs first and applies one byte shift.
 301   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
 302   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
 303   %or = or <2 x i64> %shuf1, %shuf2
 304   ret <2 x i64> %or
 305 }
 306
 307
 308 ; Verify that the dag-combiner keeps the correct domain when float/double vectors
 309 ; are bitcast to integer vectors for the mask-or blend combine.
 310
 311 define <2 x double> @test22(<2 x double> %a0, <2 x double> %a1) {
 312 ; CHECK-LABEL: test22:
 313 ; CHECK:       # %bb.0:
 314 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 315 ; CHECK-NEXT:    retq
       ; <2 x double> inputs are bitcast to <2 x i64>, masked so that lane 1 comes
       ; from %a0 and lane 0 from %a1, ORed, and bitcast back to <2 x double>.
 316   %bc1 = bitcast <2 x double> %a0 to <2 x i64>
 317   %bc2 = bitcast <2 x double> %a1 to <2 x i64>
 318   %and1 = and <2 x i64> %bc1, <i64 0, i64 -1>
 319   %and2 = and <2 x i64> %bc2, <i64 -1, i64 0>
 320   %or = or <2 x i64> %and1, %and2
 321   %bc3 = bitcast <2 x i64> %or to <2 x double>
 322   ret <2 x double> %bc3
 323 }
 324
 325
 326 define <4 x float> @test23(<4 x float> %a0, <4 x float> %a1) {
 327 ; CHECK-LABEL: test23:
 328 ; CHECK:       # %bb.0:
 329 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
 330 ; CHECK-NEXT:    retq
       ; <4 x float> inputs are bitcast to <4 x i32> and masked so that lanes 1-2
       ; come from %a0 and lanes 0 and 3 come from %a1.
 331   %bc1 = bitcast <4 x float> %a0 to <4 x i32>
 332   %bc2 = bitcast <4 x float> %a1 to <4 x i32>
 333   %and1 = and <4 x i32> %bc1, <i32 0, i32 -1, i32 -1, i32 0>
 334   %and2 = and <4 x i32> %bc2, <i32 -1, i32 0, i32 0, i32 -1>
 335   %or = or <4 x i32> %and1, %and2
 336   %bc3 = bitcast <4 x i32> %or to <4 x float>
 337   ret <4 x float> %bc3
 338 }
 339
 340
 341 define <4 x float> @test24(<4 x float> %a0, <4 x float> %a1) {
 342 ; CHECK-LABEL: test24:
 343 ; CHECK:       # %bb.0:
 344 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 345 ; CHECK-NEXT:    retq
       ; <4 x float> inputs are bitcast to <2 x i64>, so the masks select whole
       ; 64-bit lanes: i64 lane 1 comes from %a0 and i64 lane 0 from %a1.
 346   %bc1 = bitcast <4 x float> %a0 to <2 x i64>
 347   %bc2 = bitcast <4 x float> %a1 to <2 x i64>
 348   %and1 = and <2 x i64> %bc1, <i64 0, i64 -1>
 349   %and2 = and <2 x i64> %bc2, <i64 -1, i64 0>
 350   %or = or <2 x i64> %and1, %and2
 351   %bc3 = bitcast <2 x i64> %or to <4 x float>
 352   ret <4 x float> %bc3
 353 }
 354
 355
 356 define <4 x float> @test25(<4 x float> %a0) {
 357 ; CHECK-LABEL: test25:
 358 ; CHECK:       # %bb.0:
 359 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = mem[0],xmm0[1,2],mem[3]
 360 ; CHECK-NEXT:    retq
       ; The second blend input is the constant splat of 1.0 rather than an
       ; argument; the expected blend takes lanes 0 and 3 from a memory operand.
 361   %bc1 = bitcast <4 x float> %a0 to <4 x i32>
 362   %bc2 = bitcast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0> to <4 x i32>
 363   %and1 = and <4 x i32> %bc1, <i32 0, i32 -1, i32 -1, i32 0>
 364   %and2 = and <4 x i32> %bc2, <i32 -1, i32 0, i32 0, i32 -1>
 365   %or = or <4 x i32> %and1, %and2
 366   %bc3 = bitcast <4 x i32> %or to <4 x float>
 367   ret <4 x float> %bc3
 368 }
 369
 370
 371 ; Verify that the DAGCombiner doesn't crash in the attempt to check if a shuffle
 372 ; with illegal type has a legal mask. Method 'isShuffleMaskLegal' only knows how to
 373 ; handle legal vector value types.
 374 define <4 x i8> @test_crash(<4 x i8> %a, <4 x i8> %b) {
 375 ; CHECK-LABEL: test_crash:
 376 ; CHECK:       # %bb.0:
 377 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3,4,5,6,7]
 378 ; CHECK-NEXT:    retq
       ; Same masks as test2 but on <4 x i8>: lanes 2-3 come from %a and lanes 0-1
       ; from %b. The expected output is a single pblendw.
 379   %shuf1 = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
 380   %shuf2 = shufflevector <4 x i8> %b, <4 x i8> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
 381   %or = or <4 x i8> %shuf1, %shuf2
 382   ret <4 x i8> %or
 383 }
 384
 385 ; Verify that we can fold regardless of which operand is the zeroinitializer
 386
 387 define <4 x i32> @test2b(<4 x i32> %a, <4 x i32> %b) {
 388 ; CHECK-LABEL: test2b:
 389 ; CHECK:       # %bb.0:
 390 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 391 ; CHECK-NEXT:    retq
       ; %shuf1 has the zero vector as its FIRST operand, so mask indices 6,7
       ; select elements 2,3 of %a; %shuf2 keeps the zero vector second.
 392   %shuf1 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32><i32 0, i32 0, i32 6, i32 7>
 393   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
 394   %or = or <4 x i32> %shuf1, %shuf2
 395   ret <4 x i32> %or
 396 }
 397
 398 define <4 x i32> @test2c(<4 x i32> %a, <4 x i32> %b) {
 399 ; CHECK-LABEL: test2c:
 400 ; CHECK:       # %bb.0:
 401 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 402 ; CHECK-NEXT:    retq
       ; Both shuffles have the zero vector as their first operand: indices 6,7
       ; select elements 2,3 of %a and indices 4,5 select elements 0,1 of %b.
 403   %shuf1 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32><i32 0, i32 0, i32 6, i32 7>
 404   %shuf2 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32><i32 4, i32 5, i32 0, i32 0>
 405   %or = or <4 x i32> %shuf1, %shuf2
 406   ret <4 x i32> %or
 407 }
 408
 409
 410 define <4 x i32> @test2d(<4 x i32> %a, <4 x i32> %b) {
 411 ; CHECK-LABEL: test2d:
 412 ; CHECK:       # %bb.0:
 413 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 414 ; CHECK-NEXT:    retq
       ; Only %shuf2 has the zero vector as its first operand; %shuf1 keeps the
       ; zero vector second.
 415   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
 416   %shuf2 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32><i32 4, i32 5, i32 0, i32 0>
 417   %or = or <4 x i32> %shuf1, %shuf2
 418   ret <4 x i32> %or
 419 }
 420
 421 ; Make sure the fold still works when an index that should point to the zero vector is undef.
 422
 423 define <4 x i32> @test2e(<4 x i32> %a, <4 x i32> %b) {
 424 ; CHECK-LABEL: test2e:
 425 ; CHECK:       # %bb.0:
 426 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 427 ; CHECK-NEXT:    retq
       ; The second shuffle operand is <0, undef, undef, undef> instead of
       ; zeroinitializer, and lane 0 of the %shuf1 mask is undef.
 428   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 undef, i32 4, i32 2, i32 3>
 429   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 0, i32 1, i32 4, i32 4>
 430   %or = or <4 x i32> %shuf1, %shuf2
 431   ret <4 x i32> %or
 432 }
 433
 434 define <4 x i32> @test2f(<4 x i32> %a, <4 x i32> %b) {
 435 ; CHECK-LABEL: test2f:
 436 ; CHECK:       # %bb.0:
 437 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 438 ; CHECK-NEXT:    retq
       ; The second shuffle operand is <0, undef, undef, undef> instead of
       ; zeroinitializer, and lane 0 of the %shuf2 mask is undef.
 439   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 4, i32 4, i32 2, i32 3>
 440   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 undef, i32 1, i32 4, i32 4>
 441   %or = or <4 x i32> %shuf1, %shuf2
 442   ret <4 x i32> %or
 443 }
 444
 445 ; (or (and X, c1), c2) -> (and (or X, c2), c1|c2) iff (c1 & c2) != 0
 446
 447 define <2 x i64> @or_and_v2i64(<2 x i64> %a0) {
 448 ; CHECK-LABEL: or_and_v2i64:
 449 ; CHECK:       # %bb.0:
 450 ; CHECK-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 451 ; CHECK-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 452 ; CHECK-NEXT:    retq
       ; Uniform constants c1 = 7 and c2 = 3. The expected output performs the or
       ; first and the and second, both against constant-pool operands.
 453   %1 = and <2 x i64> %a0, <i64 7, i64 7>
 454   %2 = or <2 x i64> %1, <i64 3, i64 3>
 455   ret <2 x i64> %2
 456 }
 457
 458 define <4 x i32> @or_and_v4i32(<4 x i32> %a0) {
 459 ; CHECK-LABEL: or_and_v4i32:
 460 ; CHECK:       # %bb.0:
 461 ; CHECK-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 462 ; CHECK-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 463 ; CHECK-NEXT:    retq
       ; Same pattern with constants that differ per lane. The expected output is
       ; still one or followed by one and against constant-pool operands.
 464   %1 = and <4 x i32> %a0, <i32 1, i32 3, i32 5, i32 7>
 465   %2 = or <4 x i32> %1, <i32 3, i32 2, i32 15, i32 2>
 466   ret <4 x i32> %2
 467 }
 468
 469 ; If all masked bits are going to be set, that's a constant fold.
 470
 471 define <4 x i32> @or_and_v4i32_fold(<4 x i32> %a0) {
 472 ; CHECK-LABEL: or_and_v4i32_fold:
 473 ; CHECK:       # %bb.0:
 474 ; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [3,3,3,3]
 475 ; CHECK-NEXT:    retq
       ; The and-mask (1) only keeps a bit that the or-constant (3) sets anyway,
       ; so the result is the constant splat of 3 regardless of %a0.
 476   %1 = and <4 x i32> %a0, <i32 1, i32 1, i32 1, i32 1>
 477   %2 = or <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
 478   ret <4 x i32> %2
 479 }
 480
 481 ; fold (or x, c) -> c iff (x & ~c) == 0
 482
 483 define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) {
 484 ; CHECK-LABEL: or_zext_v2i32:
 485 ; CHECK:       # %bb.0:
 486 ; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,4294967295]
 487 ; CHECK-NEXT:    retq
       ; The zext leaves the upper 32 bits of each lane zero and the or-constant
       ; sets all of the lower 32 bits, so the result is the constant itself.
 488   %1 = zext <2 x i32> %a0 to <2 x i64>
 489   %2 = or <2 x i64> %1, <i64 4294967295, i64 4294967295>
 490   ret <2 x i64> %2
 491 }
 492
 493 define <4 x i32> @or_zext_v4i16(<4 x i16> %a0) {
 494 ; CHECK-LABEL: or_zext_v4i16:
 495 ; CHECK:       # %bb.0:
 496 ; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [65535,65535,65535,65535]
 497 ; CHECK-NEXT:    retq
       ; The zext leaves the upper 16 bits of each lane zero and the or-constant
       ; sets all of the lower 16 bits, so the result is the constant itself.
 498   %1 = zext <4 x i16> %a0 to <4 x i32>
 499   %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
 500   ret <4 x i32> %2
 501 }
 502
 503 ; fold (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
 504
 505 define i32 @or_and_and_i32(i32 %x, i32 %y) {
 506 ; CHECK-LABEL: or_and_and_i32:
 507 ; CHECK:       # %bb.0:
 508 ; CHECK-NEXT:    movl %edi, %eax
 509 ; CHECK-NEXT:    andl $-11, %esi
 510 ; CHECK-NEXT:    andl $-3, %eax
 511 ; CHECK-NEXT:    orl %esi, %eax
 512 ; CHECK-NEXT:    retq
       ; C1 = 8 and C2 = -11, so C1|C2 = -3. The expected output masks %x with -3
       ; and %y with -11 before the final or.
 513   %xy = or i32 %x, %y
 514   %mx = and i32 %x, 8
 515   %mxy = and i32 %xy, -11
 516   %r = or i32 %mx, %mxy
 517   ret i32 %r
 518 }
 519
 520 define i64 @or_and_and_commute_i64(i64 %x, i64 %y) {
 521 ; CHECK-LABEL: or_and_and_commute_i64:
 522 ; CHECK:       # %bb.0:
 523 ; CHECK-NEXT:    movq %rdi, %rax
 524 ; CHECK-NEXT:    orq %rsi, %rax
 525 ; CHECK-NEXT:    andq $-3, %rax
 526 ; CHECK-NEXT:    retq
       ; Commuted form: the final or takes %mxy first and %mx second. C1 = 8 and
       ; C2 = -3, so C1|C2 = -3; the expected output is one or and one and.
 527   %xy = or i64 %x, %y
 528   %mx = and i64 %x, 8
 529   %mxy = and i64 %xy, -3
 530   %r = or i64 %mxy, %mx
 531   ret i64 %r
 532 }
 533
 534 define <4 x i32> @or_and_and_v4i32(<4 x i32> %x, <4 x i32> %y) {
 535 ; CHECK-LABEL: or_and_and_v4i32:
 536 ; CHECK:       # %bb.0:
 537 ; CHECK-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
 538 ; CHECK-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 539 ; CHECK-NEXT:    orps %xmm1, %xmm0
 540 ; CHECK-NEXT:    retq
       ; Vector form with per-lane constants. The expected output masks %y and %x
       ; separately with constant-pool operands and then ORs the two results.
 541   %xy = or <4 x i32> %x, %y
 542   %mx = and <4 x i32> %x, <i32 2, i32 4, i32 8, i32 16>
 543   %mxy = and <4 x i32> %xy, <i32 1, i32 -1, i32 -5, i32 -25>
 544   %r = or <4 x i32> %mx, %mxy
 545   ret <4 x i32> %r
 546 }
 547
 548 define i32 @or_and_and_multiuse_i32(i32 %x, i32 %y) nounwind {
 549 ; CHECK-LABEL: or_and_and_multiuse_i32:
 550 ; CHECK:       # %bb.0:
 551 ; CHECK-NEXT:    pushq %rbx
 552 ; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
 553 ; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
 554 ; CHECK-NEXT:    orl %edi, %esi
 555 ; CHECK-NEXT:    andl $8, %edi
 556 ; CHECK-NEXT:    andl $-11, %esi
 557 ; CHECK-NEXT:    leal (%rdi,%rsi), %ebx
 558 ; CHECK-NEXT:    movl %esi, %edi
 559 ; CHECK-NEXT:    callq use_i32@PLT
 560 ; CHECK-NEXT:    movl %ebx, %eax
 561 ; CHECK-NEXT:    popq %rbx
 562 ; CHECK-NEXT:    retq
       ; %mxy has a second use (the call to @use_i32). The expected output keeps
       ; the original masks 8 and -11 instead of the combined -3 mask.
 563   %xy = or i32 %x, %y
 564   %mx = and i32 %x, 8
 565   %mxy = and i32 %xy, -11
 566   %r = or i32 %mx, %mxy
 567   call void @use_i32(i32 %mxy)
 568   ret i32 %r
 569 }
 570
 571 define i32 @or_and_multiuse_and_i32(i32 %x, i32 %y) nounwind {
 572 ; CHECK-LABEL: or_and_multiuse_and_i32:
 573 ; CHECK:       # %bb.0:
 574 ; CHECK-NEXT:    pushq %rbx
 575 ; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
 576 ; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
 577 ; CHECK-NEXT:    orl %edi, %esi
 578 ; CHECK-NEXT:    andl $8, %edi
 579 ; CHECK-NEXT:    andl $-11, %esi
 580 ; CHECK-NEXT:    leal (%rsi,%rdi), %ebx
 581 ; CHECK-NEXT:    # kill: def $edi killed $edi killed $rdi
 582 ; CHECK-NEXT:    callq use_i32@PLT
 583 ; CHECK-NEXT:    movl %ebx, %eax
 584 ; CHECK-NEXT:    popq %rbx
 585 ; CHECK-NEXT:    retq
       ; %mx has a second use (the call to @use_i32). The expected output keeps
       ; the original masks 8 and -11 instead of the combined -3 mask.
 586   %xy = or i32 %x, %y
 587   %mx = and i32 %x, 8
 588   %mxy = and i32 %xy, -11
 589   %r = or i32 %mx, %mxy
 590   call void @use_i32(i32 %mx)
 591   ret i32 %r
 592 }
 593
 594 define i32 @or_and_multiuse_and_multiuse_i32(i32 %x, i32 %y) nounwind {
 595 ; CHECK-LABEL: or_and_multiuse_and_multiuse_i32:
 596 ; CHECK:       # %bb.0:
 597 ; CHECK-NEXT:    pushq %rbp
 598 ; CHECK-NEXT:    pushq %rbx
 599 ; CHECK-NEXT:    pushq %rax
 600 ; CHECK-NEXT:    movl %esi, %ebx
 601 ; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
 602 ; CHECK-NEXT:    orl %edi, %ebx
 603 ; CHECK-NEXT:    andl $8, %edi
 604 ; CHECK-NEXT:    andl $-11, %ebx
 605 ; CHECK-NEXT:    leal (%rdi,%rbx), %ebp
 606 ; CHECK-NEXT:    # kill: def $edi killed $edi killed $rdi
 607 ; CHECK-NEXT:    callq use_i32@PLT
 608 ; CHECK-NEXT:    movl %ebx, %edi
 609 ; CHECK-NEXT:    callq use_i32@PLT
 610 ; CHECK-NEXT:    movl %ebp, %eax
 611 ; CHECK-NEXT:    addq $8, %rsp
 612 ; CHECK-NEXT:    popq %rbx
 613 ; CHECK-NEXT:    popq %rbp
 614 ; CHECK-NEXT:    retq
       ; Both %mx and %mxy have a second use (one call to @use_i32 each). The
       ; expected output keeps the original masks 8 and -11.
 615   %xy = or i32 %x, %y
 616   %mx = and i32 %x, 8
 617   %mxy = and i32 %xy, -11
 618   %r = or i32 %mx, %mxy
 619   call void @use_i32(i32 %mx)
 620   call void @use_i32(i32 %mxy)
 621   ret i32 %r
 622 }
 623
 624 declare void @use_i32(i32) ; external callee that the multiuse tests call to give %mx / %mxy an extra use