test/CodeGen/X86/combine-or.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
   3
   4 define i32 @or_self(i32 %x) { ; scalar or whose two operands are the same value
   5 ; CHECK-LABEL: or_self:
   6 ; CHECK:       # %bb.0:
   7 ; CHECK-NEXT:    movl %edi, %eax
   8 ; CHECK-NEXT:    retq
   9   %or = or i32 %x, %x ; x | x == x, so only a register copy is expected
  10   ret i32 %or
  11 }
  12
  13 define <4 x i32> @or_self_vec(<4 x i32> %x) { ; vector or whose two operands are the same value
  14 ; CHECK-LABEL: or_self_vec:
  15 ; CHECK:       # %bb.0:
  16 ; CHECK-NEXT:    retq
  17   %or = or <4 x i32> %x, %x ; x | x == x; the expected asm has nothing before retq
  18   ret <4 x i32> %or
  19 }
  20
  21 ; Verify that each of the following test cases is folded into a single
  22 ; instruction which performs a blend operation.
  23
  24 define <2 x i64> @test1(<2 x i64> %a, <2 x i64> %b) { ; or of two zero-padded <2 x i64> shuffles
  25 ; CHECK-LABEL: test1:
  26 ; CHECK:       # %bb.0:
  27 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
  28 ; CHECK-NEXT:    retq
  29   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2> ; <a[0], 0>: index 2 is element 0 of the zero vector
  30   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1> ; <0, b[1]>
  31   %or = or <2 x i64> %shuf1, %shuf2 ; non-zero lanes do not overlap: <a[0], b[1]>
  32   ret <2 x i64> %or
  33 }
  34
  35
  36 define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) { ; or of two zero-padded <4 x i32> shuffles
  37 ; CHECK-LABEL: test2:
  38 ; CHECK:       # %bb.0:
  39 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
  40 ; CHECK-NEXT:    retq
  41   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3> ; <0, 0, a[2], a[3]>
  42   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4> ; <b[0], b[1], 0, 0>
  43   %or = or <4 x i32> %shuf1, %shuf2 ; <b[0], b[1], a[2], a[3]>
  44   ret <4 x i32> %or
  45 }
  46
  47
  48 define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) { ; same as test1 with the roles of %a and %b swapped
  49 ; CHECK-LABEL: test3:
  50 ; CHECK:       # %bb.0:
  51 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
  52 ; CHECK-NEXT:    retq
  53   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1> ; <0, a[1]>
  54   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2> ; <b[0], 0>
  55   %or = or <2 x i64> %shuf1, %shuf2 ; <b[0], a[1]>
  56   ret <2 x i64> %or
  57 }
  58
  59
  60 define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) { ; one lane from %a, three lanes from %b
  61 ; CHECK-LABEL: test4:
  62 ; CHECK:       # %bb.0:
  63 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
  64 ; CHECK-NEXT:    retq
  65   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4> ; <a[0], 0, 0, 0>
  66   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3> ; <0, b[1], b[2], b[3]>
  67   %or = or <4 x i32> %shuf1, %shuf2 ; <a[0], b[1], b[2], b[3]>
  68   ret <4 x i32> %or
  69 }
  70
  71
  72 define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) { ; three lanes from %a, one lane from %b
  73 ; CHECK-LABEL: test5:
  74 ; CHECK:       # %bb.0:
  75 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
  76 ; CHECK-NEXT:    retq
  77   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3> ; <0, a[1], a[2], a[3]>
  78   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4> ; <b[0], 0, 0, 0>
  79   %or = or <4 x i32> %shuf1, %shuf2 ; <b[0], a[1], a[2], a[3]>
  80   ret <4 x i32> %or
  81 }
  82
  83
  84 define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) { ; low half from %a, high half from %b
  85 ; CHECK-LABEL: test6:
  86 ; CHECK:       # %bb.0:
  87 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
  88 ; CHECK-NEXT:    retq
  89   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4> ; <a[0], a[1], 0, 0>
  90   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3> ; <0, 0, b[2], b[3]>
  91   %or = or <4 x i32> %shuf1, %shuf2 ; <a[0], a[1], b[2], b[3]>
  92   ret <4 x i32> %or
  93 }
  94
  95
  96 define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) { ; same result as test6, written with and-masks instead of shuffles
  97 ; CHECK-LABEL: test7:
  98 ; CHECK:       # %bb.0:
  99 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
 100 ; CHECK-NEXT:    retq
 101   %and1 = and <4 x i32> %a, <i32 -1, i32 -1, i32 0, i32 0> ; keeps a[0], a[1]; clears lanes 2-3
 102   %and2 = and <4 x i32> %b, <i32 0, i32 0, i32 -1, i32 -1> ; keeps b[2], b[3]; clears lanes 0-1
 103   %or = or <4 x i32> %and1, %and2 ; complementary masks: <a[0], a[1], b[2], b[3]>
 104   ret <4 x i32> %or
 105 }
 106
 107
 108 define <2 x i64> @test8(<2 x i64> %a, <2 x i64> %b) { ; <2 x i64> and-mask form of test1
 109 ; CHECK-LABEL: test8:
 110 ; CHECK:       # %bb.0:
 111 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
 112 ; CHECK-NEXT:    retq
 113   %and1 = and <2 x i64> %a, <i64 -1, i64 0> ; keeps a[0]
 114   %and2 = and <2 x i64> %b, <i64 0, i64 -1> ; keeps b[1]
 115   %or = or <2 x i64> %and1, %and2 ; <a[0], b[1]>
 116   ret <2 x i64> %or
 117 }
 118
 119
 120 define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) { ; and-mask form of test2
 121 ; CHECK-LABEL: test9:
 122 ; CHECK:       # %bb.0:
 123 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 124 ; CHECK-NEXT:    retq
 125   %and1 = and <4 x i32> %a, <i32 0, i32 0, i32 -1, i32 -1> ; keeps a[2], a[3]
 126   %and2 = and <4 x i32> %b, <i32 -1, i32 -1, i32 0, i32 0> ; keeps b[0], b[1]
 127   %or = or <4 x i32> %and1, %and2 ; <b[0], b[1], a[2], a[3]>
 128   ret <4 x i32> %or
 129 }
 130
 131
 132 define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) { ; and-mask form of test3
 133 ; CHECK-LABEL: test10:
 134 ; CHECK:       # %bb.0:
 135 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 136 ; CHECK-NEXT:    retq
 137   %and1 = and <2 x i64> %a, <i64 0, i64 -1> ; keeps a[1]
 138   %and2 = and <2 x i64> %b, <i64 -1, i64 0> ; keeps b[0]
 139   %or = or <2 x i64> %and1, %and2 ; <b[0], a[1]>
 140   ret <2 x i64> %or
 141 }
 142
 143
 144 define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) { ; and-mask form of test4
 145 ; CHECK-LABEL: test11:
 146 ; CHECK:       # %bb.0:
 147 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 148 ; CHECK-NEXT:    retq
 149   %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0> ; keeps a[0]
 150   %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1> ; keeps b[1], b[2], b[3]
 151   %or = or <4 x i32> %and1, %and2 ; <a[0], b[1], b[2], b[3]>
 152   ret <4 x i32> %or
 153 }
 154
 155
 156 define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) { ; and-mask form of test5
 157 ; CHECK-LABEL: test12:
 158 ; CHECK:       # %bb.0:
 159 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
 160 ; CHECK-NEXT:    retq
 161   %and1 = and <4 x i32> %a, <i32 0, i32 -1, i32 -1, i32 -1> ; keeps a[1], a[2], a[3]
 162   %and2 = and <4 x i32> %b, <i32 -1, i32 0, i32 0, i32 0> ; keeps b[0]
 163   %or = or <4 x i32> %and1, %and2 ; <b[0], a[1], a[2], a[3]>
 164   ret <4 x i32> %or
 165 }
 166
 167
 168 ; Verify that the following test cases are folded into single shuffles.
 169
 170 define <4 x i32> @test13(<4 x i32> %a, <4 x i32> %b) { ; non-overlapping lanes, but %a's element is moved, so not a plain blend
 171 ; CHECK-LABEL: test13:
 172 ; CHECK:       # %bb.0:
 173 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
 174 ; CHECK-NEXT:    retq
 175   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 1, i32 1, i32 4, i32 4> ; <a[1], a[1], 0, 0>
 176   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3> ; <0, 0, b[2], b[3]>
 177   %or = or <4 x i32> %shuf1, %shuf2 ; <a[1], a[1], b[2], b[3]>
 178   ret <4 x i32> %or
 179 }
 180
 181
 182 define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) { ; low element of %a paired with low element of %b
 183 ; CHECK-LABEL: test14:
 184 ; CHECK:       # %bb.0:
 185 ; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 186 ; CHECK-NEXT:    retq
 187   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2> ; <a[0], 0>
 188   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0> ; <0, b[0]>
 189   %or = or <2 x i64> %shuf1, %shuf2 ; <a[0], b[0]>
 190   ret <2 x i64> %or
 191 }
 192
 193
 194 define <4 x i32> @test15(<4 x i32> %a, <4 x i32> %b) { ; both inputs permuted; lanes still do not overlap
 195 ; CHECK-LABEL: test15:
 196 ; CHECK:       # %bb.0:
 197 ; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,1],xmm0[2,1]
 198 ; CHECK-NEXT:    movaps %xmm1, %xmm0
 199 ; CHECK-NEXT:    retq
 200   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 1> ; <0, 0, a[2], a[1]>
 201   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 2, i32 1, i32 4, i32 4> ; <b[2], b[1], 0, 0>
 202   %or = or <4 x i32> %shuf1, %shuf2 ; <b[2], b[1], a[2], a[1]>
 203   ret <4 x i32> %or
 204 }
 205
 206
 207 define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) { ; test14 with the roles of %a and %b swapped
 208 ; CHECK-LABEL: test16:
 209 ; CHECK:       # %bb.0:
 210 ; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 211 ; CHECK-NEXT:    movaps %xmm1, %xmm0
 212 ; CHECK-NEXT:    retq
 213   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0> ; <0, a[0]>
 214   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2> ; <b[0], 0>
 215   %or = or <2 x i64> %shuf1, %shuf2 ; <b[0], a[0]>
 216   ret <2 x i64> %or
 217 }
 218
 219
 220 ; Verify that the dag-combiner does not fold an OR of two shuffles into a single
 221 ; shuffle instruction when the shuffle indexes are not compatible.
 222
 223 define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) { ; both shuffles put a non-zero element in lane 1
 224 ; CHECK-LABEL: test17:
 225 ; CHECK:       # %bb.0:
 226 ; CHECK-NEXT:    psllq $32, %xmm0
 227 ; CHECK-NEXT:    movq {{.*#+}} xmm1 = xmm1[0],zero
 228 ; CHECK-NEXT:    por %xmm1, %xmm0
 229 ; CHECK-NEXT:    retq
 230   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2> ; <0, a[0], 0, a[2]>
 231   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4> ; <b[0], b[1], 0, 0>
 232   %or = or <4 x i32> %shuf1, %shuf2 ; lane 1 is a[0] | b[1], so a real or is still expected
 233   ret <4 x i32> %or
 234 }
 235
 236
 237 define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) { ; one non-zero lane per shuffle; lanes 2-3 are zero in both
 238 ; CHECK-LABEL: test18:
 239 ; CHECK:       # %bb.0:
 240 ; CHECK-NEXT:    pxor %xmm2, %xmm2
 241 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
 242 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
 243 ; CHECK-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
 244 ; CHECK-NEXT:    por %xmm1, %xmm0
 245 ; CHECK-NEXT:    retq
 246   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 4> ; <0, a[0], 0, 0>
 247   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4> ; <b[0], 0, 0, 0>
 248   %or = or <4 x i32> %shuf1, %shuf2 ; <b[0], a[0], 0, 0>; NOTE(review): presumably left unfolded because the result also needs zero lanes - confirm
 249   ret <4 x i32> %or
 250 }
 251
 252
 253 define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) { ; both shuffles put a non-zero element in lane 3
 254 ; CHECK-LABEL: test19:
 255 ; CHECK:       # %bb.0:
 256 ; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,3]
 257 ; CHECK-NEXT:    pxor %xmm3, %xmm3
 258 ; CHECK-NEXT:    pblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
 259 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,2,2]
 260 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5,6,7]
 261 ; CHECK-NEXT:    por %xmm2, %xmm0
 262 ; CHECK-NEXT:    retq
 263   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 3> ; <0, a[0], 0, a[3]>
 264   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 2, i32 2> ; <b[0], 0, b[2], b[2]>
 265   %or = or <4 x i32> %shuf1, %shuf2 ; lane 3 is a[3] | b[2], so a real or is still expected
 266   ret <4 x i32> %or
 267 }
 268
 269
 270 define <2 x i64> @test20(<2 x i64> %a, <2 x i64> %b) { ; both shuffles use the same mask
 271 ; CHECK-LABEL: test20:
 272 ; CHECK:       # %bb.0:
 273 ; CHECK-NEXT:    por %xmm1, %xmm0
 274 ; CHECK-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
 275 ; CHECK-NEXT:    retq
 276   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2> ; <a[0], 0>
 277   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2> ; <b[0], 0>
 278   %or = or <2 x i64> %shuf1, %shuf2 ; <a[0] | b[0], 0>; expected asm ors first, then zeroes the upper element
 279   ret <2 x i64> %or
 280 }
 281
 282
 283 define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) { ; both shuffles use the same mask, moving element 0 to lane 1
 284 ; CHECK-LABEL: test21:
 285 ; CHECK:       # %bb.0:
 286 ; CHECK-NEXT:    por %xmm1, %xmm0
 287 ; CHECK-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
 288 ; CHECK-NEXT:    retq
 289   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0> ; <0, a[0]>
 290   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0> ; <0, b[0]>
 291   %or = or <2 x i64> %shuf1, %shuf2 ; <0, a[0] | b[0]>; expected asm ors first, then shifts by 8 bytes
 292   ret <2 x i64> %or
 293 }
 294
 295
 296 ; Verify that the dag-combiner keeps the correct domain for float/double vectors
 297 ; that are bitcast to integer vectors in order to use the mask-or blend combine.
 298
 299 define <2 x double> @test22(<2 x double> %a0, <2 x double> %a1) { ; and-mask blend on <2 x double> inputs viewed as <2 x i64>
 300 ; CHECK-LABEL: test22:
 301 ; CHECK:       # %bb.0:
 302 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 303 ; CHECK-NEXT:    retq
 304   %bc1 = bitcast <2 x double> %a0 to <2 x i64>
 305   %bc2 = bitcast <2 x double> %a1 to <2 x i64>
 306   %and1 = and <2 x i64> %bc1, <i64 0, i64 -1> ; keeps element 1 of %a0
 307   %and2 = and <2 x i64> %bc2, <i64 -1, i64 0> ; keeps element 0 of %a1
 308   %or = or <2 x i64> %and1, %and2 ; <a1[0], a0[1]>
 309   %bc3 = bitcast <2 x i64> %or to <2 x double> ; back to the original floating-point type
 310   ret <2 x double> %bc3
 311 }
 312
 313
 314 define <4 x float> @test23(<4 x float> %a0, <4 x float> %a1) { ; and-mask blend on <4 x float> inputs viewed as <4 x i32>
 315 ; CHECK-LABEL: test23:
 316 ; CHECK:       # %bb.0:
 317 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
 318 ; CHECK-NEXT:    retq
 319   %bc1 = bitcast <4 x float> %a0 to <4 x i32>
 320   %bc2 = bitcast <4 x float> %a1 to <4 x i32>
 321   %and1 = and <4 x i32> %bc1, <i32 0, i32 -1, i32 -1, i32 0> ; keeps elements 1-2 of %a0
 322   %and2 = and <4 x i32> %bc2, <i32 -1, i32 0, i32 0, i32 -1> ; keeps elements 0 and 3 of %a1
 323   %or = or <4 x i32> %and1, %and2 ; <a1[0], a0[1], a0[2], a1[3]>
 324   %bc3 = bitcast <4 x i32> %or to <4 x float> ; back to the original floating-point type
 325   ret <4 x float> %bc3
 326 }
 327
 328
 329 define <4 x float> @test24(<4 x float> %a0, <4 x float> %a1) { ; <4 x float> inputs masked at <2 x i64> granularity
 330 ; CHECK-LABEL: test24:
 331 ; CHECK:       # %bb.0:
 332 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 333 ; CHECK-NEXT:    retq
 334   %bc1 = bitcast <4 x float> %a0 to <2 x i64>
 335   %bc2 = bitcast <4 x float> %a1 to <2 x i64>
 336   %and1 = and <2 x i64> %bc1, <i64 0, i64 -1> ; keeps the upper 64 bits of %a0 (float elements 2-3)
 337   %and2 = and <2 x i64> %bc2, <i64 -1, i64 0> ; keeps the lower 64 bits of %a1 (float elements 0-1)
 338   %or = or <2 x i64> %and1, %and2 ; float view: <a1[0], a1[1], a0[2], a0[3]>
 339   %bc3 = bitcast <2 x i64> %or to <4 x float> ; back to the original floating-point type
 340   ret <4 x float> %bc3
 341 }
 342
 343
 344 define <4 x float> @test25(<4 x float> %a0) { ; test23 with the second input replaced by a constant vector
 345 ; CHECK-LABEL: test25:
 346 ; CHECK:       # %bb.0:
 347 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = mem[0],xmm0[1,2],mem[3]
 348 ; CHECK-NEXT:    retq
 349   %bc1 = bitcast <4 x float> %a0 to <4 x i32>
 350   %bc2 = bitcast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0> to <4 x i32> ; constant splat of 1.0
 351   %and1 = and <4 x i32> %bc1, <i32 0, i32 -1, i32 -1, i32 0> ; keeps elements 1-2 of %a0
 352   %and2 = and <4 x i32> %bc2, <i32 -1, i32 0, i32 0, i32 -1> ; keeps elements 0 and 3 of the constant
 353   %or = or <4 x i32> %and1, %and2 ; <1.0, a0[1], a0[2], 1.0>; the expected blend takes the constant lanes from memory
 354   %bc3 = bitcast <4 x i32> %or to <4 x float> ; back to the original floating-point type
 355   ret <4 x float> %bc3
 356 }
 357
 358
 359 ; Verify that the DAGCombiner doesn't crash in the attempt to check if a shuffle
 360 ; with illegal type has a legal mask. Method 'isShuffleMaskLegal' only knows how to
 361 ; handle legal vector value types.
 362 define <4 x i8> @test_crash(<4 x i8> %a, <4 x i8> %b) { ; same masks as test2, on the <4 x i8> element type
 363 ; CHECK-LABEL: test_crash:
 364 ; CHECK:       # %bb.0:
 365 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 366 ; CHECK-NEXT:    retq
 367   %shuf1 = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3> ; <0, 0, a[2], a[3]>
 368   %shuf2 = shufflevector <4 x i8> %b, <4 x i8> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4> ; <b[0], b[1], 0, 0>
 369   %or = or <4 x i8> %shuf1, %shuf2 ; <b[0], b[1], a[2], a[3]>
 370   ret <4 x i8> %or
 371 }
 372
 373 ; Verify that we can fold regardless of which operand is the zeroinitializer
 374
 375 define <4 x i32> @test2b(<4 x i32> %a, <4 x i32> %b) { ; test2 with the zero vector as the first operand of %shuf1
 376 ; CHECK-LABEL: test2b:
 377 ; CHECK:       # %bb.0:
 378 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 379 ; CHECK-NEXT:    retq
 380   %shuf1 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32><i32 0, i32 0, i32 6, i32 7> ; <0, 0, a[2], a[3]>: indices 6 and 7 select from %a
 381   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4> ; <b[0], b[1], 0, 0>
 382   %or = or <4 x i32> %shuf1, %shuf2 ; <b[0], b[1], a[2], a[3]>
 383   ret <4 x i32> %or
 384 }
 385
 386 define <4 x i32> @test2c(<4 x i32> %a, <4 x i32> %b) { ; test2 with the zero vector as the first operand of both shuffles
 387 ; CHECK-LABEL: test2c:
 388 ; CHECK:       # %bb.0:
 389 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 390 ; CHECK-NEXT:    retq
 391   %shuf1 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32><i32 0, i32 0, i32 6, i32 7> ; <0, 0, a[2], a[3]>
 392   %shuf2 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32><i32 4, i32 5, i32 0, i32 0> ; <b[0], b[1], 0, 0>: indices 4 and 5 select from %b
 393   %or = or <4 x i32> %shuf1, %shuf2 ; <b[0], b[1], a[2], a[3]>
 394   ret <4 x i32> %or
 395 }
 396
 397
 398 define <4 x i32> @test2d(<4 x i32> %a, <4 x i32> %b) { ; test2 with the zero vector as the first operand of %shuf2
 399 ; CHECK-LABEL: test2d:
 400 ; CHECK:       # %bb.0:
 401 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 402 ; CHECK-NEXT:    retq
 403   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3> ; <0, 0, a[2], a[3]>
 404   %shuf2 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32><i32 4, i32 5, i32 0, i32 0> ; <b[0], b[1], 0, 0>
 405   %or = or <4 x i32> %shuf1, %shuf2 ; <b[0], b[1], a[2], a[3]>
 406   ret <4 x i32> %or
 407 }
 408
 409 ; Make sure the fold still works when a mask index that would point into the zero vector is undef.
 410
 411 define <4 x i32> @test2e(<4 x i32> %a, <4 x i32> %b) { ; test2 with an undef mask index in %shuf1 and a partly-undef zero operand
 412 ; CHECK-LABEL: test2e:
 413 ; CHECK:       # %bb.0:
 414 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 415 ; CHECK-NEXT:    retq
 416   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 undef, i32 4, i32 2, i32 3> ; <undef, 0, a[2], a[3]>: index 4 is the constant 0 in element 0 of operand 2
 417   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 0, i32 1, i32 4, i32 4> ; <b[0], b[1], 0, 0>
 418   %or = or <4 x i32> %shuf1, %shuf2 ; lane 0 of %shuf1 is undef; the expected blend takes lane 0 from %b
 419   ret <4 x i32> %or
 420 }
 421
 422 define <4 x i32> @test2f(<4 x i32> %a, <4 x i32> %b) { ; test2 with an undef mask index in %shuf2 and a partly-undef zero operand
 423 ; CHECK-LABEL: test2f:
 424 ; CHECK:       # %bb.0:
 425 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 426 ; CHECK-NEXT:    retq
 427   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 4, i32 4, i32 2, i32 3> ; <0, 0, a[2], a[3]>: index 4 is the constant 0 in element 0 of operand 2
 428   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 undef, i32 1, i32 4, i32 4> ; <undef, b[1], 0, 0>
 429   %or = or <4 x i32> %shuf1, %shuf2 ; lane 0 of %shuf2 is undef; the expected blend still takes lanes 0-1 from %b
 430   ret <4 x i32> %or
 431 }
 432
 433 ; (or (and X, c1), c2) -> (and (or X, c2), c1|c2) iff (c1 & c2) != 0
 434
 435 define <2 x i64> @or_and_v2i64(<2 x i64> %a0) { ; (x & 7) | 3 with splat constants; 7 & 3 is non-zero
 436 ; CHECK-LABEL: or_and_v2i64:
 437 ; CHECK:       # %bb.0:
 438 ; CHECK-NEXT:    orps {{.*}}(%rip), %xmm0
 439 ; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
 440 ; CHECK-NEXT:    retq
 441   %1 = and <2 x i64> %a0, <i64 7, i64 7> ; c1 = 7 in both lanes
 442   %2 = or <2 x i64> %1, <i64 3, i64 3> ; c2 = 3 in both lanes; the expected asm applies the or before the and
 443   ret <2 x i64> %2
 444 }
 445
 446 define <4 x i32> @or_and_v4i32(<4 x i32> %a0) { ; (x & c1) | c2 with different constants in every lane
 447 ; CHECK-LABEL: or_and_v4i32:
 448 ; CHECK:       # %bb.0:
 449 ; CHECK-NEXT:    orps {{.*}}(%rip), %xmm0
 450 ; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
 451 ; CHECK-NEXT:    retq
 452   %1 = and <4 x i32> %a0, <i32 1, i32 3, i32 5, i32 7> ; c1 per lane
 453   %2 = or <4 x i32> %1, <i32 3, i32 2, i32 15, i32 2> ; c2 per lane; c1 & c2 = <1, 2, 5, 2>, non-zero in every lane
 454   ret <4 x i32> %2
 455 }
 456
 457 ; If all masked bits are going to be set, that's a constant fold.
 458
 459 define <4 x i32> @or_and_v4i32_fold(<4 x i32> %a0) { ; (x & 1) | 3, where the or sets every bit the and could keep
 460 ; CHECK-LABEL: or_and_v4i32_fold:
 461 ; CHECK:       # %bb.0:
 462 ; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [3,3,3,3]
 463 ; CHECK-NEXT:    retq
 464   %1 = and <4 x i32> %a0, <i32 1, i32 1, i32 1, i32 1> ; only bit 0 of each lane can survive
 465   %2 = or <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3> ; bit 0 is set by the or anyway, so every lane is the constant 3
 466   ret <4 x i32> %2
 467 }
 468
 469 ; fold (or x, c) -> c iff (x & ~c) == 0
 470
 471 define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) { ; or of a zero-extended i32 with the all-ones 32-bit constant
 472 ; CHECK-LABEL: or_zext_v2i32:
 473 ; CHECK:       # %bb.0:
 474 ; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,4294967295]
 475 ; CHECK-NEXT:    retq
 476   %1 = zext <2 x i32> %a0 to <2 x i64> ; upper 32 bits of each lane are zero after zext
 477   %2 = or <2 x i64> %1, <i64 4294967295, i64 4294967295> ; 0xFFFFFFFF covers every bit that can be set, so the result is constant
 478   ret <2 x i64> %2
 479 }
 480
 481 define <4 x i32> @or_zext_v4i16(<4 x i16> %a0) { ; or of a zero-extended i16 with the all-ones 16-bit constant
 482 ; CHECK-LABEL: or_zext_v4i16:
 483 ; CHECK:       # %bb.0:
 484 ; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [65535,65535,65535,65535]
 485 ; CHECK-NEXT:    retq
 486   %1 = zext <4 x i16> %a0 to <4 x i32> ; upper 16 bits of each lane are zero after zext
 487   %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535> ; 0xFFFF covers every bit that can be set, so the result is constant
 488   ret <4 x i32> %2
 489 }
 490