test/CodeGen/X86/vector-shuffle-combining-sse4a.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3,+sse4a | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3
   3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2,+sse4a | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
   4 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,+sse4a | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
   5 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,+sse4a | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
   6 ;
   7 ; Combine tests involving SSE4A target shuffles (EXTRQI,INSERTQI)
   8
   9 declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) ; (source bytes, byte-index mask); called by every test function in this file
  10
  11 define <16 x i8> @combine_extrqi_pshufb_16i8(<16 x i8> %a0) {
     ; Byte-wise case: a shufflevector that pulls bytes 1-2 of %a0 into a
     ; zero-filled low half, followed by a pshufb over the result. The
     ; assertions below expect the pair to be emitted as one extrq under every
     ; RUN configuration.
  12 ; CHECK-LABEL: combine_extrqi_pshufb_16i8:
  13 ; CHECK:       # %bb.0:
  14 ; CHECK-NEXT:    extrq {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
  15 ; CHECK-NEXT:    retq
       ; Mask indices 1,2 pick from %a0; index 16 is element 0 of the
       ; zeroinitializer operand. The upper eight lanes are undef.
  16   %1 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
       ; pshufb control: lanes 0-4 map to themselves; 255 has the top bit set,
       ; which makes pshufb write zero to lanes 5-7.
  17   %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
  18   ret <16 x i8> %2
  19 }
  20
  21 define <8 x i16> @combine_extrqi_pshufb_8i16(<8 x i16> %a0) {
     ; Word-wise variant of the extrq test: the first shuffle works on i16
     ; lanes, and the pshufb is applied through a bitcast to <16 x i8>. The
     ; assertions below expect a single extrq yielding bytes 2-3 of %a0
     ; followed by six zero bytes.
  22 ; CHECK-LABEL: combine_extrqi_pshufb_8i16:
  23 ; CHECK:       # %bb.0:
  24 ; CHECK-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
  25 ; CHECK-NEXT:    retq
       ; Words 1,2 of %a0, then two zero words (index 8 is element 0 of the
       ; zeroinitializer operand); the upper four words are undef.
  26   %1 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
  27   %2 = bitcast <8 x i16> %1 to <16 x i8>
       ; Keep bytes 0-1 (word 1 of %a0); 255 entries zero bytes 2-7.
  28   %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 0, i8 1, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
  29   %4 = bitcast <16 x i8> %3 to <8 x i16>
  30   ret <8 x i16> %4
  31 }
  32
  33 define <16 x i8> @combine_insertqi_pshufb_16i8(<16 x i8> %a0, <16 x i8> %a1) {
     ; Byte-wise insert pattern whose result is then masked by a pshufb so that
     ; only bytes taken from %a1 remain. The assertions differ per RUN
     ; configuration: with only SSSE3 available a register copy plus extrq on
     ; %a1 is expected, while the SSE4.2 and AVX configurations expect a
     ; pmovzxwq / vpmovzxwq of xmm1.
  34 ; SSSE3-LABEL: combine_insertqi_pshufb_16i8:
  35 ; SSSE3:       # %bb.0:
  36 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
  37 ; SSSE3-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
  38 ; SSSE3-NEXT:    retq
  39 ;
  40 ; SSE42-LABEL: combine_insertqi_pshufb_16i8:
  41 ; SSE42:       # %bb.0:
  42 ; SSE42-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
  43 ; SSE42-NEXT:    retq
  44 ;
  45 ; AVX-LABEL: combine_insertqi_pshufb_16i8:
  46 ; AVX:       # %bb.0:
  47 ; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
  48 ; AVX-NEXT:    retq
       ; Indices 16-18 select bytes 0-2 of %a1; indices 3-7 keep bytes 3-7 of
       ; %a0. The upper eight lanes are undef.
  49   %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 16, i32 17, i32 18, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
       ; Keep bytes 0-1 (both from %a1) and zero bytes 2-7, so no byte of %a0
       ; reaches the defined part of the result.
  50   %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
  51   ret <16 x i8> %2
  52 }
  53
  54 define <8 x i16> @combine_insertqi_pshufb_8i16(<8 x i16> %a0, <8 x i16> %a1) {
     ; Word-wise variant of the insert test: word 0 of %a1 is inserted into
     ; %a0, then a pshufb (through a bitcast to bytes) keeps only that word and
     ; zeroes the rest of the low half. The per-configuration assertions match
     ; the byte-wise variant: copy plus extrq with only SSSE3, and
     ; pmovzxwq / vpmovzxwq of xmm1 for the SSE4.2 and AVX configurations.
  55 ; SSSE3-LABEL: combine_insertqi_pshufb_8i16:
  56 ; SSSE3:       # %bb.0:
  57 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
  58 ; SSSE3-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
  59 ; SSSE3-NEXT:    retq
  60 ;
  61 ; SSE42-LABEL: combine_insertqi_pshufb_8i16:
  62 ; SSE42:       # %bb.0:
  63 ; SSE42-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
  64 ; SSE42-NEXT:    retq
  65 ;
  66 ; AVX-LABEL: combine_insertqi_pshufb_8i16:
  67 ; AVX:       # %bb.0:
  68 ; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
  69 ; AVX-NEXT:    retq
       ; Index 8 selects word 0 of %a1; indices 1-3 keep words 1-3 of %a0.
       ; The upper four words are undef.
  70   %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  71   %2 = bitcast <8 x i16> %1 to <16 x i8>
       ; Keep bytes 0-1 (the word taken from %a1) and zero bytes 2-7.
  72   %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 0, i8 1, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
  73   %4 = bitcast <16 x i8> %3 to <8 x i16>
  74   ret <8 x i16> %4
  75 }
  76
  77 define <16 x i8> @combine_pshufb_insertqi_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
     ; Three-stage chain: pshufb, then a byte insert from %a1, then a second
     ; pshufb. The assertions below expect the whole chain to be emitted as a
     ; single insertq under every RUN configuration.
  78 ; CHECK-LABEL: combine_pshufb_insertqi_pshufb:
  79 ; CHECK:       # %bb.0:
  80 ; CHECK-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0,1],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
  81 ; CHECK-NEXT:    retq
       ; Reverse the low eight bytes of %a0 (lane i receives byte 7-i).
  82   %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
       ; Overwrite lanes 1-2 with bytes 0-1 of %a1 (indices 16,17); every
       ; other low lane keeps the reversed %a0 byte.
  83   %2 = shufflevector <16 x i8> %1, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 17, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
       ; Undo the reversal on the lanes that matter. Net result on the defined
       ; lanes: %a0 byte 0, %a1 bytes 0-1, %a0 bytes 3-4, and %a0 byte 7 in
       ; lane 7; lanes 5-6 are left undef by this mask.
  84   %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 7, i8 1, i8 2, i8 4, i8 3, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
  85   ret <16 x i8> %3
  86 }