; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-unknown -mattr=+mmx,+sse2,+ssse3 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+mmx,+sse2,+ssse3 | FileCheck %s --check-prefixes=X64,ALIGN
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+mmx,+sse2,+ssse3,sse-unaligned-mem | FileCheck %s --check-prefixes=X64,UNALIGN

; There are no MMX operations in @t1

define void @t1(i32 %a, ptr %P) nounwind {
; X86-LABEL: t1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shll $12, %ecx
; X86-NEXT:    movd %ecx, %xmm0
; X86-NEXT:    psllq $32, %xmm0
; X86-NEXT:    movq %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: t1:
; X64:       # %bb.0:
; X64-NEXT:    shll $12, %edi
; X64-NEXT:    movd %edi, %xmm0
; X64-NEXT:    psllq $32, %xmm0
; X64-NEXT:    movq %xmm0, (%rsi)
; X64-NEXT:    retq
  %tmp12 = shl i32 %a, 12
  %tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
  %tmp22 = insertelement <2 x i32> %tmp21, i32 0, i32 0
  %tmp23 = bitcast <2 x i32> %tmp22 to x86_mmx
  store x86_mmx %tmp23, ptr %P
  ret void
}

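; Element 0 of the load goes into lane 3 of a zero vector; this lowers to
; unpcklpd plus shufps.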
define <4 x float> @t2(ptr %P) nounwind {
; X86-LABEL: t2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorps %xmm0, %xmm0
; X86-NEXT:    xorps %xmm1, %xmm1
; X86-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; X86-NEXT:    retl
;
; X64-LABEL: t2:
; X64:       # %bb.0:
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    xorps %xmm1, %xmm1
; X64-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 4, i32 4, i32 0 >
  ret <4 x float> %tmp2
}

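; Elements 2 and 3 of the load fill the low half and the high half is zero,
; so a single movlps into a zeroed register suffices.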
define <4 x float> @t3(ptr %P) nounwind {
; X86-LABEL: t3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorps %xmm0, %xmm0
; X86-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-NEXT:    retl
;
; X64-LABEL: t3:
; X64:       # %bb.0:
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 >
  ret <4 x float> %tmp2
}

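; Lane 0 takes element 3 of the load and the rest is zero; the aligned load
; folds straight into shufps as a memory operand.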
define <4 x float> @t4(ptr %P) nounwind {
; X86-LABEL: t4:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorps %xmm1, %xmm1
; X86-NEXT:    xorps %xmm0, %xmm0
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],mem[3,0]
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X86-NEXT:    retl
;
; X64-LABEL: t4:
; X64:       # %bb.0:
; X64-NEXT:    xorps %xmm1, %xmm1
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],mem[3,0]
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P
  %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
  ret <4 x float> %tmp2
}

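; Same shuffle as @t4, but the load is only 4-byte aligned: an explicit
; movups is needed unless sse-unaligned-mem allows folding the unaligned
; load into shufps.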
define <4 x float> @t4_under_aligned(ptr %P) nounwind {
; X86-LABEL: t4_under_aligned:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movups (%eax), %xmm0
; X86-NEXT:    xorps %xmm1, %xmm1
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; X86-NEXT:    retl
;
; ALIGN-LABEL: t4_under_aligned:
; ALIGN:       # %bb.0:
; ALIGN-NEXT:    movups (%rdi), %xmm0
; ALIGN-NEXT:    xorps %xmm1, %xmm1
; ALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
; ALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; ALIGN-NEXT:    retq
;
; UNALIGN-LABEL: t4_under_aligned:
; UNALIGN:       # %bb.0:
; UNALIGN-NEXT:    xorps %xmm1, %xmm1
; UNALIGN-NEXT:    xorps %xmm0, %xmm0
; UNALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],mem[3,0]
; UNALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; UNALIGN-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P, align 4
  %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
  ret <4 x float> %tmp2
}

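; The mask only pins byte 1 into byte 0 and a zero into byte 15; shifting
; every word right by 8 bits (psrlw $8) satisfies both.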
define <16 x i8> @t5(<16 x i8> %x) nounwind {
; X86-LABEL: t5:
; X86:       # %bb.0:
; X86-NEXT:    psrlw $8, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: t5:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $8, %xmm0
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
  ret <16 x i8> %s
}

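; Like @t5 but with undef rather than zero in the trailing lanes; psrlw $8
; still matches.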
define <16 x i8> @t6(<16 x i8> %x) nounwind {
; X86-LABEL: t6:
; X86:       # %bb.0:
; X86-NEXT:    psrlw $8, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: t6:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $8, %xmm0
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

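; Bytes 1 and 2 must land in bytes 14 and 15, with everything below undef:
; a 13-byte left shift of the whole vector (pslldq $13) fits.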
define <16 x i8> @t7(<16 x i8> %x) nounwind {
; X86-LABEL: t7:
; X86:       # %bb.0:
; X86-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
; X86-NEXT:    retl
;
; X64-LABEL: t7:
; X64:       # %bb.0:
; X64-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2>
  ret <16 x i8> %s
}

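; A whole-vector shift down by one byte with a zero filling byte 15,
; i.e. exactly psrldq $1.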
define <16 x i8> @t8(<16 x i8> %x) nounwind {
; X86-LABEL: t8:
; X86:       # %bb.0:
; X86-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X86-NEXT:    retl
;
; X64-LABEL: t8:
; X64:       # %bb.0:
; X64-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
  ret <16 x i8> %s
}

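; The same one-byte shift down, with undef instead of zero at the top;
; psrldq $1 still matches.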
define <16 x i8> @t9(<16 x i8> %x) nounwind {
; X86-LABEL: t9:
; X86:       # %bb.0:
; X86-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X86-NEXT:    retl
;
; X64-LABEL: t9:
; X64:       # %bb.0:
; X64-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 undef, i32 undef>
  ret <16 x i8> %s
}