test/CodeGen/X86/sse3-intrinsics-fast-isel.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
   3 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
   4 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
   5 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
   6 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
   7 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512
   8
   9 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse3-builtins.c
  10
  11 define <2 x double> @test_mm_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
     ; Passes both <2 x double> arguments to the llvm.x86.sse3.addsub.pd
     ; intrinsic and returns its result. The SSE check lines (used by the +sse3
     ; runs) expect a single two-operand addsubpd; the AVX check lines (used by
     ; the +avx and AVX-512 runs) expect the three-operand vaddsubpd.
     ; The ret{{[l|q]}} pattern accepts both retl and retq, which lets one set of
     ; checks serve the i386 and x86_64 triples.
     ; NOTE(review): presumably the IR clang emits for _mm_addsub_pd (inferred
     ; from the function name) - confirm against clang's sse3-builtins.c test.
  12 ; SSE-LABEL: test_mm_addsub_pd:
  13 ; SSE:       # %bb.0:
  14 ; SSE-NEXT:    addsubpd %xmm1, %xmm0
  15 ; SSE-NEXT:    ret{{[l|q]}}
  16 ;
  17 ; AVX-LABEL: test_mm_addsub_pd:
  18 ; AVX:       # %bb.0:
  19 ; AVX-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0
  20 ; AVX-NEXT:    ret{{[l|q]}}
  21   %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
  22   ret <2 x double> %res
  23 }
     ; Declaration of the intrinsic called by the test function above.
  24 declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
  25
  26 define <4 x float> @test_mm_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
     ; Passes both <4 x float> arguments to the llvm.x86.sse3.addsub.ps
     ; intrinsic and returns its result. The SSE check lines (+sse3 runs) expect
     ; a single addsubps; the AVX check lines (+avx and AVX-512 runs) expect the
     ; three-operand vaddsubps. ret{{[l|q]}} accepts both retl and retq so the
     ; same checks cover the i386 and x86_64 triples.
     ; NOTE(review): presumably the IR clang emits for _mm_addsub_ps (inferred
     ; from the function name) - confirm against clang's sse3-builtins.c test.
  27 ; SSE-LABEL: test_mm_addsub_ps:
  28 ; SSE:       # %bb.0:
  29 ; SSE-NEXT:    addsubps %xmm1, %xmm0
  30 ; SSE-NEXT:    ret{{[l|q]}}
  31 ;
  32 ; AVX-LABEL: test_mm_addsub_ps:
  33 ; AVX:       # %bb.0:
  34 ; AVX-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0
  35 ; AVX-NEXT:    ret{{[l|q]}}
  36   %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
  37   ret <4 x float> %res
  38 }
     ; Declaration of the intrinsic called by the test function above.
  39 declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
  40
  41 define <2 x double> @test_mm_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
     ; Passes both <2 x double> arguments to the llvm.x86.sse3.hadd.pd intrinsic
     ; and returns its result. The SSE check lines (+sse3 runs) expect a single
     ; haddpd; the AVX check lines (+avx and AVX-512 runs) expect the
     ; three-operand vhaddpd. ret{{[l|q]}} accepts both retl and retq so the
     ; same checks cover the i386 and x86_64 triples.
     ; NOTE(review): presumably the IR clang emits for _mm_hadd_pd (inferred
     ; from the function name) - confirm against clang's sse3-builtins.c test.
  42 ; SSE-LABEL: test_mm_hadd_pd:
  43 ; SSE:       # %bb.0:
  44 ; SSE-NEXT:    haddpd %xmm1, %xmm0
  45 ; SSE-NEXT:    ret{{[l|q]}}
  46 ;
  47 ; AVX-LABEL: test_mm_hadd_pd:
  48 ; AVX:       # %bb.0:
  49 ; AVX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
  50 ; AVX-NEXT:    ret{{[l|q]}}
  51   %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
  52   ret <2 x double> %res
  53 }
     ; Declaration of the intrinsic called by the test function above.
  54 declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
  55
  56 define <4 x float> @test_mm_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
     ; Passes both <4 x float> arguments to the llvm.x86.sse3.hadd.ps intrinsic
     ; and returns its result. The SSE check lines (+sse3 runs) expect a single
     ; haddps; the AVX check lines (+avx and AVX-512 runs) expect the
     ; three-operand vhaddps. ret{{[l|q]}} accepts both retl and retq so the
     ; same checks cover the i386 and x86_64 triples.
     ; NOTE(review): presumably the IR clang emits for _mm_hadd_ps (inferred
     ; from the function name) - confirm against clang's sse3-builtins.c test.
  57 ; SSE-LABEL: test_mm_hadd_ps:
  58 ; SSE:       # %bb.0:
  59 ; SSE-NEXT:    haddps %xmm1, %xmm0
  60 ; SSE-NEXT:    ret{{[l|q]}}
  61 ;
  62 ; AVX-LABEL: test_mm_hadd_ps:
  63 ; AVX:       # %bb.0:
  64 ; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
  65 ; AVX-NEXT:    ret{{[l|q]}}
  66   %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
  67   ret <4 x float> %res
  68 }
     ; Declaration of the intrinsic called by the test function above.
  69 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
  70
  71 define <2 x double> @test_mm_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
     ; Passes both <2 x double> arguments to the llvm.x86.sse3.hsub.pd intrinsic
     ; and returns its result. The SSE check lines (+sse3 runs) expect a single
     ; hsubpd; the AVX check lines (+avx and AVX-512 runs) expect the
     ; three-operand vhsubpd. ret{{[l|q]}} accepts both retl and retq so the
     ; same checks cover the i386 and x86_64 triples.
     ; NOTE(review): presumably the IR clang emits for _mm_hsub_pd (inferred
     ; from the function name) - confirm against clang's sse3-builtins.c test.
  72 ; SSE-LABEL: test_mm_hsub_pd:
  73 ; SSE:       # %bb.0:
  74 ; SSE-NEXT:    hsubpd %xmm1, %xmm0
  75 ; SSE-NEXT:    ret{{[l|q]}}
  76 ;
  77 ; AVX-LABEL: test_mm_hsub_pd:
  78 ; AVX:       # %bb.0:
  79 ; AVX-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0
  80 ; AVX-NEXT:    ret{{[l|q]}}
  81   %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
  82   ret <2 x double> %res
  83 }
     ; Declaration of the intrinsic called by the test function above.
  84 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
  85
  86 define <4 x float> @test_mm_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
     ; Passes both <4 x float> arguments to the llvm.x86.sse3.hsub.ps intrinsic
     ; and returns its result. The SSE check lines (+sse3 runs) expect a single
     ; hsubps; the AVX check lines (+avx and AVX-512 runs) expect the
     ; three-operand vhsubps. ret{{[l|q]}} accepts both retl and retq so the
     ; same checks cover the i386 and x86_64 triples.
     ; NOTE(review): presumably the IR clang emits for _mm_hsub_ps (inferred
     ; from the function name) - confirm against clang's sse3-builtins.c test.
  87 ; SSE-LABEL: test_mm_hsub_ps:
  88 ; SSE:       # %bb.0:
  89 ; SSE-NEXT:    hsubps %xmm1, %xmm0
  90 ; SSE-NEXT:    ret{{[l|q]}}
  91 ;
  92 ; AVX-LABEL: test_mm_hsub_ps:
  93 ; AVX:       # %bb.0:
  94 ; AVX-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
  95 ; AVX-NEXT:    ret{{[l|q]}}
  96   %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
  97   ret <4 x float> %res
  98 }
     ; Declaration of the intrinsic called by the test function above.
  99 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
 100
 101 define <2 x i64> @test_mm_lddqu_si128(<2 x i64>* %a0) {
     ; Loads a vector through the llvm.x86.sse3.ldu.dq intrinsic. The intrinsic
     ; takes an i8* and returns <16 x i8>, so the pointer argument is bitcast on
     ; the way in and the result is bitcast to <2 x i64> on the way out.
     ; The checks are split per triple rather than shared: the i386 runs first
     ; load the pointer argument from the stack into %eax, while the x86_64
     ; runs address memory through %rdi directly. Within each triple, the +sse3
     ; run expects lddqu and the +avx / AVX-512 runs expect vlddqu.
     ; NOTE(review): presumably the IR clang emits for _mm_lddqu_si128 (inferred
     ; from the function name) - confirm against clang's sse3-builtins.c test.
 102 ; X86-SSE-LABEL: test_mm_lddqu_si128:
 103 ; X86-SSE:       # %bb.0:
 104 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 105 ; X86-SSE-NEXT:    lddqu (%eax), %xmm0
 106 ; X86-SSE-NEXT:    retl
 107 ;
 108 ; X86-AVX-LABEL: test_mm_lddqu_si128:
 109 ; X86-AVX:       # %bb.0:
 110 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 111 ; X86-AVX-NEXT:    vlddqu (%eax), %xmm0
 112 ; X86-AVX-NEXT:    retl
 113 ;
 114 ; X64-SSE-LABEL: test_mm_lddqu_si128:
 115 ; X64-SSE:       # %bb.0:
 116 ; X64-SSE-NEXT:    lddqu (%rdi), %xmm0
 117 ; X64-SSE-NEXT:    retq
 118 ;
 119 ; X64-AVX-LABEL: test_mm_lddqu_si128:
 120 ; X64-AVX:       # %bb.0:
 121 ; X64-AVX-NEXT:    vlddqu (%rdi), %xmm0
 122 ; X64-AVX-NEXT:    retq
 123   %bc = bitcast <2 x i64>* %a0 to i8*
 124   %call = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %bc)
 125   %res = bitcast <16 x i8> %call to <2 x i64>
 126   ret <2 x i64> %res
 127 }
     ; Declaration of the intrinsic called by the test function above.
 128 declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
 129
 130 define <2 x double> @test_mm_loaddup_pd(double* %a0) {
     ; No intrinsic is called here: one double is loaded from %a0 and inserted
     ; into lane 0 and lane 1 of a <2 x double> (starting from undef). The
     ; checks expect that generic IR to become a single movddup (+sse3 runs) or
     ; vmovddup (+avx / AVX-512 runs) with a memory source, which llc prints as
     ; "xmm0 = mem[0,0]" in the trailing asm comment.
     ; {{.*#+}} skips the printed operands up to the '#' that starts that
     ; comment, so the checks do not pin the addressing mode. The checks are
     ; split per triple because the i386 runs first load the pointer argument
     ; from the stack into %eax.
     ; NOTE(review): presumably the IR clang emits for _mm_loaddup_pd (inferred
     ; from the function name) - confirm against clang's sse3-builtins.c test.
 131 ; X86-SSE-LABEL: test_mm_loaddup_pd:
 132 ; X86-SSE:       # %bb.0:
 133 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 134 ; X86-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
 135 ; X86-SSE-NEXT:    retl
 136 ;
 137 ; X86-AVX-LABEL: test_mm_loaddup_pd:
 138 ; X86-AVX:       # %bb.0:
 139 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 140 ; X86-AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
 141 ; X86-AVX-NEXT:    retl
 142 ;
 143 ; X64-SSE-LABEL: test_mm_loaddup_pd:
 144 ; X64-SSE:       # %bb.0:
 145 ; X64-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
 146 ; X64-SSE-NEXT:    retq
 147 ;
 148 ; X64-AVX-LABEL: test_mm_loaddup_pd:
 149 ; X64-AVX:       # %bb.0:
 150 ; X64-AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
 151 ; X64-AVX-NEXT:    retq
 152   %ld = load double, double* %a0
 153   %res0 = insertelement <2 x double> undef, double %ld, i32 0
 154   %res1 = insertelement <2 x double> %res0, double %ld, i32 1
 155   ret <2 x double> %res1
 156 }
 157
 158 define <2 x double> @test_mm_movedup_pd(<2 x double> %a0) {
     ; No intrinsic is called here: a shufflevector of %a0 with itself using an
     ; all-zero mask, so both result lanes take element 0 of %a0. The checks
     ; expect a single movddup (+sse3 runs) or vmovddup (+avx / AVX-512 runs),
     ; which llc prints as "xmm0 = xmm0[0,0]" in the trailing asm comment.
     ; {{.*#+}} skips the printed operands up to the '#' that starts that
     ; comment; ret{{[l|q]}} accepts both retl and retq so the same checks
     ; cover the i386 and x86_64 triples.
     ; NOTE(review): presumably the IR clang emits for _mm_movedup_pd (inferred
     ; from the function name) - confirm against clang's sse3-builtins.c test.
 159 ; SSE-LABEL: test_mm_movedup_pd:
 160 ; SSE:       # %bb.0:
 161 ; SSE-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
 162 ; SSE-NEXT:    ret{{[l|q]}}
 163 ;
 164 ; AVX-LABEL: test_mm_movedup_pd:
 165 ; AVX:       # %bb.0:
 166 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 167 ; AVX-NEXT:    ret{{[l|q]}}
 168   %res = shufflevector <2 x double> %a0, <2 x double> %a0, <2 x i32> zeroinitializer
 169   ret <2 x double> %res
 170 }
 171
 172 define <4 x float> @test_mm_movehdup_ps(<4 x float> %a0) {
     ; No intrinsic is called here: a shufflevector of %a0 with itself using
     ; mask <1,1,3,3>, i.e. each odd-indexed element is duplicated into the lane
     ; below it. The checks expect a single movshdup (+sse3 runs) or vmovshdup
     ; (+avx / AVX-512 runs), which llc prints as "xmm0 = xmm0[1,1,3,3]" in the
     ; trailing asm comment.
     ; {{.*#+}} skips the printed operands up to the '#' that starts that
     ; comment; ret{{[l|q]}} accepts both retl and retq so the same checks
     ; cover the i386 and x86_64 triples.
     ; NOTE(review): presumably the IR clang emits for _mm_movehdup_ps (inferred
     ; from the function name) - confirm against clang's sse3-builtins.c test.
 173 ; SSE-LABEL: test_mm_movehdup_ps:
 174 ; SSE:       # %bb.0:
 175 ; SSE-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
 176 ; SSE-NEXT:    ret{{[l|q]}}
 177 ;
 178 ; AVX-LABEL: test_mm_movehdup_ps:
 179 ; AVX:       # %bb.0:
 180 ; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
 181 ; AVX-NEXT:    ret{{[l|q]}}
 182   %res = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
 183   ret <4 x float> %res
 184 }
 185
 186 define <4 x float> @test_mm_moveldup_ps(<4 x float> %a0) {
     ; No intrinsic is called here: a shufflevector of %a0 with itself using
     ; mask <0,0,2,2>, i.e. each even-indexed element is duplicated into the
     ; lane above it. The checks expect a single movsldup (+sse3 runs) or
     ; vmovsldup (+avx / AVX-512 runs), which llc prints as
     ; "xmm0 = xmm0[0,0,2,2]" in the trailing asm comment.
     ; {{.*#+}} skips the printed operands up to the '#' that starts that
     ; comment; ret{{[l|q]}} accepts both retl and retq so the same checks
     ; cover the i386 and x86_64 triples.
     ; NOTE(review): presumably the IR clang emits for _mm_moveldup_ps (inferred
     ; from the function name) - confirm against clang's sse3-builtins.c test.
 187 ; SSE-LABEL: test_mm_moveldup_ps:
 188 ; SSE:       # %bb.0:
 189 ; SSE-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
 190 ; SSE-NEXT:    ret{{[l|q]}}
 191 ;
 192 ; AVX-LABEL: test_mm_moveldup_ps:
 193 ; AVX:       # %bb.0:
 194 ; AVX-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
 195 ; AVX-NEXT:    ret{{[l|q]}}
 196   %res = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
 197   ret <4 x float> %res
 198 }