llvm/test/CodeGen/X86/sse3-intrinsics-x86.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse3 -show-mc-encoding | FileCheck %s --check-prefixes=X86,SSE,X86-SSE
   3 ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=X86,AVX,X86-AVX
   4 ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=X86,AVX,X86-AVX
   5 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse3 -show-mc-encoding | FileCheck %s --check-prefixes=X64,SSE,X64-SSE
   6 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=X64,AVX,X64-AVX
   7 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=X64,AVX,X64-AVX
   8
   9 define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) { ; Codegen check for the llvm.x86.sse3.addsub.pd intrinsic.
  10 ; SSE-LABEL: test_x86_sse3_addsub_pd:
  11 ; SSE:       ## %bb.0:
  12 ; SSE-NEXT:    addsubpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd0,0xc1]
  13 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  14 ;
  15 ; AVX-LABEL: test_x86_sse3_addsub_pd:
  16 ; AVX:       ## %bb.0:
  17 ; AVX-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd0,0xc1]
  18 ; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  19   %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; expected to select addsubpd, or vaddsubpd in the AVX runs
  20   ret <2 x double> %res
  21 }
  22 declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
  23
  24
  25 define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) { ; Codegen check for the llvm.x86.sse3.addsub.ps intrinsic.
  26 ; SSE-LABEL: test_x86_sse3_addsub_ps:
  27 ; SSE:       ## %bb.0:
  28 ; SSE-NEXT:    addsubps %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0xd0,0xc1]
  29 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  30 ;
  31 ; AVX-LABEL: test_x86_sse3_addsub_ps:
  32 ; AVX:       ## %bb.0:
  33 ; AVX-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0xd0,0xc1]
  34 ; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  35   %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; expected to select addsubps, or vaddsubps in the AVX runs
  36   ret <4 x float> %res
  37 }
  38 declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
  39
  40
  41 define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) { ; Codegen check for the llvm.x86.sse3.hadd.pd intrinsic.
  42 ; SSE-LABEL: test_x86_sse3_hadd_pd:
  43 ; SSE:       ## %bb.0:
  44 ; SSE-NEXT:    haddpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x7c,0xc1]
  45 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  46 ;
  47 ; AVX-LABEL: test_x86_sse3_hadd_pd:
  48 ; AVX:       ## %bb.0:
  49 ; AVX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x7c,0xc1]
  50 ; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  51   %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; expected to select haddpd, or vhaddpd in the AVX runs
  52   ret <2 x double> %res
  53 }
  54 declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
  55
  56
  57 define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) { ; Codegen check for the llvm.x86.sse3.hadd.ps intrinsic.
  58 ; SSE-LABEL: test_x86_sse3_hadd_ps:
  59 ; SSE:       ## %bb.0:
  60 ; SSE-NEXT:    haddps %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x7c,0xc1]
  61 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  62 ;
  63 ; AVX-LABEL: test_x86_sse3_hadd_ps:
  64 ; AVX:       ## %bb.0:
  65 ; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x7c,0xc1]
  66 ; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  67   %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; expected to select haddps, or vhaddps in the AVX runs
  68   ret <4 x float> %res
  69 }
  70 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
  71
  72
  73 define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) { ; Codegen check for the llvm.x86.sse3.hsub.pd intrinsic.
  74 ; SSE-LABEL: test_x86_sse3_hsub_pd:
  75 ; SSE:       ## %bb.0:
  76 ; SSE-NEXT:    hsubpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x7d,0xc1]
  77 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  78 ;
  79 ; AVX-LABEL: test_x86_sse3_hsub_pd:
  80 ; AVX:       ## %bb.0:
  81 ; AVX-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x7d,0xc1]
  82 ; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  83   %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; expected to select hsubpd, or vhsubpd in the AVX runs
  84   ret <2 x double> %res
  85 }
  86 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
  87
  88
  89 define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) { ; Codegen check for the llvm.x86.sse3.hsub.ps intrinsic.
  90 ; SSE-LABEL: test_x86_sse3_hsub_ps:
  91 ; SSE:       ## %bb.0:
  92 ; SSE-NEXT:    hsubps %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x7d,0xc1]
  93 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  94 ;
  95 ; AVX-LABEL: test_x86_sse3_hsub_ps:
  96 ; AVX:       ## %bb.0:
  97 ; AVX-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x7d,0xc1]
  98 ; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  99   %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; expected to select hsubps, or vhsubps in the AVX runs
 100   ret <4 x float> %res
 101 }
 102 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
 103
 104
 105 define <16 x i8> @test_x86_sse3_ldu_dq(ptr %a0) { ; Codegen check for the llvm.x86.sse3.ldu.dq intrinsic; checks differ per triple because the pointer argument arrives differently.
 106 ; X86-SSE-LABEL: test_x86_sse3_ldu_dq:
 107 ; X86-SSE:       ## %bb.0:
 108 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
 109 ; X86-SSE-NEXT:    lddqu (%eax), %xmm0 ## encoding: [0xf2,0x0f,0xf0,0x00]
 110 ; X86-SSE-NEXT:    retl ## encoding: [0xc3]
 111 ;
 112 ; X86-AVX-LABEL: test_x86_sse3_ldu_dq:
 113 ; X86-AVX:       ## %bb.0:
 114 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
 115 ; X86-AVX-NEXT:    vlddqu (%eax), %xmm0 ## encoding: [0xc5,0xfb,0xf0,0x00]
 116 ; X86-AVX-NEXT:    retl ## encoding: [0xc3]
 117 ;
 118 ; X64-SSE-LABEL: test_x86_sse3_ldu_dq:
 119 ; X64-SSE:       ## %bb.0:
 120 ; X64-SSE-NEXT:    lddqu (%rdi), %xmm0 ## encoding: [0xf2,0x0f,0xf0,0x07]
 121 ; X64-SSE-NEXT:    retq ## encoding: [0xc3]
 122 ;
 123 ; X64-AVX-LABEL: test_x86_sse3_ldu_dq:
 124 ; X64-AVX:       ## %bb.0:
 125 ; X64-AVX-NEXT:    vlddqu (%rdi), %xmm0 ## encoding: [0xc5,0xfb,0xf0,0x07]
 126 ; X64-AVX-NEXT:    retq ## encoding: [0xc3]
 127   %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(ptr %a0) ; expected to select lddqu (vlddqu in the AVX runs) through %a0; the i386 runs first load %a0 from the stack into eax
 128   ret <16 x i8> %res
 129 }
 130 declare <16 x i8> @llvm.x86.sse3.ldu.dq(ptr) nounwind readonly
 131
 132 ; Make sure instructions that have no AVX equivalents, but are associated with SSEX feature flags, still work.
 133
 134 define void @monitor(ptr %P, i32 %E, i32 %H) nounwind { ; Codegen check for the llvm.x86.sse3.monitor intrinsic; every RUN line of a given triple shares one set of checks.
 135 ; X86-LABEL: monitor:
 136 ; X86:       ## %bb.0:
 137 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
 138 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
 139 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x0c]
 140 ; X86-NEXT:    monitor ## encoding: [0x0f,0x01,0xc8]
 141 ; X86-NEXT:    retl ## encoding: [0xc3]
 142 ;
 143 ; X64-LABEL: monitor:
 144 ; X64:       ## %bb.0:
 145 ; X64-NEXT:    movl %esi, %ecx ## encoding: [0x89,0xf1]
 146 ; X64-NEXT:    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
 147 ; X64-NEXT:    monitor ## encoding: [0x0f,0x01,0xc8]
 148 ; X64-NEXT:    retq ## encoding: [0xc3]
 149   tail call void @llvm.x86.sse3.monitor(ptr %P, i32 %E, i32 %H) ; per the checks, %P ends up in eax/rax and %E in ecx before monitor; i386 also loads %H into edx, x86_64 emits no move for %H
 150   ret void
 151 }
 152 declare void @llvm.x86.sse3.monitor(ptr, i32, i32) nounwind
 153
 154 define void @mwait(i32 %E, i32 %H) nounwind { ; Codegen check for the llvm.x86.sse3.mwait intrinsic; every RUN line of a given triple shares one set of checks.
 155 ; X86-LABEL: mwait:
 156 ; X86:       ## %bb.0:
 157 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
 158 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
 159 ; X86-NEXT:    mwait ## encoding: [0x0f,0x01,0xc9]
 160 ; X86-NEXT:    retl ## encoding: [0xc3]
 161 ;
 162 ; X64-LABEL: mwait:
 163 ; X64:       ## %bb.0:
 164 ; X64-NEXT:    movl %esi, %eax ## encoding: [0x89,0xf0]
 165 ; X64-NEXT:    movl %edi, %ecx ## encoding: [0x89,0xf9]
 166 ; X64-NEXT:    mwait ## encoding: [0x0f,0x01,0xc9]
 167 ; X64-NEXT:    retq ## encoding: [0xc3]
 168   tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H) ; per the checks, %E ends up in ecx and %H in eax before mwait
 169   ret void
 170 }
 171 declare void @llvm.x86.sse3.mwait(i32, i32) nounwind