test/CodeGen/X86/sse-intrinsics-x86-upgrade.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
   3 ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
   4 ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
   5 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-sse2 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
   6 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
   7 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512
   8
   9
  10 define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) { ; Packed square root through @llvm.x86.sse.sqrt.ps - every llc configuration is expected to emit exactly one sqrt instruction and then return.
  11 ; SSE-LABEL: test_x86_sse_sqrt_ps:
  12 ; SSE:       ## %bb.0:
  13 ; SSE-NEXT:    sqrtps %xmm0, %xmm0 ## encoding: [0x0f,0x51,0xc0]
  14 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  15 ;
  16 ; AVX1-LABEL: test_x86_sse_sqrt_ps:
  17 ; AVX1:       ## %bb.0:
  18 ; AVX1-NEXT:    vsqrtps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x51,0xc0]
  19 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  20 ;
  21 ; AVX512-LABEL: test_x86_sse_sqrt_ps:
  22 ; AVX512:       ## %bb.0:
  23 ; AVX512-NEXT:    vsqrtps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x51,0xc0]
  24 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  25   %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; asserted as sqrtps in the SSE runs and as VEX-encoded vsqrtps in the AVX1 and AVX512 runs (the latter after EVEX-to-VEX compression); operates in place on %xmm0
  26   ret <4 x float> %res
  27 }
  28 declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
  29
  30
  31 define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) { ; Scalar square root through @llvm.x86.sse.sqrt.ss - every llc configuration is expected to emit exactly one scalar sqrt instruction and then return.
  32 ; SSE-LABEL: test_x86_sse_sqrt_ss:
  33 ; SSE:       ## %bb.0:
  34 ; SSE-NEXT:    sqrtss %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x51,0xc0]
  35 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  36 ;
  37 ; AVX1-LABEL: test_x86_sse_sqrt_ss:
  38 ; AVX1:       ## %bb.0:
  39 ; AVX1-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x51,0xc0]
  40 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  41 ;
  42 ; AVX512-LABEL: test_x86_sse_sqrt_ss:
  43 ; AVX512:       ## %bb.0:
  44 ; AVX512-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0]
  45 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  46   %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; asserted as two-operand sqrtss in the SSE runs and as three-operand vsqrtss with %xmm0 in every operand slot in the AVX1 and AVX512 runs
  47   ret <4 x float> %res
  48 }
  49 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
  50
  51
  52 define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) { ; Vector store through @llvm.x86.sse.storeu.ps - assertions are split per target because the pointer argument arrives on the stack for i386 and in %rdi for x86_64.
  53 ; X86-SSE-LABEL: test_x86_sse_storeu_ps:
  54 ; X86-SSE:       ## %bb.0:
  55 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
  56 ; X86-SSE-NEXT:    movups %xmm0, (%eax) ## encoding: [0x0f,0x11,0x00]
  57 ; X86-SSE-NEXT:    retl ## encoding: [0xc3]
  58 ;
  59 ; X86-AVX1-LABEL: test_x86_sse_storeu_ps:
  60 ; X86-AVX1:       ## %bb.0:
  61 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
  62 ; X86-AVX1-NEXT:    vmovups %xmm0, (%eax) ## encoding: [0xc5,0xf8,0x11,0x00]
  63 ; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
  64 ;
  65 ; X86-AVX512-LABEL: test_x86_sse_storeu_ps:
  66 ; X86-AVX512:       ## %bb.0:
  67 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
  68 ; X86-AVX512-NEXT:    vmovups %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
  69 ; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
  70 ;
  71 ; X64-SSE-LABEL: test_x86_sse_storeu_ps:
  72 ; X64-SSE:       ## %bb.0:
  73 ; X64-SSE-NEXT:    movups %xmm0, (%rdi) ## encoding: [0x0f,0x11,0x07]
  74 ; X64-SSE-NEXT:    retq ## encoding: [0xc3]
  75 ;
  76 ; X64-AVX1-LABEL: test_x86_sse_storeu_ps:
  77 ; X64-AVX1:       ## %bb.0:
  78 ; X64-AVX1-NEXT:    vmovups %xmm0, (%rdi) ## encoding: [0xc5,0xf8,0x11,0x07]
  79 ; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
  80 ;
  81 ; X64-AVX512-LABEL: test_x86_sse_storeu_ps:
  82 ; X64-AVX512:       ## %bb.0:
  83 ; X64-AVX512-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
  84 ; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  85   call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1) ; asserted as one movups (SSE runs) or vmovups (AVX1 and AVX512 runs) of %xmm0 to the address in %a0 - i386 first loads that address from the stack into %eax
  86   ret void
  87 }
  88 declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
  89
  90
  91 define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) { ; Scalar add through @llvm.x86.sse.add.ss - every llc configuration is expected to emit exactly one scalar add instruction and then return.
  92 ; SSE-LABEL: test_x86_sse_add_ss:
  93 ; SSE:       ## %bb.0:
  94 ; SSE-NEXT:    addss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x58,0xc1]
  95 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  96 ;
  97 ; AVX1-LABEL: test_x86_sse_add_ss:
  98 ; AVX1:       ## %bb.0:
  99 ; AVX1-NEXT:    vaddss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x58,0xc1]
 100 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
 101 ;
 102 ; AVX512-LABEL: test_x86_sse_add_ss:
 103 ; AVX512:       ## %bb.0:
 104 ; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x58,0xc1]
 105 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
 106   %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; asserted as addss in the SSE runs and as VEX-encoded vaddss in the AVX1 and AVX512 runs, with %xmm1 as source and %xmm0 as destination
 107   ret <4 x float> %res
 108 }
 109 declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
 110
 111
 112 define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) { ; Scalar subtract through @llvm.x86.sse.sub.ss - every llc configuration is expected to emit exactly one scalar subtract instruction and then return.
 113 ; SSE-LABEL: test_x86_sse_sub_ss:
 114 ; SSE:       ## %bb.0:
 115 ; SSE-NEXT:    subss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5c,0xc1]
 116 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
 117 ;
 118 ; AVX1-LABEL: test_x86_sse_sub_ss:
 119 ; AVX1:       ## %bb.0:
 120 ; AVX1-NEXT:    vsubss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5c,0xc1]
 121 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
 122 ;
 123 ; AVX512-LABEL: test_x86_sse_sub_ss:
 124 ; AVX512:       ## %bb.0:
 125 ; AVX512-NEXT:    vsubss %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5c,0xc1]
 126 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
 127   %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; asserted as subss in the SSE runs and as VEX-encoded vsubss in the AVX1 and AVX512 runs, with %xmm1 as source and %xmm0 as destination
 128   ret <4 x float> %res
 129 }
 130 declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
 131
 132
 133 define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) { ; Scalar multiply through @llvm.x86.sse.mul.ss - every llc configuration is expected to emit exactly one scalar multiply instruction and then return.
 134 ; SSE-LABEL: test_x86_sse_mul_ss:
 135 ; SSE:       ## %bb.0:
 136 ; SSE-NEXT:    mulss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x59,0xc1]
 137 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
 138 ;
 139 ; AVX1-LABEL: test_x86_sse_mul_ss:
 140 ; AVX1:       ## %bb.0:
 141 ; AVX1-NEXT:    vmulss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x59,0xc1]
 142 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
 143 ;
 144 ; AVX512-LABEL: test_x86_sse_mul_ss:
 145 ; AVX512:       ## %bb.0:
 146 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x59,0xc1]
 147 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
 148   %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; asserted as mulss in the SSE runs and as VEX-encoded vmulss in the AVX1 and AVX512 runs, with %xmm1 as source and %xmm0 as destination
 149   ret <4 x float> %res
 150 }
 151 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
 152
 153
 154 define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) { ; Scalar divide through @llvm.x86.sse.div.ss - every llc configuration is expected to emit exactly one scalar divide instruction and then return.
 155 ; SSE-LABEL: test_x86_sse_div_ss:
 156 ; SSE:       ## %bb.0:
 157 ; SSE-NEXT:    divss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5e,0xc1]
 158 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
 159 ;
 160 ; AVX1-LABEL: test_x86_sse_div_ss:
 161 ; AVX1:       ## %bb.0:
 162 ; AVX1-NEXT:    vdivss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5e,0xc1]
 163 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
 164 ;
 165 ; AVX512-LABEL: test_x86_sse_div_ss:
 166 ; AVX512:       ## %bb.0:
 167 ; AVX512-NEXT:    vdivss %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5e,0xc1]
 168 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
 169   %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; asserted as divss in the SSE runs and as VEX-encoded vdivss in the AVX1 and AVX512 runs, with %xmm1 as source and %xmm0 as destination
 170   ret <4 x float> %res
 171 }
 172 declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
 173
 174
 175 define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0, i32 %a1) { ; i32 to scalar float conversion through @llvm.x86.sse.cvtsi2ss - assertions are split per target because the i32 argument is read from the stack for i386 and from %edi for x86_64.
 176 ; X86-SSE-LABEL: test_x86_sse_cvtsi2ss:
 177 ; X86-SSE:       ## %bb.0:
 178 ; X86-SSE-NEXT:    cvtsi2ssl {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xf3,0x0f,0x2a,0x44,0x24,0x04]
 179 ; X86-SSE-NEXT:    retl ## encoding: [0xc3]
 180 ;
 181 ; X86-AVX1-LABEL: test_x86_sse_cvtsi2ss:
 182 ; X86-AVX1:       ## %bb.0:
 183 ; X86-AVX1-NEXT:    vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04]
 184 ; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
 185 ;
 186 ; X86-AVX512-LABEL: test_x86_sse_cvtsi2ss:
 187 ; X86-AVX512:       ## %bb.0:
 188 ; X86-AVX512-NEXT:    vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04]
 189 ; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
 190 ;
 191 ; X64-SSE-LABEL: test_x86_sse_cvtsi2ss:
 192 ; X64-SSE:       ## %bb.0:
 193 ; X64-SSE-NEXT:    cvtsi2ss %edi, %xmm0 ## encoding: [0xf3,0x0f,0x2a,0xc7]
 194 ; X64-SSE-NEXT:    retq ## encoding: [0xc3]
 195 ;
 196 ; X64-AVX1-LABEL: test_x86_sse_cvtsi2ss:
 197 ; X64-AVX1:       ## %bb.0:
 198 ; X64-AVX1-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x2a,0xc7]
 199 ; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
 200 ;
 201 ; X64-AVX512-LABEL: test_x86_sse_cvtsi2ss:
 202 ; X64-AVX512:       ## %bb.0:
 203 ; X64-AVX512-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0xc7]
 204 ; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
 205   %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 %a1) ; asserted as a single convert into %xmm0 - i386 expects the memory-operand form (cvtsi2ssl or vcvtsi2ssl) reading the stack slot, x86_64 expects the register form reading %edi
 206   ret <4 x float> %res
 207 }
 208 declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone