1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
3 ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
4 ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
5 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-sse2 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
6 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
7 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512
; Lowering test for the packed square-root intrinsic @llvm.x86.sse.sqrt.ps.
; The plain SSE runs expect one sqrtps on %xmm0; the AVX runs expect the
; VEX-encoded vsqrtps. The AVX-512 runs expect the same bytes as the AVX1 runs,
; tagged with the EVEX-to-VEX compression annotation.
define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
; SSE-LABEL: test_x86_sse_sqrt_ps:
; SSE: ## %bb.0:
; SSE-NEXT: sqrtps %xmm0, %xmm0 ## encoding: [0x0f,0x51,0xc0]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse_sqrt_ps:
; AVX1: ## %bb.0:
; AVX1-NEXT: vsqrtps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x51,0xc0]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse_sqrt_ps:
; AVX512: ## %bb.0:
; AVX512-NEXT: vsqrtps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x51,0xc0]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ; Return the intrinsic result so the call is live and the body is terminated.
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
; Lowering test for the scalar square-root intrinsic @llvm.x86.sse.sqrt.ss.
; The plain SSE runs expect one two-operand sqrtss; the AVX runs expect the
; three-operand vsqrtss with %xmm0 in every operand position. The AVX-512 runs
; expect the same bytes as the AVX1 runs, tagged as EVEX-to-VEX compressed.
define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
; SSE-LABEL: test_x86_sse_sqrt_ss:
; SSE: ## %bb.0:
; SSE-NEXT: sqrtss %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x51,0xc0]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse_sqrt_ss:
; AVX1: ## %bb.0:
; AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x51,0xc0]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse_sqrt_ss:
; AVX512: ## %bb.0:
; AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ; Return the intrinsic result so the call is live and the body is terminated.
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
; Lowering test for the store intrinsic @llvm.x86.sse.storeu.ps, which writes
; the vector %a1 through the pointer %a0.
; The i386 runs first load the pointer argument from the stack into %eax and
; then store through it; the x86-64 runs store directly through %rdi.
; Plain SSE expects movups, AVX expects vmovups, and the AVX-512 runs expect the
; AVX1 bytes tagged as EVEX-to-VEX compressed.
define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_x86_sse_storeu_ps:
; X86-SSE: ## %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movups %xmm0, (%eax) ## encoding: [0x0f,0x11,0x00]
; X86-SSE-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse_storeu_ps:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovups %xmm0, (%eax) ## encoding: [0xc5,0xf8,0x11,0x00]
; X86-AVX1-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse_storeu_ps:
; X86-AVX512: ## %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovups %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
; X86-AVX512-NEXT: retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse_storeu_ps:
; X64-SSE: ## %bb.0:
; X64-SSE-NEXT: movups %xmm0, (%rdi) ## encoding: [0x0f,0x11,0x07]
; X64-SSE-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse_storeu_ps:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vmovups %xmm0, (%rdi) ## encoding: [0xc5,0xf8,0x11,0x07]
; X64-AVX1-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse_storeu_ps:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; X64-AVX512-NEXT: retq ## encoding: [0xc3]
  call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
  ; The function is declared void, so the body ends with a plain void return.
  ret void
}
declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
; Lowering test for the scalar add intrinsic @llvm.x86.sse.add.ss.
; The plain SSE runs expect one addss of %xmm1 into %xmm0; the AVX runs expect
; the three-operand vaddss. The AVX-512 runs expect the same bytes as the AVX1
; runs, tagged as EVEX-to-VEX compressed.
define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse_add_ss:
; SSE: ## %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x58,0xc1]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse_add_ss:
; AVX1: ## %bb.0:
; AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x58,0xc1]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse_add_ss:
; AVX512: ## %bb.0:
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x58,0xc1]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ; Return the intrinsic result so the call is live and the body is terminated.
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
; Lowering test for the scalar subtract intrinsic @llvm.x86.sse.sub.ss.
; The plain SSE runs expect one subss of %xmm1 from %xmm0; the AVX runs expect
; the three-operand vsubss. The AVX-512 runs expect the same bytes as the AVX1
; runs, tagged as EVEX-to-VEX compressed.
define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse_sub_ss:
; SSE: ## %bb.0:
; SSE-NEXT: subss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5c,0xc1]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse_sub_ss:
; AVX1: ## %bb.0:
; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5c,0xc1]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse_sub_ss:
; AVX512: ## %bb.0:
; AVX512-NEXT: vsubss %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5c,0xc1]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ; Return the intrinsic result so the call is live and the body is terminated.
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
; Lowering test for the scalar multiply intrinsic @llvm.x86.sse.mul.ss.
; The plain SSE runs expect one mulss of %xmm1 into %xmm0; the AVX runs expect
; the three-operand vmulss. The AVX-512 runs expect the same bytes as the AVX1
; runs, tagged as EVEX-to-VEX compressed.
define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse_mul_ss:
; SSE: ## %bb.0:
; SSE-NEXT: mulss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x59,0xc1]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse_mul_ss:
; AVX1: ## %bb.0:
; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x59,0xc1]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse_mul_ss:
; AVX512: ## %bb.0:
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x59,0xc1]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ; Return the intrinsic result so the call is live and the body is terminated.
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
; Lowering test for the scalar divide intrinsic @llvm.x86.sse.div.ss.
; The plain SSE runs expect one divss with %xmm1 as source and %xmm0 as
; destination; the AVX runs expect the three-operand vdivss. The AVX-512 runs
; expect the same bytes as the AVX1 runs, tagged as EVEX-to-VEX compressed.
define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse_div_ss:
; SSE: ## %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5e,0xc1]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse_div_ss:
; AVX1: ## %bb.0:
; AVX1-NEXT: vdivss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5e,0xc1]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse_div_ss:
; AVX512: ## %bb.0:
; AVX512-NEXT: vdivss %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5e,0xc1]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ; Return the intrinsic result so the call is live and the body is terminated.
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
; Lowering test for the i32-to-float conversion intrinsic @llvm.x86.sse.cvtsi2ss.
; The i386 runs expect the integer operand to be read straight from its stack
; slot (cvtsi2ssl with an %esp-relative memory operand); the x86-64 runs expect
; it to come from %edi. Plain SSE uses the two-operand form, AVX the
; three-operand form, and the AVX-512 runs expect the AVX1 bytes tagged as
; EVEX-to-VEX compressed.
define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0, i32 %a1) {
; X86-SSE-LABEL: test_x86_sse_cvtsi2ss:
; X86-SSE: ## %bb.0:
; X86-SSE-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xf3,0x0f,0x2a,0x44,0x24,0x04]
; X86-SSE-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse_cvtsi2ss:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04]
; X86-AVX1-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse_cvtsi2ss:
; X86-AVX512: ## %bb.0:
; X86-AVX512-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04]
; X86-AVX512-NEXT: retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse_cvtsi2ss:
; X64-SSE: ## %bb.0:
; X64-SSE-NEXT: cvtsi2ss %edi, %xmm0 ## encoding: [0xf3,0x0f,0x2a,0xc7]
; X64-SSE-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse_cvtsi2ss:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x2a,0xc7]
; X64-AVX1-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse_cvtsi2ss:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0xc7]
; X64-AVX512-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 %a1) ; <<4 x float>> [#uses=1]
  ; Return the intrinsic result so the call is live and the body is terminated.
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone