1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse2 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
3 ; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
4 ; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
5 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse2 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
6 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
7 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512
; Calls @llvm.x86.sse2.sqrt.pd on the vector argument. The check lines expect a
; single sqrtpd on the SSE2 runs and a VEX-encoded vsqrtpd on the AVX and
; AVX-512 runs, followed directly by the return.
10 define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
11 ; SSE-LABEL: test_x86_sse2_sqrt_pd:
13 ; SSE-NEXT: sqrtpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x51,0xc0]
14 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
16 ; AVX1-LABEL: test_x86_sse2_sqrt_pd:
18 ; AVX1-NEXT: vsqrtpd %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x51,0xc0]
19 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
21 ; AVX512-LABEL: test_x86_sse2_sqrt_pd:
23 ; AVX512-NEXT: vsqrtpd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x51,0xc0]
24 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
25 %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
28 declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.sqrt.sd on a register argument. The check lines expect a
; single scalar sqrtsd (three-operand vsqrtsd on the AVX and AVX-512 runs) that
; uses %xmm0 for every operand.
31 define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
32 ; SSE-LABEL: test_x86_sse2_sqrt_sd:
34 ; SSE-NEXT: sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
35 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
37 ; AVX1-LABEL: test_x86_sse2_sqrt_sd:
39 ; AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
40 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
42 ; AVX512-LABEL: test_x86_sse2_sqrt_sd:
44 ; AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
45 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
46 %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
49 declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
; Same intrinsic as test_x86_sse2_sqrt_sd, but the operand comes from a 16-byte
; aligned vector load. The check lines expect the full vector to be loaded with
; (v)movapd first and the scalar square root to run register-to-register, i.e.
; the load is not folded into the sqrtsd memory operand.
; NOTE(review): presumably folding is avoided because the upper element of the
; loaded vector must be preserved in the result - confirm against the lowering.
; On the 32-bit runs the pointer is first fetched from the stack into %eax; on
; the 64-bit runs it arrives in %rdi.
52 define <2 x double> @test_x86_sse2_sqrt_sd_vec_load(<2 x double>* %a0) {
53 ; X86-SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load:
55 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
56 ; X86-SSE-NEXT: movapd (%eax), %xmm0 ## encoding: [0x66,0x0f,0x28,0x00]
57 ; X86-SSE-NEXT: sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
58 ; X86-SSE-NEXT: retl ## encoding: [0xc3]
60 ; X86-AVX1-LABEL: test_x86_sse2_sqrt_sd_vec_load:
62 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
63 ; X86-AVX1-NEXT: vmovapd (%eax), %xmm0 ## encoding: [0xc5,0xf9,0x28,0x00]
64 ; X86-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
65 ; X86-AVX1-NEXT: retl ## encoding: [0xc3]
67 ; X86-AVX512-LABEL: test_x86_sse2_sqrt_sd_vec_load:
68 ; X86-AVX512: ## %bb.0:
69 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
70 ; X86-AVX512-NEXT: vmovapd (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x00]
71 ; X86-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
72 ; X86-AVX512-NEXT: retl ## encoding: [0xc3]
74 ; X64-SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load:
76 ; X64-SSE-NEXT: movapd (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x28,0x07]
77 ; X64-SSE-NEXT: sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
78 ; X64-SSE-NEXT: retq ## encoding: [0xc3]
80 ; X64-AVX1-LABEL: test_x86_sse2_sqrt_sd_vec_load:
82 ; X64-AVX1-NEXT: vmovapd (%rdi), %xmm0 ## encoding: [0xc5,0xf9,0x28,0x07]
83 ; X64-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
84 ; X64-AVX1-NEXT: retq ## encoding: [0xc3]
86 ; X64-AVX512-LABEL: test_x86_sse2_sqrt_sd_vec_load:
87 ; X64-AVX512: ## %bb.0:
88 ; X64-AVX512-NEXT: vmovapd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x07]
89 ; X64-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
90 ; X64-AVX512-NEXT: retq ## encoding: [0xc3]
91 %a1 = load <2 x double>, <2 x double>* %a0, align 16
92 %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a1) ; <<2 x double>> [#uses=1]
; Calls @llvm.x86.sse2.psll.dq.bs with a shift operand of 7. The check lines
; expect (v)pslldq $7, so for this "bs" variant the operand is used directly as
; the byte count; the decoded shuffle shows seven zero bytes shifted in at the
; low end.
97 define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
98 ; SSE-LABEL: test_x86_sse2_psll_dq_bs:
100 ; SSE-NEXT: pslldq $7, %xmm0 ## encoding: [0x66,0x0f,0x73,0xf8,0x07]
101 ; SSE-NEXT: ## xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
102 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
104 ; AVX1-LABEL: test_x86_sse2_psll_dq_bs:
106 ; AVX1-NEXT: vpslldq $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xf8,0x07]
107 ; AVX1-NEXT: ## xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
108 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
110 ; AVX512-LABEL: test_x86_sse2_psll_dq_bs:
112 ; AVX512-NEXT: vpslldq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x07]
113 ; AVX512-NEXT: ## xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
114 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
115 %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
118 declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
; Calls @llvm.x86.sse2.psrl.dq.bs with a shift operand of 7. The check lines
; expect (v)psrldq $7, so for this "bs" variant the operand is used directly as
; the byte count; the decoded shuffle shows seven zero bytes shifted in at the
; high end.
121 define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
122 ; SSE-LABEL: test_x86_sse2_psrl_dq_bs:
124 ; SSE-NEXT: psrldq $7, %xmm0 ## encoding: [0x66,0x0f,0x73,0xd8,0x07]
125 ; SSE-NEXT: ## xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
126 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
128 ; AVX1-LABEL: test_x86_sse2_psrl_dq_bs:
130 ; AVX1-NEXT: vpsrldq $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xd8,0x07]
131 ; AVX1-NEXT: ## xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
132 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
134 ; AVX512-LABEL: test_x86_sse2_psrl_dq_bs:
136 ; AVX512-NEXT: vpsrldq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x07]
137 ; AVX512-NEXT: ## xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
138 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
139 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
142 declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
; Calls @llvm.x86.sse2.psll.dq with a shift operand of 8. The check lines expect
; (v)pslldq $1, a one-byte shift, so unlike the "bs" variant the operand of this
; intrinsic is evidently a bit count (8 bits = 1 byte).
144 define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
145 ; SSE-LABEL: test_x86_sse2_psll_dq:
147 ; SSE-NEXT: pslldq $1, %xmm0 ## encoding: [0x66,0x0f,0x73,0xf8,0x01]
148 ; SSE-NEXT: ## xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
149 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
151 ; AVX1-LABEL: test_x86_sse2_psll_dq:
153 ; AVX1-NEXT: vpslldq $1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xf8,0x01]
154 ; AVX1-NEXT: ## xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
155 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
157 ; AVX512-LABEL: test_x86_sse2_psll_dq:
159 ; AVX512-NEXT: vpslldq $1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x01]
160 ; AVX512-NEXT: ## xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
161 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
162 %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
165 declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
; Calls @llvm.x86.sse2.psrl.dq with a shift operand of 8. The check lines expect
; (v)psrldq $1, a one-byte shift, so unlike the "bs" variant the operand of this
; intrinsic is evidently a bit count (8 bits = 1 byte).
168 define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
169 ; SSE-LABEL: test_x86_sse2_psrl_dq:
171 ; SSE-NEXT: psrldq $1, %xmm0 ## encoding: [0x66,0x0f,0x73,0xd8,0x01]
172 ; SSE-NEXT: ## xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
173 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
175 ; AVX1-LABEL: test_x86_sse2_psrl_dq:
177 ; AVX1-NEXT: vpsrldq $1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xd8,0x01]
178 ; AVX1-NEXT: ## xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
179 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
181 ; AVX512-LABEL: test_x86_sse2_psrl_dq:
183 ; AVX512-NEXT: vpsrldq $1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x01]
184 ; AVX512-NEXT: ## xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
185 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
186 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
189 declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
; Calls @llvm.x86.sse2.cvtdq2pd, which takes <4 x i32> and returns <2 x double>.
; The check lines expect a single register-to-register (v)cvtdq2pd.
192 define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
193 ; SSE-LABEL: test_x86_sse2_cvtdq2pd:
195 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0xe6,0xc0]
196 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
198 ; AVX1-LABEL: test_x86_sse2_cvtdq2pd:
200 ; AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0xe6,0xc0]
201 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
203 ; AVX512-LABEL: test_x86_sse2_cvtdq2pd:
205 ; AVX512-NEXT: vcvtdq2pd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0]
206 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
207 %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
208 ret <2 x double> %res
210 declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
; Calls @llvm.x86.sse2.cvtps2pd, which takes <4 x float> and returns
; <2 x double>. The check lines expect a single register-to-register
; (v)cvtps2pd.
213 define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
214 ; SSE-LABEL: test_x86_sse2_cvtps2pd:
216 ; SSE-NEXT: cvtps2pd %xmm0, %xmm0 ## encoding: [0x0f,0x5a,0xc0]
217 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
219 ; AVX1-LABEL: test_x86_sse2_cvtps2pd:
221 ; AVX1-NEXT: vcvtps2pd %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5a,0xc0]
222 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
224 ; AVX512-LABEL: test_x86_sse2_cvtps2pd:
226 ; AVX512-NEXT: vcvtps2pd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
227 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
228 %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
229 ret <2 x double> %res
231 declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
; Calls @llvm.x86.sse2.storel.dq with a pointer and a <4 x i32> value. The check
; lines expect one (v)movlps store of %xmm0 to the pointer. On the 32-bit runs
; the pointer is first fetched from the stack into %eax; on the 64-bit runs it
; arrives in %rdi.
234 define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
235 ; X86-SSE-LABEL: test_x86_sse2_storel_dq:
237 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
238 ; X86-SSE-NEXT: movlps %xmm0, (%eax) ## encoding: [0x0f,0x13,0x00]
239 ; X86-SSE-NEXT: retl ## encoding: [0xc3]
241 ; X86-AVX1-LABEL: test_x86_sse2_storel_dq:
242 ; X86-AVX1: ## %bb.0:
243 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
244 ; X86-AVX1-NEXT: vmovlps %xmm0, (%eax) ## encoding: [0xc5,0xf8,0x13,0x00]
245 ; X86-AVX1-NEXT: retl ## encoding: [0xc3]
247 ; X86-AVX512-LABEL: test_x86_sse2_storel_dq:
248 ; X86-AVX512: ## %bb.0:
249 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
250 ; X86-AVX512-NEXT: vmovlps %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
251 ; X86-AVX512-NEXT: retl ## encoding: [0xc3]
253 ; X64-SSE-LABEL: test_x86_sse2_storel_dq:
255 ; X64-SSE-NEXT: movlps %xmm0, (%rdi) ## encoding: [0x0f,0x13,0x07]
256 ; X64-SSE-NEXT: retq ## encoding: [0xc3]
258 ; X64-AVX1-LABEL: test_x86_sse2_storel_dq:
259 ; X64-AVX1: ## %bb.0:
260 ; X64-AVX1-NEXT: vmovlps %xmm0, (%rdi) ## encoding: [0xc5,0xf8,0x13,0x07]
261 ; X64-AVX1-NEXT: retq ## encoding: [0xc3]
263 ; X64-AVX512-LABEL: test_x86_sse2_storel_dq:
264 ; X64-AVX512: ## %bb.0:
265 ; X64-AVX512-NEXT: vmovlps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
266 ; X64-AVX512-NEXT: retq ## encoding: [0xc3]
267 call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
270 declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
; Adds 1 to every byte of %a1 and stores the sum through
; @llvm.x86.sse2.storeu.dq. The check lines expect the add to be emitted as
; "subtract all-ones" ((v)pcmpeqd to build the all-ones register, then
; (v)psubb), followed by an unaligned integer store, (v)movdqu. As the existing
; comment inside the function says, the integer add is there to pin the
; execution domain of the store.
273 define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
274 ; add operation forces the execution domain.
275 ; X86-SSE-LABEL: test_x86_sse2_storeu_dq:
277 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
278 ; X86-SSE-NEXT: pcmpeqd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x76,0xc9]
279 ; X86-SSE-NEXT: psubb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf8,0xc1]
280 ; X86-SSE-NEXT: movdqu %xmm0, (%eax) ## encoding: [0xf3,0x0f,0x7f,0x00]
281 ; X86-SSE-NEXT: retl ## encoding: [0xc3]
283 ; X86-AVX1-LABEL: test_x86_sse2_storeu_dq:
284 ; X86-AVX1: ## %bb.0:
285 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
286 ; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
287 ; X86-AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf8,0xc1]
288 ; X86-AVX1-NEXT: vmovdqu %xmm0, (%eax) ## encoding: [0xc5,0xfa,0x7f,0x00]
289 ; X86-AVX1-NEXT: retl ## encoding: [0xc3]
291 ; X86-AVX512-LABEL: test_x86_sse2_storeu_dq:
292 ; X86-AVX512: ## %bb.0:
293 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
294 ; X86-AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
295 ; X86-AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
296 ; X86-AVX512-NEXT: vmovdqu %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
297 ; X86-AVX512-NEXT: retl ## encoding: [0xc3]
299 ; X64-SSE-LABEL: test_x86_sse2_storeu_dq:
301 ; X64-SSE-NEXT: pcmpeqd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x76,0xc9]
302 ; X64-SSE-NEXT: psubb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf8,0xc1]
303 ; X64-SSE-NEXT: movdqu %xmm0, (%rdi) ## encoding: [0xf3,0x0f,0x7f,0x07]
304 ; X64-SSE-NEXT: retq ## encoding: [0xc3]
306 ; X64-AVX1-LABEL: test_x86_sse2_storeu_dq:
307 ; X64-AVX1: ## %bb.0:
308 ; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
309 ; X64-AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf8,0xc1]
310 ; X64-AVX1-NEXT: vmovdqu %xmm0, (%rdi) ## encoding: [0xc5,0xfa,0x7f,0x07]
311 ; X64-AVX1-NEXT: retq ## encoding: [0xc3]
313 ; X64-AVX512-LABEL: test_x86_sse2_storeu_dq:
314 ; X64-AVX512: ## %bb.0:
315 ; X64-AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
316 ; X64-AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
317 ; X64-AVX512-NEXT: vmovdqu %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x07]
318 ; X64-AVX512-NEXT: retq ## encoding: [0xc3]
319 %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
320 call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
323 declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
; Adds the constant <0x0, 0x4200000000000000> to %a1 and stores the sum through
; @llvm.x86.sse2.storeu.pd. The check lines expect a (v)addpd followed by an
; unaligned double-precision store, (v)movupd. As the existing comment inside
; the function says, the fadd is there to pin the execution domain of the store.
; The constant vector is built differently per configuration: the SSE2 and AVX
; runs zero a register and load the high element from the constant pool with
; (v)movhpd, while the AVX-512 runs load the scalar with vmovsd and move it into
; the high half with vpslldq $8. The constant-pool label LCPI11_0 in the fixups
; depends on this function's position in the file.
326 define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
327 ; fadd operation forces the execution domain.
328 ; X86-SSE-LABEL: test_x86_sse2_storeu_pd:
330 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
331 ; X86-SSE-NEXT: xorpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x57,0xc9]
332 ; X86-SSE-NEXT: movhpd LCPI11_0, %xmm1 ## encoding: [0x66,0x0f,0x16,0x0d,A,A,A,A]
333 ; X86-SSE-NEXT: ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4
334 ; X86-SSE-NEXT: ## xmm1 = xmm1[0],mem[0]
335 ; X86-SSE-NEXT: addpd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x58,0xc8]
336 ; X86-SSE-NEXT: movupd %xmm1, (%eax) ## encoding: [0x66,0x0f,0x11,0x08]
337 ; X86-SSE-NEXT: retl ## encoding: [0xc3]
339 ; X86-AVX1-LABEL: test_x86_sse2_storeu_pd:
340 ; X86-AVX1: ## %bb.0:
341 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
342 ; X86-AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
343 ; X86-AVX1-NEXT: vmovhpd LCPI11_0, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
344 ; X86-AVX1-NEXT: ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4
345 ; X86-AVX1-NEXT: ## xmm1 = xmm1[0],mem[0]
346 ; X86-AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc1]
347 ; X86-AVX1-NEXT: vmovupd %xmm0, (%eax) ## encoding: [0xc5,0xf9,0x11,0x00]
348 ; X86-AVX1-NEXT: retl ## encoding: [0xc3]
350 ; X86-AVX512-LABEL: test_x86_sse2_storeu_pd:
351 ; X86-AVX512: ## %bb.0:
352 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
353 ; X86-AVX512-NEXT: vmovsd LCPI11_0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A]
354 ; X86-AVX512-NEXT: ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4
355 ; X86-AVX512-NEXT: ## xmm1 = mem[0],zero
356 ; X86-AVX512-NEXT: vpslldq $8, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x73,0xf9,0x08]
357 ; X86-AVX512-NEXT: ## xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
358 ; X86-AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
359 ; X86-AVX512-NEXT: vmovupd %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x00]
360 ; X86-AVX512-NEXT: retl ## encoding: [0xc3]
362 ; X64-SSE-LABEL: test_x86_sse2_storeu_pd:
364 ; X64-SSE-NEXT: xorpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x57,0xc9]
365 ; X64-SSE-NEXT: movhpd {{.*}}(%rip), %xmm1 ## encoding: [0x66,0x0f,0x16,0x0d,A,A,A,A]
366 ; X64-SSE-NEXT: ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte
367 ; X64-SSE-NEXT: ## xmm1 = xmm1[0],mem[0]
368 ; X64-SSE-NEXT: addpd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x58,0xc8]
369 ; X64-SSE-NEXT: movupd %xmm1, (%rdi) ## encoding: [0x66,0x0f,0x11,0x0f]
370 ; X64-SSE-NEXT: retq ## encoding: [0xc3]
372 ; X64-AVX1-LABEL: test_x86_sse2_storeu_pd:
373 ; X64-AVX1: ## %bb.0:
374 ; X64-AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
375 ; X64-AVX1-NEXT: vmovhpd {{.*}}(%rip), %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
376 ; X64-AVX1-NEXT: ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte
377 ; X64-AVX1-NEXT: ## xmm1 = xmm1[0],mem[0]
378 ; X64-AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc1]
379 ; X64-AVX1-NEXT: vmovupd %xmm0, (%rdi) ## encoding: [0xc5,0xf9,0x11,0x07]
380 ; X64-AVX1-NEXT: retq ## encoding: [0xc3]
382 ; X64-AVX512-LABEL: test_x86_sse2_storeu_pd:
383 ; X64-AVX512: ## %bb.0:
384 ; X64-AVX512-NEXT: vmovsd {{.*}}(%rip), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A]
385 ; X64-AVX512-NEXT: ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte
386 ; X64-AVX512-NEXT: ## xmm1 = mem[0],zero
387 ; X64-AVX512-NEXT: vpslldq $8, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x73,0xf9,0x08]
388 ; X64-AVX512-NEXT: ## xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
389 ; X64-AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
390 ; X64-AVX512-NEXT: vmovupd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x07]
391 ; X64-AVX512-NEXT: retq ## encoding: [0xc3]
392 %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
393 call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
396 declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
; Calls @llvm.x86.sse2.pshuf.d with immediate 27 (0x1b), which the decoded
; shuffle comments show as the element order [3,2,1,0]. The SSE2 runs expect
; pshufd; the AVX and AVX-512 runs expect the same permutation to be emitted as
; vpermilps instead.
398 define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) {
399 ; SSE-LABEL: test_x86_sse2_pshuf_d:
400 ; SSE: ## %bb.0: ## %entry
401 ; SSE-NEXT: pshufd $27, %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x70,0xc0,0x1b]
402 ; SSE-NEXT: ## xmm0 = xmm0[3,2,1,0]
403 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
405 ; AVX1-LABEL: test_x86_sse2_pshuf_d:
406 ; AVX1: ## %bb.0: ## %entry
407 ; AVX1-NEXT: vpermilps $27, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b]
408 ; AVX1-NEXT: ## xmm0 = xmm0[3,2,1,0]
409 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
411 ; AVX512-LABEL: test_x86_sse2_pshuf_d:
412 ; AVX512: ## %bb.0: ## %entry
413 ; AVX512-NEXT: vpermilps $27, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b]
414 ; AVX512-NEXT: ## xmm0 = xmm0[3,2,1,0]
415 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
417 %res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
420 declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone
; Calls @llvm.x86.sse2.pshufl.w with immediate 27 (0x1b). The check lines expect
; (v)pshuflw $27; the decoded shuffle shows the four low words reversed
; ([3,2,1,0]) and the four high words left in place ([4,5,6,7]).
422 define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) {
423 ; SSE-LABEL: test_x86_sse2_pshufl_w:
424 ; SSE: ## %bb.0: ## %entry
425 ; SSE-NEXT: pshuflw $27, %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x70,0xc0,0x1b]
426 ; SSE-NEXT: ## xmm0 = xmm0[3,2,1,0,4,5,6,7]
427 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
429 ; AVX1-LABEL: test_x86_sse2_pshufl_w:
430 ; AVX1: ## %bb.0: ## %entry
431 ; AVX1-NEXT: vpshuflw $27, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x70,0xc0,0x1b]
432 ; AVX1-NEXT: ## xmm0 = xmm0[3,2,1,0,4,5,6,7]
433 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
435 ; AVX512-LABEL: test_x86_sse2_pshufl_w:
436 ; AVX512: ## %bb.0: ## %entry
437 ; AVX512-NEXT: vpshuflw $27, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc0,0x1b]
438 ; AVX512-NEXT: ## xmm0 = xmm0[3,2,1,0,4,5,6,7]
439 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
441 %res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
444 declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone
; Calls @llvm.x86.sse2.pshufh.w with immediate 27 (0x1b). The check lines expect
; (v)pshufhw $27; the decoded shuffle shows the four low words left in place
; ([0,1,2,3]) and the four high words reversed ([7,6,5,4]).
446 define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) {
447 ; SSE-LABEL: test_x86_sse2_pshufh_w:
448 ; SSE: ## %bb.0: ## %entry
449 ; SSE-NEXT: pshufhw $27, %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x70,0xc0,0x1b]
450 ; SSE-NEXT: ## xmm0 = xmm0[0,1,2,3,7,6,5,4]
451 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
453 ; AVX1-LABEL: test_x86_sse2_pshufh_w:
454 ; AVX1: ## %bb.0: ## %entry
455 ; AVX1-NEXT: vpshufhw $27, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x70,0xc0,0x1b]
456 ; AVX1-NEXT: ## xmm0 = xmm0[0,1,2,3,7,6,5,4]
457 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
459 ; AVX512-LABEL: test_x86_sse2_pshufh_w:
460 ; AVX512: ## %bb.0: ## %entry
461 ; AVX512-NEXT: vpshufhw $27, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xc0,0x1b]
462 ; AVX512-NEXT: ## xmm0 = xmm0[0,1,2,3,7,6,5,4]
463 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
465 %res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
468 declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone
; Calls @llvm.x86.sse2.pmaxu.b on two <16 x i8> arguments. The check lines
; expect a single (v)pmaxub of %xmm1 into %xmm0.
470 define <16 x i8> @max_epu8(<16 x i8> %a0, <16 x i8> %a1) {
471 ; SSE-LABEL: max_epu8:
473 ; SSE-NEXT: pmaxub %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xde,0xc1]
474 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
476 ; AVX1-LABEL: max_epu8:
478 ; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xde,0xc1]
479 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
481 ; AVX512-LABEL: max_epu8:
483 ; AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xde,0xc1]
484 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
485 %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
488 declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
; Calls @llvm.x86.sse2.pminu.b on two <16 x i8> arguments. The check lines
; expect a single (v)pminub of %xmm1 into %xmm0.
490 define <16 x i8> @min_epu8(<16 x i8> %a0, <16 x i8> %a1) {
491 ; SSE-LABEL: min_epu8:
493 ; SSE-NEXT: pminub %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xda,0xc1]
494 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
496 ; AVX1-LABEL: min_epu8:
498 ; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xda,0xc1]
499 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
501 ; AVX512-LABEL: min_epu8:
503 ; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xda,0xc1]
504 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
505 %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
508 declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
; Calls @llvm.x86.sse2.pmaxs.w on two <8 x i16> arguments. The check lines
; expect a single (v)pmaxsw of %xmm1 into %xmm0.
510 define <8 x i16> @max_epi16(<8 x i16> %a0, <8 x i16> %a1) {
511 ; SSE-LABEL: max_epi16:
513 ; SSE-NEXT: pmaxsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xee,0xc1]
514 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
516 ; AVX1-LABEL: max_epi16:
518 ; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xee,0xc1]
519 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
521 ; AVX512-LABEL: max_epi16:
523 ; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1]
524 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
525 %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
528 declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.sse2.pmins.w on two <8 x i16> arguments. The check lines
; expect a single (v)pminsw of %xmm1 into %xmm0.
530 define <8 x i16> @min_epi16(<8 x i16> %a0, <8 x i16> %a1) {
531 ; SSE-LABEL: min_epi16:
533 ; SSE-NEXT: pminsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xea,0xc1]
534 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
536 ; AVX1-LABEL: min_epi16:
538 ; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xea,0xc1]
539 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
541 ; AVX512-LABEL: min_epi16:
543 ; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1]
544 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
545 %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
548 declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.sse2.add.sd on two <2 x double> arguments. The check lines
; expect a single scalar (v)addsd of %xmm1 into %xmm0.
550 define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
551 ; SSE-LABEL: test_x86_sse2_add_sd:
553 ; SSE-NEXT: addsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x58,0xc1]
554 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
556 ; AVX1-LABEL: test_x86_sse2_add_sd:
558 ; AVX1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x58,0xc1]
559 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
561 ; AVX512-LABEL: test_x86_sse2_add_sd:
563 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x58,0xc1]
564 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
565 %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
566 ret <2 x double> %res
568 declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.sub.sd on two <2 x double> arguments. The check lines
; expect a single scalar (v)subsd of %xmm1 from %xmm0.
571 define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
572 ; SSE-LABEL: test_x86_sse2_sub_sd:
574 ; SSE-NEXT: subsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5c,0xc1]
575 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
577 ; AVX1-LABEL: test_x86_sse2_sub_sd:
579 ; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5c,0xc1]
580 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
582 ; AVX512-LABEL: test_x86_sse2_sub_sd:
584 ; AVX512-NEXT: vsubsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5c,0xc1]
585 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
586 %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
587 ret <2 x double> %res
589 declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.mul.sd on two <2 x double> arguments. The check lines
; expect a single scalar (v)mulsd of %xmm1 into %xmm0.
592 define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
593 ; SSE-LABEL: test_x86_sse2_mul_sd:
595 ; SSE-NEXT: mulsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x59,0xc1]
596 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
598 ; AVX1-LABEL: test_x86_sse2_mul_sd:
600 ; AVX1-NEXT: vmulsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x59,0xc1]
601 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
603 ; AVX512-LABEL: test_x86_sse2_mul_sd:
605 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x59,0xc1]
606 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
607 %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
608 ret <2 x double> %res
610 declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.div.sd on two <2 x double> arguments. The check lines
; expect a single scalar (v)divsd with %xmm1 as the divisor operand.
613 define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
614 ; SSE-LABEL: test_x86_sse2_div_sd:
616 ; SSE-NEXT: divsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5e,0xc1]
617 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
619 ; AVX1-LABEL: test_x86_sse2_div_sd:
621 ; AVX1-NEXT: vdivsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5e,0xc1]
622 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
624 ; AVX512-LABEL: test_x86_sse2_div_sd:
626 ; AVX512-NEXT: vdivsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5e,0xc1]
627 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
628 %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
629 ret <2 x double> %res
631 declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.pavg.b on two <16 x i8> arguments. The check lines expect
; a single (v)pavgb of %xmm1 into %xmm0.
633 define <16 x i8> @mm_avg_epu8(<16 x i8> %a0, <16 x i8> %a1) {
634 ; SSE-LABEL: mm_avg_epu8:
636 ; SSE-NEXT: pavgb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe0,0xc1]
637 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
639 ; AVX1-LABEL: mm_avg_epu8:
641 ; AVX1-NEXT: vpavgb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe0,0xc1]
642 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
644 ; AVX512-LABEL: mm_avg_epu8:
646 ; AVX512-NEXT: vpavgb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xc1]
647 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
648 %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
651 declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
; Calls @llvm.x86.sse2.pavg.w on two <8 x i16> arguments. The check lines expect
; a single (v)pavgw of %xmm1 into %xmm0.
653 define <8 x i16> @mm_avg_epu16(<8 x i16> %a0, <8 x i16> %a1) {
654 ; SSE-LABEL: mm_avg_epu16:
656 ; SSE-NEXT: pavgw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe3,0xc1]
657 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
659 ; AVX1-LABEL: mm_avg_epu16:
661 ; AVX1-NEXT: vpavgw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe3,0xc1]
662 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
664 ; AVX512-LABEL: mm_avg_epu16:
666 ; AVX512-NEXT: vpavgw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xc1]
667 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
668 %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
671 declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.sse2.pmulu.dq, which takes two <4 x i32> arguments and returns
; <2 x i64>. The check lines expect a single (v)pmuludq of %xmm1 into %xmm0.
674 define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
675 ; SSE-LABEL: test_x86_sse2_pmulu_dq:
677 ; SSE-NEXT: pmuludq %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf4,0xc1]
678 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
680 ; AVX1-LABEL: test_x86_sse2_pmulu_dq:
682 ; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf4,0xc1]
683 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
685 ; AVX512-LABEL: test_x86_sse2_pmulu_dq:
687 ; AVX512-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0xc1]
688 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
689 %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
692 declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
; Calls @llvm.x86.sse2.cvtsi2sd with a <2 x double> and an i32. The check lines
; expect a single (v)cvtsi2sdl into %xmm0. On the 32-bit runs the integer is
; read straight from its stack slot as a memory operand; on the 64-bit runs it
; is taken from %edi.
695 define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0, i32 %a1) {
696 ; X86-SSE-LABEL: test_x86_sse2_cvtsi2sd:
698 ; X86-SSE-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xf2,0x0f,0x2a,0x44,0x24,0x04]
699 ; X86-SSE-NEXT: retl ## encoding: [0xc3]
701 ; X86-AVX1-LABEL: test_x86_sse2_cvtsi2sd:
702 ; X86-AVX1: ## %bb.0:
703 ; X86-AVX1-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
704 ; X86-AVX1-NEXT: retl ## encoding: [0xc3]
706 ; X86-AVX512-LABEL: test_x86_sse2_cvtsi2sd:
707 ; X86-AVX512: ## %bb.0:
708 ; X86-AVX512-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
709 ; X86-AVX512-NEXT: retl ## encoding: [0xc3]
711 ; X64-SSE-LABEL: test_x86_sse2_cvtsi2sd:
713 ; X64-SSE-NEXT: cvtsi2sdl %edi, %xmm0 ## encoding: [0xf2,0x0f,0x2a,0xc7]
714 ; X64-SSE-NEXT: retq ## encoding: [0xc3]
716 ; X64-AVX1-LABEL: test_x86_sse2_cvtsi2sd:
717 ; X64-AVX1: ## %bb.0:
718 ; X64-AVX1-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x2a,0xc7]
719 ; X64-AVX1-NEXT: retq ## encoding: [0xc3]
721 ; X64-AVX512-LABEL: test_x86_sse2_cvtsi2sd:
722 ; X64-AVX512: ## %bb.0:
723 ; X64-AVX512-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7]
724 ; X64-AVX512-NEXT: retq ## encoding: [0xc3]
725 %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 %a1) ; <<2 x double>> [#uses=1]
726 ret <2 x double> %res
728 declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
; Exercises @llvm.x86.sse2.cvtss2sd with both operands passed by value
; (<2 x double> %a0 and <4 x float> %a1). Every configuration is expected to
; emit a single register-to-register (v)cvtss2sd %xmm1, %xmm0 and return.
731 define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
732 ; SSE-LABEL: test_x86_sse2_cvtss2sd:
734 ; SSE-NEXT: cvtss2sd %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5a,0xc1]
735 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
737 ; AVX1-LABEL: test_x86_sse2_cvtss2sd:
739 ; AVX1-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0xc1]
740 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
742 ; AVX512-LABEL: test_x86_sse2_cvtss2sd:
744 ; AVX512-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0xc1]
745 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
746 %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
747 ret <2 x double> %res
749 declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
; Same intrinsic as test_x86_sse2_cvtss2sd, but the <4 x float> operand is
; loaded from %p1 first. The assertions expect the load to stay separate from
; the conversion. A scalar (v)movss loads into %xmm1, the convert runs
; register-to-register on %xmm1, and the result is merged into %xmm0
; (movsd for the SSE runs, vblendps $3 for the AVX runs). The i386 runs
; additionally fetch the pointer from the stack into %eax.
752 define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, <4 x float>* %p1) {
753 ; X86-SSE-LABEL: test_x86_sse2_cvtss2sd_load:
755 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
756 ; X86-SSE-NEXT: movss (%eax), %xmm1 ## encoding: [0xf3,0x0f,0x10,0x08]
757 ; X86-SSE-NEXT: ## xmm1 = mem[0],zero,zero,zero
758 ; X86-SSE-NEXT: cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
759 ; X86-SSE-NEXT: movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
760 ; X86-SSE-NEXT: ## xmm0 = xmm1[0],xmm0[1]
761 ; X86-SSE-NEXT: retl ## encoding: [0xc3]
763 ; X86-AVX1-LABEL: test_x86_sse2_cvtss2sd_load:
764 ; X86-AVX1: ## %bb.0:
765 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
766 ; X86-AVX1-NEXT: vmovss (%eax), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x08]
767 ; X86-AVX1-NEXT: ## xmm1 = mem[0],zero,zero,zero
768 ; X86-AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0xc9]
769 ; X86-AVX1-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
770 ; X86-AVX1-NEXT: ## xmm0 = xmm1[0,1],xmm0[2,3]
771 ; X86-AVX1-NEXT: retl ## encoding: [0xc3]
773 ; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load:
774 ; X86-AVX512: ## %bb.0:
775 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
776 ; X86-AVX512-NEXT: vmovss (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x08]
777 ; X86-AVX512-NEXT: ## xmm1 = mem[0],zero,zero,zero
778 ; X86-AVX512-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0xc9]
779 ; X86-AVX512-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
780 ; X86-AVX512-NEXT: ## xmm0 = xmm1[0,1],xmm0[2,3]
781 ; X86-AVX512-NEXT: retl ## encoding: [0xc3]
783 ; X64-SSE-LABEL: test_x86_sse2_cvtss2sd_load:
785 ; X64-SSE-NEXT: movss (%rdi), %xmm1 ## encoding: [0xf3,0x0f,0x10,0x0f]
786 ; X64-SSE-NEXT: ## xmm1 = mem[0],zero,zero,zero
787 ; X64-SSE-NEXT: cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
788 ; X64-SSE-NEXT: movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
789 ; X64-SSE-NEXT: ## xmm0 = xmm1[0],xmm0[1]
790 ; X64-SSE-NEXT: retq ## encoding: [0xc3]
792 ; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load:
793 ; X64-AVX1: ## %bb.0:
794 ; X64-AVX1-NEXT: vmovss (%rdi), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x0f]
795 ; X64-AVX1-NEXT: ## xmm1 = mem[0],zero,zero,zero
796 ; X64-AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0xc9]
797 ; X64-AVX1-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
798 ; X64-AVX1-NEXT: ## xmm0 = xmm1[0,1],xmm0[2,3]
799 ; X64-AVX1-NEXT: retq ## encoding: [0xc3]
801 ; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load:
802 ; X64-AVX512: ## %bb.0:
803 ; X64-AVX512-NEXT: vmovss (%rdi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x0f]
804 ; X64-AVX512-NEXT: ## xmm1 = mem[0],zero,zero,zero
805 ; X64-AVX512-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0xc9]
806 ; X64-AVX512-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
807 ; X64-AVX512-NEXT: ## xmm0 = xmm1[0,1],xmm0[2,3]
808 ; X64-AVX512-NEXT: retq ## encoding: [0xc3]
809 %a1 = load <4 x float>, <4 x float>* %p1
810 %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
811 ret <2 x double> %res
; Same IR body as test_x86_sse2_cvtss2sd_load, but the function carries the
; optsize attribute. Here the assertions expect the load to be folded into
; the memory operand of (v)cvtss2sd, writing %xmm1, followed by a (v)movsd
; that merges the low element into %xmm0. The i386 runs first fetch the
; pointer from the stack into %eax.
815 define <2 x double> @test_x86_sse2_cvtss2sd_load_optsize(<2 x double> %a0, <4 x float>* %p1) optsize {
816 ; X86-SSE-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
818 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
819 ; X86-SSE-NEXT: cvtss2sd (%eax), %xmm1 ## encoding: [0xf3,0x0f,0x5a,0x08]
820 ; X86-SSE-NEXT: movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
821 ; X86-SSE-NEXT: ## xmm0 = xmm1[0],xmm0[1]
822 ; X86-SSE-NEXT: retl ## encoding: [0xc3]
824 ; X86-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
825 ; X86-AVX1: ## %bb.0:
826 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
827 ; X86-AVX1-NEXT: vcvtss2sd (%eax), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x08]
828 ; X86-AVX1-NEXT: vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
829 ; X86-AVX1-NEXT: ## xmm0 = xmm1[0],xmm0[1]
830 ; X86-AVX1-NEXT: retl ## encoding: [0xc3]
832 ; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
833 ; X86-AVX512: ## %bb.0:
834 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
835 ; X86-AVX512-NEXT: vcvtss2sd (%eax), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x08]
836 ; X86-AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
837 ; X86-AVX512-NEXT: ## xmm0 = xmm1[0],xmm0[1]
838 ; X86-AVX512-NEXT: retl ## encoding: [0xc3]
840 ; X64-SSE-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
842 ; X64-SSE-NEXT: cvtss2sd (%rdi), %xmm1 ## encoding: [0xf3,0x0f,0x5a,0x0f]
843 ; X64-SSE-NEXT: movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
844 ; X64-SSE-NEXT: ## xmm0 = xmm1[0],xmm0[1]
845 ; X64-SSE-NEXT: retq ## encoding: [0xc3]
847 ; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
848 ; X64-AVX1: ## %bb.0:
849 ; X64-AVX1-NEXT: vcvtss2sd (%rdi), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x0f]
850 ; X64-AVX1-NEXT: vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
851 ; X64-AVX1-NEXT: ## xmm0 = xmm1[0],xmm0[1]
852 ; X64-AVX1-NEXT: retq ## encoding: [0xc3]
854 ; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
855 ; X64-AVX512: ## %bb.0:
856 ; X64-AVX512-NEXT: vcvtss2sd (%rdi), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x0f]
857 ; X64-AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
858 ; X64-AVX512-NEXT: ## xmm0 = xmm1[0],xmm0[1]
859 ; X64-AVX512-NEXT: retq ## encoding: [0xc3]
860 %a1 = load <4 x float>, <4 x float>* %p1
861 %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
862 ret <2 x double> %res
; Exercises @llvm.x86.sse2.cvtdq2ps on a <4 x i32> operand with a <4 x float>
; result. Every configuration is expected to emit a single in-place
; (v)cvtdq2ps %xmm0, %xmm0 and return.
866 define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
867 ; SSE-LABEL: test_x86_sse2_cvtdq2ps:
869 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ## encoding: [0x0f,0x5b,0xc0]
870 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
872 ; AVX1-LABEL: test_x86_sse2_cvtdq2ps:
874 ; AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5b,0xc0]
875 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
877 ; AVX512-LABEL: test_x86_sse2_cvtdq2ps:
879 ; AVX512-NEXT: vcvtdq2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xc0]
880 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
881 %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
884 declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
; Despite the sse2-style test name, this calls the generic
; @llvm.sadd.sat.v16i8 intrinsic on two <16 x i8> operands. Every
; configuration is expected to select a single (v)paddsb %xmm1, %xmm0.
887 define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
888 ; SSE-LABEL: test_x86_sse2_padds_b:
890 ; SSE-NEXT: paddsb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xec,0xc1]
891 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
893 ; AVX1-LABEL: test_x86_sse2_padds_b:
895 ; AVX1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xec,0xc1]
896 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
898 ; AVX512-LABEL: test_x86_sse2_padds_b:
900 ; AVX512-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
901 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
902 %res = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
905 declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
; Despite the sse2-style test name, this calls the generic
; @llvm.sadd.sat.v8i16 intrinsic on two <8 x i16> operands. Every
; configuration is expected to select a single (v)paddsw %xmm1, %xmm0.
908 define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
909 ; SSE-LABEL: test_x86_sse2_padds_w:
911 ; SSE-NEXT: paddsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xed,0xc1]
912 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
914 ; AVX1-LABEL: test_x86_sse2_padds_w:
916 ; AVX1-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xed,0xc1]
917 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
919 ; AVX512-LABEL: test_x86_sse2_padds_w:
921 ; AVX512-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
922 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
923 %res = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
926 declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
; Exercises @llvm.x86.sse2.paddus.b on two <16 x i8> operands. Every
; configuration is expected to emit a single (v)paddusb %xmm1, %xmm0.
; The AVX assertions use the AVX1 and AVX512 prefixes because those are the
; ones the RUN lines at the top of this file hand to FileCheck. Assertions
; under any other prefix are silently ignored.
929 define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
930 ; SSE-LABEL: test_x86_sse2_paddus_b:
932 ; SSE-NEXT: paddusb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xdc,0xc1]
933 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
935 ; AVX1-LABEL: test_x86_sse2_paddus_b:
937 ; AVX1-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdc,0xc1]
938 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
940 ; AVX512-LABEL: test_x86_sse2_paddus_b:
942 ; AVX512-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
943 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
944 %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
947 declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
; Exercises @llvm.x86.sse2.paddus.w on two <8 x i16> operands. Every
; configuration is expected to emit a single (v)paddusw %xmm1, %xmm0.
; The AVX assertions use the AVX1 and AVX512 prefixes because those are the
; ones the RUN lines at the top of this file hand to FileCheck. Assertions
; under any other prefix are silently ignored.
950 define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
951 ; SSE-LABEL: test_x86_sse2_paddus_w:
953 ; SSE-NEXT: paddusw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xdd,0xc1]
954 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
956 ; AVX1-LABEL: test_x86_sse2_paddus_w:
958 ; AVX1-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdd,0xc1]
959 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
961 ; AVX512-LABEL: test_x86_sse2_paddus_w:
963 ; AVX512-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
964 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
965 %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
968 declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
; Despite the sse2-style test name, this calls the generic
; @llvm.ssub.sat.v16i8 intrinsic on two <16 x i8> operands. Every
; configuration is expected to select a single (v)psubsb %xmm1, %xmm0.
971 define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
972 ; SSE-LABEL: test_x86_sse2_psubs_b:
974 ; SSE-NEXT: psubsb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe8,0xc1]
975 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
977 ; AVX1-LABEL: test_x86_sse2_psubs_b:
979 ; AVX1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe8,0xc1]
980 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
982 ; AVX512-LABEL: test_x86_sse2_psubs_b:
984 ; AVX512-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1]
985 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
986 %res = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
989 declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
; Despite the sse2-style test name, this calls the generic
; @llvm.ssub.sat.v8i16 intrinsic on two <8 x i16> operands. Every
; configuration is expected to select a single (v)psubsw %xmm1, %xmm0.
992 define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
993 ; SSE-LABEL: test_x86_sse2_psubs_w:
995 ; SSE-NEXT: psubsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe9,0xc1]
996 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
998 ; AVX1-LABEL: test_x86_sse2_psubs_w:
1000 ; AVX1-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe9,0xc1]
1001 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1003 ; AVX512-LABEL: test_x86_sse2_psubs_w:
1005 ; AVX512-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1]
1006 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1007 %res = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1010 declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
; Exercises @llvm.x86.sse2.psubus.b on two <16 x i8> operands. Every
; configuration is expected to emit a single (v)psubusb %xmm1, %xmm0.
; The AVX assertions use the AVX1 and AVX512 prefixes because those are the
; ones the RUN lines at the top of this file hand to FileCheck. Assertions
; under any other prefix are silently ignored.
1013 define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
1014 ; SSE-LABEL: test_x86_sse2_psubus_b:
1016 ; SSE-NEXT: psubusb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd8,0xc1]
1017 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1019 ; AVX1-LABEL: test_x86_sse2_psubus_b:
1021 ; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd8,0xc1]
1022 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1024 ; AVX512-LABEL: test_x86_sse2_psubus_b:
1026 ; AVX512-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
1027 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1028 %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
1031 declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
; Exercises @llvm.x86.sse2.psubus.w on two <8 x i16> operands. Every
; configuration is expected to emit a single (v)psubusw %xmm1, %xmm0.
; The AVX assertions use the AVX1 and AVX512 prefixes because those are the
; ones the RUN lines at the top of this file hand to FileCheck. Assertions
; under any other prefix are silently ignored.
1034 define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
1035 ; SSE-LABEL: test_x86_sse2_psubus_w:
1037 ; SSE-NEXT: psubusw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd9,0xc1]
1038 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1040 ; AVX1-LABEL: test_x86_sse2_psubus_w:
1042 ; AVX1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd9,0xc1]
1043 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1045 ; AVX512-LABEL: test_x86_sse2_psubus_w:
1047 ; AVX512-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
1048 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1049 %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1052 declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone