; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse2 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,AVX1,X86-AVX1
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,AVX512,X86-AVX512
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse2 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,AVX1,X64-AVX1
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,AVX512,X64-AVX512
define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_cmp_pd:
; SSE: ## %bb.0:
; SSE-NEXT: cmpordpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xc2,0xc1,0x07]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse2_cmp_pd:
; AVX: ## %bb.0:
; AVX-NEXT: vcmpordpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc2,0xc1,0x07]
; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_cmp_sd:
; SSE: ## %bb.0:
; SSE-NEXT: cmpordsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0xc2,0xc1,0x07]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse2_cmp_sd:
; AVX: ## %bb.0:
; AVX-NEXT: vcmpordsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0xc2,0xc1,0x07]
; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comieq_sd:
; SSE: ## %bb.0:
; SSE-NEXT: comisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT: setnp %al ## encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT: sete %cl ## encoding: [0x0f,0x94,0xc1]
; SSE-NEXT: andb %al, %cl ## encoding: [0x20,0xc1]
; SSE-NEXT: movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_comieq_sd:
; AVX1: ## %bb.0:
; AVX1-NEXT: vcomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT: setnp %al ## encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT: sete %cl ## encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT: andb %al, %cl ## encoding: [0x20,0xc1]
; AVX1-NEXT: movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_comieq_sd:
; AVX512: ## %bb.0:
; AVX512-NEXT: vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT: setnp %al ## encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT: sete %cl ## encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT: andb %al, %cl ## encoding: [0x20,0xc1]
; AVX512-NEXT: movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comige_sd:
; SSE: ## %bb.0:
; SSE-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT: comisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_comige_sd:
; AVX1: ## %bb.0:
; AVX1-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX1-NEXT: vcomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_comige_sd:
; AVX512: ## %bb.0:
; AVX512-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX512-NEXT: vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comigt_sd:
; SSE: ## %bb.0:
; SSE-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT: comisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_comigt_sd:
; AVX1: ## %bb.0:
; AVX1-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX1-NEXT: vcomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_comigt_sd:
; AVX512: ## %bb.0:
; AVX512-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX512-NEXT: vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comile_sd:
; SSE: ## %bb.0:
; SSE-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT: comisd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x2f,0xc8]
; SSE-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_comile_sd:
; AVX1: ## %bb.0:
; AVX1-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX1-NEXT: vcomisd %xmm0, %xmm1 ## encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX1-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_comile_sd:
; AVX512: ## %bb.0:
; AVX512-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX512-NEXT: vcomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX512-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comilt_sd:
; SSE: ## %bb.0:
; SSE-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT: comisd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x2f,0xc8]
; SSE-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_comilt_sd:
; AVX1: ## %bb.0:
; AVX1-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX1-NEXT: vcomisd %xmm0, %xmm1 ## encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX1-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_comilt_sd:
; AVX512: ## %bb.0:
; AVX512-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX512-NEXT: vcomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX512-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comineq_sd:
; SSE: ## %bb.0:
; SSE-NEXT: comisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
; SSE-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
; SSE-NEXT: movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_comineq_sd:
; AVX1: ## %bb.0:
; AVX1-NEXT: vcomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
; AVX1-NEXT: movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_comineq_sd:
; AVX512: ## %bb.0:
; AVX512-NEXT: vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
; AVX512-NEXT: movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvtpd2dq:
; SSE: ## %bb.0:
; SSE-NEXT: cvtpd2dq %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0xe6,0xc0]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_cvtpd2dq:
; AVX1: ## %bb.0:
; AVX1-NEXT: vcvtpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0xe6,0xc0]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtpd2dq:
; AVX512: ## %bb.0:
; AVX512-NEXT: vcvtpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
define <2 x i64> @test_mm_cvtpd_epi32_zext(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvtpd_epi32_zext:
; SSE: ## %bb.0:
; SSE-NEXT: cvtpd2dq %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0xe6,0xc0]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtpd_epi32_zext:
; AVX1: ## %bb.0:
; AVX1-NEXT: vcvtpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0xe6,0xc0]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtpd_epi32_zext:
; AVX512: ## %bb.0:
; AVX512-NEXT: vcvtpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
define <2 x i64> @test_mm_cvtpd_epi32_zext_load(ptr %p0) nounwind {
; X86-SSE-LABEL: test_mm_cvtpd_epi32_zext_load:
; X86-SSE: ## %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: cvtpd2dq (%eax), %xmm0 ## encoding: [0xf2,0x0f,0xe6,0x00]
; X86-SSE-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtpd_epi32_zext_load:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vcvtpd2dqx (%eax), %xmm0 ## encoding: [0xc5,0xfb,0xe6,0x00]
; X86-AVX1-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtpd_epi32_zext_load:
; X86-AVX512: ## %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vcvtpd2dqx (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0x00]
; X86-AVX512-NEXT: retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtpd_epi32_zext_load:
; X64-SSE: ## %bb.0:
; X64-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 ## encoding: [0xf2,0x0f,0xe6,0x07]
; X64-SSE-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_cvtpd_epi32_zext_load:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vcvtpd2dqx (%rdi), %xmm0 ## encoding: [0xc5,0xfb,0xe6,0x07]
; X64-AVX1-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_cvtpd_epi32_zext_load:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vcvtpd2dqx (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0x07]
; X64-AVX512-NEXT: retq ## encoding: [0xc3]
  %a0 = load <2 x double>, ptr %p0
  %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvtpd2ps:
; SSE: ## %bb.0:
; SSE-NEXT: cvtpd2ps %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x5a,0xc0]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_cvtpd2ps:
; AVX1: ## %bb.0:
; AVX1-NEXT: vcvtpd2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x5a,0xc0]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtpd2ps:
; AVX512: ## %bb.0:
; AVX512-NEXT: vcvtpd2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
define <4 x float> @test_x86_sse2_cvtpd2ps_zext(<2 x double> %a0) nounwind {
; SSE-LABEL: test_x86_sse2_cvtpd2ps_zext:
; SSE: ## %bb.0:
; SSE-NEXT: cvtpd2ps %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x5a,0xc0]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_cvtpd2ps_zext:
; AVX1: ## %bb.0:
; AVX1-NEXT: vcvtpd2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x5a,0xc0]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtpd2ps_zext:
; AVX512: ## %bb.0:
; AVX512-NEXT: vcvtpd2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  %res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %res
}
define <4 x float> @test_x86_sse2_cvtpd2ps_zext_load(ptr %p0) nounwind {
; X86-SSE-LABEL: test_x86_sse2_cvtpd2ps_zext_load:
; X86-SSE: ## %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: cvtpd2ps (%eax), %xmm0 ## encoding: [0x66,0x0f,0x5a,0x00]
; X86-SSE-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_cvtpd2ps_zext_load:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vcvtpd2psx (%eax), %xmm0 ## encoding: [0xc5,0xf9,0x5a,0x00]
; X86-AVX1-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtpd2ps_zext_load:
; X86-AVX512: ## %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vcvtpd2psx (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0x00]
; X86-AVX512-NEXT: retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtpd2ps_zext_load:
; X64-SSE: ## %bb.0:
; X64-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x5a,0x07]
; X64-SSE-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_cvtpd2ps_zext_load:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vcvtpd2psx (%rdi), %xmm0 ## encoding: [0xc5,0xf9,0x5a,0x07]
; X64-AVX1-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtpd2ps_zext_load:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vcvtpd2psx (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0x07]
; X64-AVX512-NEXT: retq ## encoding: [0xc3]
  %a0 = load <2 x double>, ptr %p0
  %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  %res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %res
}
378 define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
379 ; SSE-LABEL: test_x86_sse2_cvtps2dq:
381 ; SSE-NEXT: cvtps2dq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x5b,0xc0]
382 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
384 ; AVX1-LABEL: test_x86_sse2_cvtps2dq:
386 ; AVX1-NEXT: vcvtps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x5b,0xc0]
387 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
389 ; AVX512-LABEL: test_x86_sse2_cvtps2dq:
391 ; AVX512-NEXT: vcvtps2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5b,0xc0]
392 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
393 %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
396 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
399 define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
400 ; SSE-LABEL: test_x86_sse2_cvtsd2si:
402 ; SSE-NEXT: cvtsd2si %xmm0, %eax ## encoding: [0xf2,0x0f,0x2d,0xc0]
403 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
405 ; AVX1-LABEL: test_x86_sse2_cvtsd2si:
407 ; AVX1-NEXT: vcvtsd2si %xmm0, %eax ## encoding: [0xc5,0xfb,0x2d,0xc0]
408 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
410 ; AVX512-LABEL: test_x86_sse2_cvtsd2si:
412 ; AVX512-NEXT: vcvtsd2si %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2d,0xc0]
413 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
414 %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
417 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
420 define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
421 ; SSE-LABEL: test_x86_sse2_cvtsd2ss:
423 ; SSE-NEXT: cvtsd2ss %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5a,0xc1]
424 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
426 ; AVX1-LABEL: test_x86_sse2_cvtsd2ss:
428 ; AVX1-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0xc1]
429 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
431 ; AVX512-LABEL: test_x86_sse2_cvtsd2ss:
433 ; AVX512-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0xc1]
434 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
435 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
438 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
441 define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, ptr %p1) {
442 ; X86-SSE-LABEL: test_x86_sse2_cvtsd2ss_load:
444 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
445 ; X86-SSE-NEXT: cvtsd2ss (%eax), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x00]
446 ; X86-SSE-NEXT: retl ## encoding: [0xc3]
448 ; X86-AVX1-LABEL: test_x86_sse2_cvtsd2ss_load:
449 ; X86-AVX1: ## %bb.0:
450 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
451 ; X86-AVX1-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x00]
452 ; X86-AVX1-NEXT: retl ## encoding: [0xc3]
454 ; X86-AVX512-LABEL: test_x86_sse2_cvtsd2ss_load:
455 ; X86-AVX512: ## %bb.0:
456 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
457 ; X86-AVX512-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x00]
458 ; X86-AVX512-NEXT: retl ## encoding: [0xc3]
460 ; X64-SSE-LABEL: test_x86_sse2_cvtsd2ss_load:
462 ; X64-SSE-NEXT: cvtsd2ss (%rdi), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x07]
463 ; X64-SSE-NEXT: retq ## encoding: [0xc3]
465 ; X64-AVX1-LABEL: test_x86_sse2_cvtsd2ss_load:
466 ; X64-AVX1: ## %bb.0:
467 ; X64-AVX1-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x07]
468 ; X64-AVX1-NEXT: retq ## encoding: [0xc3]
470 ; X64-AVX512-LABEL: test_x86_sse2_cvtsd2ss_load:
471 ; X64-AVX512: ## %bb.0:
472 ; X64-AVX512-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x07]
473 ; X64-AVX512-NEXT: retq ## encoding: [0xc3]
474 %a1 = load <2 x double>, ptr %p1
475 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
480 define <4 x float> @test_x86_sse2_cvtsd2ss_load_optsize(<4 x float> %a0, ptr %p1) optsize {
481 ; X86-SSE-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
483 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
484 ; X86-SSE-NEXT: cvtsd2ss (%eax), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x00]
485 ; X86-SSE-NEXT: retl ## encoding: [0xc3]
487 ; X86-AVX1-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
488 ; X86-AVX1: ## %bb.0:
489 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
490 ; X86-AVX1-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x00]
491 ; X86-AVX1-NEXT: retl ## encoding: [0xc3]
493 ; X86-AVX512-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
494 ; X86-AVX512: ## %bb.0:
495 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
496 ; X86-AVX512-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x00]
497 ; X86-AVX512-NEXT: retl ## encoding: [0xc3]
499 ; X64-SSE-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
501 ; X64-SSE-NEXT: cvtsd2ss (%rdi), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x07]
502 ; X64-SSE-NEXT: retq ## encoding: [0xc3]
504 ; X64-AVX1-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
505 ; X64-AVX1: ## %bb.0:
506 ; X64-AVX1-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x07]
507 ; X64-AVX1-NEXT: retq ## encoding: [0xc3]
509 ; X64-AVX512-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
510 ; X64-AVX512: ## %bb.0:
511 ; X64-AVX512-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x07]
512 ; X64-AVX512-NEXT: retq ## encoding: [0xc3]
513 %a1 = load <2 x double>, ptr %p1
514 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
519 define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
520 ; SSE-LABEL: test_x86_sse2_cvttpd2dq:
522 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0xe6,0xc0]
523 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
525 ; AVX1-LABEL: test_x86_sse2_cvttpd2dq:
527 ; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0]
528 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
530 ; AVX512-LABEL: test_x86_sse2_cvttpd2dq:
532 ; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
533 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
534 %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
537 declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
540 define <2 x i64> @test_mm_cvttpd_epi32_zext(<2 x double> %a0) nounwind {
541 ; SSE-LABEL: test_mm_cvttpd_epi32_zext:
543 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0xe6,0xc0]
544 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
546 ; AVX1-LABEL: test_mm_cvttpd_epi32_zext:
548 ; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0]
549 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
551 ; AVX512-LABEL: test_mm_cvttpd_epi32_zext:
553 ; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
554 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
555 %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
556 %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
557 %bc = bitcast <4 x i32> %res to <2 x i64>
562 define <2 x i64> @test_mm_cvttpd_epi32_zext_load(ptr %p0) nounwind {
563 ; X86-SSE-LABEL: test_mm_cvttpd_epi32_zext_load:
565 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
566 ; X86-SSE-NEXT: cvttpd2dq (%eax), %xmm0 ## encoding: [0x66,0x0f,0xe6,0x00]
567 ; X86-SSE-NEXT: retl ## encoding: [0xc3]
569 ; X86-AVX1-LABEL: test_mm_cvttpd_epi32_zext_load:
570 ; X86-AVX1: ## %bb.0:
571 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
572 ; X86-AVX1-NEXT: vcvttpd2dqx (%eax), %xmm0 ## encoding: [0xc5,0xf9,0xe6,0x00]
573 ; X86-AVX1-NEXT: retl ## encoding: [0xc3]
575 ; X86-AVX512-LABEL: test_mm_cvttpd_epi32_zext_load:
576 ; X86-AVX512: ## %bb.0:
577 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
578 ; X86-AVX512-NEXT: vcvttpd2dqx (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0x00]
579 ; X86-AVX512-NEXT: retl ## encoding: [0xc3]
581 ; X64-SSE-LABEL: test_mm_cvttpd_epi32_zext_load:
583 ; X64-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 ## encoding: [0x66,0x0f,0xe6,0x07]
584 ; X64-SSE-NEXT: retq ## encoding: [0xc3]
586 ; X64-AVX1-LABEL: test_mm_cvttpd_epi32_zext_load:
587 ; X64-AVX1: ## %bb.0:
588 ; X64-AVX1-NEXT: vcvttpd2dqx (%rdi), %xmm0 ## encoding: [0xc5,0xf9,0xe6,0x07]
589 ; X64-AVX1-NEXT: retq ## encoding: [0xc3]
591 ; X64-AVX512-LABEL: test_mm_cvttpd_epi32_zext_load:
592 ; X64-AVX512: ## %bb.0:
593 ; X64-AVX512-NEXT: vcvttpd2dqx (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0x07]
594 ; X64-AVX512-NEXT: retq ## encoding: [0xc3]
595 %a0 = load <2 x double>, ptr %p0
596 %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
597 %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
598 %bc = bitcast <4 x i32> %res to <2 x i64>
603 define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
604 ; SSE-LABEL: test_x86_sse2_cvttps2dq:
606 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x5b,0xc0]
607 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
609 ; AVX1-LABEL: test_x86_sse2_cvttps2dq:
611 ; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5b,0xc0]
612 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
614 ; AVX512-LABEL: test_x86_sse2_cvttps2dq:
616 ; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0]
617 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
618 %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
621 declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
624 define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
625 ; SSE-LABEL: test_x86_sse2_cvttsd2si:
627 ; SSE-NEXT: cvttsd2si %xmm0, %eax ## encoding: [0xf2,0x0f,0x2c,0xc0]
628 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
630 ; AVX1-LABEL: test_x86_sse2_cvttsd2si:
632 ; AVX1-NEXT: vcvttsd2si %xmm0, %eax ## encoding: [0xc5,0xfb,0x2c,0xc0]
633 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
635 ; AVX512-LABEL: test_x86_sse2_cvttsd2si:
637 ; AVX512-NEXT: vcvttsd2si %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2c,0xc0]
638 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
639 %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
642 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
645 define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
646 ; SSE-LABEL: test_x86_sse2_max_pd:
648 ; SSE-NEXT: maxpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x5f,0xc1]
649 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
651 ; AVX1-LABEL: test_x86_sse2_max_pd:
653 ; AVX1-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x5f,0xc1]
654 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
656 ; AVX512-LABEL: test_x86_sse2_max_pd:
658 ; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5f,0xc1]
659 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
660 %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
661 ret <2 x double> %res
663 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
666 define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
667 ; SSE-LABEL: test_x86_sse2_max_sd:
669 ; SSE-NEXT: maxsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5f,0xc1]
670 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
672 ; AVX1-LABEL: test_x86_sse2_max_sd:
674 ; AVX1-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5f,0xc1]
675 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
677 ; AVX512-LABEL: test_x86_sse2_max_sd:
679 ; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5f,0xc1]
680 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
681 %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
682 ret <2 x double> %res
684 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
687 define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
688 ; SSE-LABEL: test_x86_sse2_min_pd:
690 ; SSE-NEXT: minpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x5d,0xc1]
691 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
693 ; AVX1-LABEL: test_x86_sse2_min_pd:
695 ; AVX1-NEXT: vminpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x5d,0xc1]
696 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
698 ; AVX512-LABEL: test_x86_sse2_min_pd:
700 ; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5d,0xc1]
701 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
702 %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
703 ret <2 x double> %res
705 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
708 define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
709 ; SSE-LABEL: test_x86_sse2_min_sd:
711 ; SSE-NEXT: minsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5d,0xc1]
712 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
714 ; AVX1-LABEL: test_x86_sse2_min_sd:
716 ; AVX1-NEXT: vminsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5d,0xc1]
717 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
719 ; AVX512-LABEL: test_x86_sse2_min_sd:
721 ; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5d,0xc1]
722 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
723 %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
724 ret <2 x double> %res
726 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
729 define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
730 ; SSE-LABEL: test_x86_sse2_movmsk_pd:
732 ; SSE-NEXT: movmskpd %xmm0, %eax ## encoding: [0x66,0x0f,0x50,0xc0]
733 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
735 ; AVX-LABEL: test_x86_sse2_movmsk_pd:
737 ; AVX-NEXT: vmovmskpd %xmm0, %eax ## encoding: [0xc5,0xf9,0x50,0xc0]
738 ; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
739 %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1]
742 declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
745 define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
746 ; SSE-LABEL: test_x86_sse2_packssdw_128:
748 ; SSE-NEXT: packssdw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x6b,0xc1]
749 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
751 ; AVX1-LABEL: test_x86_sse2_packssdw_128:
753 ; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0xc1]
754 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
756 ; AVX512-LABEL: test_x86_sse2_packssdw_128:
758 ; AVX512-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1]
759 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
760 %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
763 declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
766 define <8 x i16> @test_x86_sse2_packssdw_128_fold() {
767 ; X86-SSE-LABEL: test_x86_sse2_packssdw_128_fold:
769 ; X86-SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
770 ; X86-SSE-NEXT: ## encoding: [0x0f,0x28,0x05,A,A,A,A]
771 ; X86-SSE-NEXT: ## fixup A - offset: 3, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
772 ; X86-SSE-NEXT: retl ## encoding: [0xc3]
774 ; X86-AVX1-LABEL: test_x86_sse2_packssdw_128_fold:
775 ; X86-AVX1: ## %bb.0:
776 ; X86-AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
777 ; X86-AVX1-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
778 ; X86-AVX1-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
779 ; X86-AVX1-NEXT: retl ## encoding: [0xc3]
781 ; X86-AVX512-LABEL: test_x86_sse2_packssdw_128_fold:
782 ; X86-AVX512: ## %bb.0:
783 ; X86-AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
784 ; X86-AVX512-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
785 ; X86-AVX512-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
786 ; X86-AVX512-NEXT: retl ## encoding: [0xc3]
788 ; X64-SSE-LABEL: test_x86_sse2_packssdw_128_fold:
790 ; X64-SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
791 ; X64-SSE-NEXT: ## encoding: [0x0f,0x28,0x05,A,A,A,A]
792 ; X64-SSE-NEXT: ## fixup A - offset: 3, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
793 ; X64-SSE-NEXT: retq ## encoding: [0xc3]
795 ; X64-AVX1-LABEL: test_x86_sse2_packssdw_128_fold:
796 ; X64-AVX1: ## %bb.0:
797 ; X64-AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
798 ; X64-AVX1-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
799 ; X64-AVX1-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
800 ; X64-AVX1-NEXT: retq ## encoding: [0xc3]
802 ; X64-AVX512-LABEL: test_x86_sse2_packssdw_128_fold:
803 ; X64-AVX512: ## %bb.0:
804 ; X64-AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
805 ; X64-AVX512-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
806 ; X64-AVX512-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
807 ; X64-AVX512-NEXT: retq ## encoding: [0xc3]
808 %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> <i32 65535, i32 65536, i32 -1, i32 -131072>)
813 define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
814 ; SSE-LABEL: test_x86_sse2_packsswb_128:
816 ; SSE-NEXT: packsswb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x63,0xc1]
817 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
819 ; AVX1-LABEL: test_x86_sse2_packsswb_128:
821 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc1]
822 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
824 ; AVX512-LABEL: test_x86_sse2_packsswb_128:
826 ; AVX512-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1]
827 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
828 %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
831 declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
834 define <16 x i8> @test_x86_sse2_packsswb_128_fold() {
835 ; X86-SSE-LABEL: test_x86_sse2_packsswb_128_fold:
837 ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
838 ; X86-SSE-NEXT: ## encoding: [0xf2,0x0f,0x10,0x05,A,A,A,A]
839 ; X86-SSE-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
840 ; X86-SSE-NEXT: retl ## encoding: [0xc3]
842 ; X86-AVX1-LABEL: test_x86_sse2_packsswb_128_fold:
843 ; X86-AVX1: ## %bb.0:
844 ; X86-AVX1-NEXT: vmovsd {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
845 ; X86-AVX1-NEXT: ## encoding: [0xc5,0xfb,0x10,0x05,A,A,A,A]
846 ; X86-AVX1-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
847 ; X86-AVX1-NEXT: retl ## encoding: [0xc3]
849 ; X86-AVX512-LABEL: test_x86_sse2_packsswb_128_fold:
850 ; X86-AVX512: ## %bb.0:
851 ; X86-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
852 ; X86-AVX512-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x05,A,A,A,A]
853 ; X86-AVX512-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
854 ; X86-AVX512-NEXT: retl ## encoding: [0xc3]
856 ; X64-SSE-LABEL: test_x86_sse2_packsswb_128_fold:
858 ; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
859 ; X64-SSE-NEXT: ## encoding: [0xf2,0x0f,0x10,0x05,A,A,A,A]
860 ; X64-SSE-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
861 ; X64-SSE-NEXT: retq ## encoding: [0xc3]
863 ; X64-AVX1-LABEL: test_x86_sse2_packsswb_128_fold:
864 ; X64-AVX1: ## %bb.0:
865 ; X64-AVX1-NEXT: vmovsd {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
866 ; X64-AVX1-NEXT: ## encoding: [0xc5,0xfb,0x10,0x05,A,A,A,A]
867 ; X64-AVX1-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
868 ; X64-AVX1-NEXT: retq ## encoding: [0xc3]
870 ; X64-AVX512-LABEL: test_x86_sse2_packsswb_128_fold:
871 ; X64-AVX512: ## %bb.0:
872 ; X64-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
873 ; X64-AVX512-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x05,A,A,A,A]
874 ; X64-AVX512-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
875 ; X64-AVX512-NEXT: retq ## encoding: [0xc3]
876 %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
881 define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
882 ; SSE-LABEL: test_x86_sse2_packuswb_128:
884 ; SSE-NEXT: packuswb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x67,0xc1]
885 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
887 ; AVX1-LABEL: test_x86_sse2_packuswb_128:
889 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x67,0xc1]
890 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
892 ; AVX512-LABEL: test_x86_sse2_packuswb_128:
894 ; AVX512-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1]
895 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
896 %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
899 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
902 define <16 x i8> @test_x86_sse2_packuswb_128_fold() {
903 ; X86-SSE-LABEL: test_x86_sse2_packuswb_128_fold:
905 ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
906 ; X86-SSE-NEXT: ## encoding: [0xf3,0x0f,0x10,0x05,A,A,A,A]
907 ; X86-SSE-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
908 ; X86-SSE-NEXT: retl ## encoding: [0xc3]
910 ; X86-AVX1-LABEL: test_x86_sse2_packuswb_128_fold:
911 ; X86-AVX1: ## %bb.0:
912 ; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
913 ; X86-AVX1-NEXT: ## encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A]
914 ; X86-AVX1-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
915 ; X86-AVX1-NEXT: retl ## encoding: [0xc3]
917 ; X86-AVX512-LABEL: test_x86_sse2_packuswb_128_fold:
918 ; X86-AVX512: ## %bb.0:
919 ; X86-AVX512-NEXT: vmovss {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
920 ; X86-AVX512-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A]
921 ; X86-AVX512-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
922 ; X86-AVX512-NEXT: retl ## encoding: [0xc3]
924 ; X64-SSE-LABEL: test_x86_sse2_packuswb_128_fold:
926 ; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
927 ; X64-SSE-NEXT: ## encoding: [0xf3,0x0f,0x10,0x05,A,A,A,A]
928 ; X64-SSE-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
929 ; X64-SSE-NEXT: retq ## encoding: [0xc3]
931 ; X64-AVX1-LABEL: test_x86_sse2_packuswb_128_fold:
932 ; X64-AVX1: ## %bb.0:
933 ; X64-AVX1-NEXT: vmovss {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
934 ; X64-AVX1-NEXT: ## encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A]
935 ; X64-AVX1-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
936 ; X64-AVX1-NEXT: retq ## encoding: [0xc3]
938 ; X64-AVX512-LABEL: test_x86_sse2_packuswb_128_fold:
939 ; X64-AVX512: ## %bb.0:
940 ; X64-AVX512-NEXT: vmovss {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
941 ; X64-AVX512-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A]
942 ; X64-AVX512-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
943 ; X64-AVX512-NEXT: retq ## encoding: [0xc3]
944 %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
949 define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
950 ; SSE-LABEL: test_x86_sse2_pavg_b:
952 ; SSE-NEXT: pavgb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe0,0xc1]
953 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
955 ; AVX1-LABEL: test_x86_sse2_pavg_b:
957 ; AVX1-NEXT: vpavgb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe0,0xc1]
958 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
960 ; AVX512-LABEL: test_x86_sse2_pavg_b:
962 ; AVX512-NEXT: vpavgb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xc1]
963 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
964 %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
967 declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
970 define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
971 ; SSE-LABEL: test_x86_sse2_pavg_w:
973 ; SSE-NEXT: pavgw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe3,0xc1]
974 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
976 ; AVX1-LABEL: test_x86_sse2_pavg_w:
978 ; AVX1-NEXT: vpavgw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe3,0xc1]
979 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
981 ; AVX512-LABEL: test_x86_sse2_pavg_w:
983 ; AVX512-NEXT: vpavgw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xc1]
984 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
985 %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
988 declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
991 define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
992 ; SSE-LABEL: test_x86_sse2_pmadd_wd:
994 ; SSE-NEXT: pmaddwd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf5,0xc1]
995 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
997 ; AVX1-LABEL: test_x86_sse2_pmadd_wd:
999 ; AVX1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf5,0xc1]
1000 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1002 ; AVX512-LABEL: test_x86_sse2_pmadd_wd:
1004 ; AVX512-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1]
1005 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1006 %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
1009 declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
1012 define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
1013 ; SSE-LABEL: test_x86_sse2_pmovmskb_128:
1015 ; SSE-NEXT: pmovmskb %xmm0, %eax ## encoding: [0x66,0x0f,0xd7,0xc0]
1016 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1018 ; AVX-LABEL: test_x86_sse2_pmovmskb_128:
1020 ; AVX-NEXT: vpmovmskb %xmm0, %eax ## encoding: [0xc5,0xf9,0xd7,0xc0]
1021 ; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1022 %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
1025 declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
1028 define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
1029 ; SSE-LABEL: test_x86_sse2_pmulh_w:
1031 ; SSE-NEXT: pmulhw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe5,0xc1]
1032 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1034 ; AVX1-LABEL: test_x86_sse2_pmulh_w:
1036 ; AVX1-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe5,0xc1]
1037 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1039 ; AVX512-LABEL: test_x86_sse2_pmulh_w:
1041 ; AVX512-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xc1]
1042 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1043 %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1046 declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
1049 define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
1050 ; SSE-LABEL: test_x86_sse2_pmulhu_w:
1052 ; SSE-NEXT: pmulhuw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe4,0xc1]
1053 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1055 ; AVX1-LABEL: test_x86_sse2_pmulhu_w:
1057 ; AVX1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe4,0xc1]
1058 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1060 ; AVX512-LABEL: test_x86_sse2_pmulhu_w:
1062 ; AVX512-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1]
1063 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1064 %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1067 declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
1070 define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
1071 ; SSE-LABEL: test_x86_sse2_psad_bw:
1073 ; SSE-NEXT: psadbw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf6,0xc1]
1074 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1076 ; AVX1-LABEL: test_x86_sse2_psad_bw:
1078 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf6,0xc1]
1079 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1081 ; AVX512-LABEL: test_x86_sse2_psad_bw:
1083 ; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf6,0xc1]
1084 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1085 %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
1088 declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
1091 define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
1092 ; SSE-LABEL: test_x86_sse2_psll_d:
1094 ; SSE-NEXT: pslld %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf2,0xc1]
1095 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1097 ; AVX1-LABEL: test_x86_sse2_psll_d:
1099 ; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf2,0xc1]
1100 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1102 ; AVX512-LABEL: test_x86_sse2_psll_d:
1104 ; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xc1]
1105 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1106 %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1109 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
1112 define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
1113 ; SSE-LABEL: test_x86_sse2_psll_q:
1115 ; SSE-NEXT: psllq %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf3,0xc1]
1116 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1118 ; AVX1-LABEL: test_x86_sse2_psll_q:
1120 ; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf3,0xc1]
1121 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1123 ; AVX512-LABEL: test_x86_sse2_psll_q:
1125 ; AVX512-NEXT: vpsllq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf3,0xc1]
1126 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1127 %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
1130 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
1133 define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
1134 ; SSE-LABEL: test_x86_sse2_psll_w:
1136 ; SSE-NEXT: psllw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf1,0xc1]
1137 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1139 ; AVX1-LABEL: test_x86_sse2_psll_w:
1141 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf1,0xc1]
1142 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1144 ; AVX512-LABEL: test_x86_sse2_psll_w:
1146 ; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xc1]
1147 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1148 %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1151 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
1154 define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
1155 ; SSE-LABEL: test_x86_sse2_pslli_d:
1157 ; SSE-NEXT: pslld $7, %xmm0 ## encoding: [0x66,0x0f,0x72,0xf0,0x07]
1158 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1160 ; AVX1-LABEL: test_x86_sse2_pslli_d:
1162 ; AVX1-NEXT: vpslld $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x72,0xf0,0x07]
1163 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1165 ; AVX512-LABEL: test_x86_sse2_pslli_d:
1167 ; AVX512-NEXT: vpslld $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x07]
1168 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1169 %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
1172 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
1175 define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
1176 ; SSE-LABEL: test_x86_sse2_pslli_q:
1178 ; SSE-NEXT: psllq $7, %xmm0 ## encoding: [0x66,0x0f,0x73,0xf0,0x07]
1179 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1181 ; AVX1-LABEL: test_x86_sse2_pslli_q:
1183 ; AVX1-NEXT: vpsllq $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xf0,0x07]
1184 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1186 ; AVX512-LABEL: test_x86_sse2_pslli_q:
1188 ; AVX512-NEXT: vpsllq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf0,0x07]
1189 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1190 %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
1193 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
1196 define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
1197 ; SSE-LABEL: test_x86_sse2_pslli_w:
1199 ; SSE-NEXT: psllw $7, %xmm0 ## encoding: [0x66,0x0f,0x71,0xf0,0x07]
1200 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1202 ; AVX1-LABEL: test_x86_sse2_pslli_w:
1204 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x71,0xf0,0x07]
1205 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1207 ; AVX512-LABEL: test_x86_sse2_pslli_w:
1209 ; AVX512-NEXT: vpsllw $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x07]
1210 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1211 %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
1214 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
1217 define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
1218 ; SSE-LABEL: test_x86_sse2_psra_d:
1220 ; SSE-NEXT: psrad %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe2,0xc1]
1221 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1223 ; AVX1-LABEL: test_x86_sse2_psra_d:
1225 ; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe2,0xc1]
1226 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1228 ; AVX512-LABEL: test_x86_sse2_psra_d:
1230 ; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xc1]
1231 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1232 %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1235 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
1238 define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
1239 ; SSE-LABEL: test_x86_sse2_psra_w:
1241 ; SSE-NEXT: psraw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe1,0xc1]
1242 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1244 ; AVX1-LABEL: test_x86_sse2_psra_w:
1246 ; AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe1,0xc1]
1247 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1249 ; AVX512-LABEL: test_x86_sse2_psra_w:
1251 ; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xc1]
1252 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1253 %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1256 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
1259 define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
1260 ; SSE-LABEL: test_x86_sse2_psrai_d:
1262 ; SSE-NEXT: psrad $7, %xmm0 ## encoding: [0x66,0x0f,0x72,0xe0,0x07]
1263 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1265 ; AVX1-LABEL: test_x86_sse2_psrai_d:
1267 ; AVX1-NEXT: vpsrad $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x72,0xe0,0x07]
1268 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1270 ; AVX512-LABEL: test_x86_sse2_psrai_d:
1272 ; AVX512-NEXT: vpsrad $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xe0,0x07]
1273 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1274 %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
1277 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
1280 define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
1281 ; SSE-LABEL: test_x86_sse2_psrai_w:
1283 ; SSE-NEXT: psraw $7, %xmm0 ## encoding: [0x66,0x0f,0x71,0xe0,0x07]
1284 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1286 ; AVX1-LABEL: test_x86_sse2_psrai_w:
1288 ; AVX1-NEXT: vpsraw $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x71,0xe0,0x07]
1289 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1291 ; AVX512-LABEL: test_x86_sse2_psrai_w:
1293 ; AVX512-NEXT: vpsraw $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x07]
1294 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1295 %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
1298 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
1301 define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
1302 ; SSE-LABEL: test_x86_sse2_psrl_d:
1304 ; SSE-NEXT: psrld %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd2,0xc1]
1305 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1307 ; AVX1-LABEL: test_x86_sse2_psrl_d:
1309 ; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd2,0xc1]
1310 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1312 ; AVX512-LABEL: test_x86_sse2_psrl_d:
1314 ; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xc1]
1315 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1316 %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1319 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
1322 define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
1323 ; SSE-LABEL: test_x86_sse2_psrl_q:
1325 ; SSE-NEXT: psrlq %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd3,0xc1]
1326 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1328 ; AVX1-LABEL: test_x86_sse2_psrl_q:
1330 ; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd3,0xc1]
1331 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1333 ; AVX512-LABEL: test_x86_sse2_psrl_q:
1335 ; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xc1]
1336 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1337 %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
1340 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
1343 define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
1344 ; SSE-LABEL: test_x86_sse2_psrl_w:
1346 ; SSE-NEXT: psrlw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd1,0xc1]
1347 ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1349 ; AVX1-LABEL: test_x86_sse2_psrl_w:
1351 ; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd1,0xc1]
1352 ; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1354 ; AVX512-LABEL: test_x86_sse2_psrl_w:
1356 ; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xc1]
1357 ; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1358 %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1361 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_sse2_psrl_w_load(<8 x i16> %a0, ptr %p) {
; X86-SSE-LABEL: test_x86_sse2_psrl_w_load:
; X86-SSE: ## %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: psrlw (%eax), %xmm0 ## encoding: [0x66,0x0f,0xd1,0x00]
; X86-SSE-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_psrl_w_load:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vpsrlw (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd1,0x00]
; X86-AVX1-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_psrl_w_load:
; X86-AVX512: ## %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vpsrlw (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0x00]
; X86-AVX512-NEXT: retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_psrl_w_load:
; X64-SSE: ## %bb.0:
; X64-SSE-NEXT: psrlw (%rdi), %xmm0 ## encoding: [0x66,0x0f,0xd1,0x07]
; X64-SSE-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_psrl_w_load:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd1,0x07]
; X64-AVX1-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_psrl_w_load:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0x07]
; X64-AVX512-NEXT: retq ## encoding: [0xc3]
%a1 = load <8 x i16>, ptr %p
%res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}

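; The psrli.* tests below cover the immediate-count forms of the logical right
; shifts (psrld/psrlq/psrlw with an 8-bit immediate shift amount).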
define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_psrli_d:
; SSE: ## %bb.0:
; SSE-NEXT: psrld $7, %xmm0 ## encoding: [0x66,0x0f,0x72,0xd0,0x07]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrli_d:
; AVX1: ## %bb.0:
; AVX1-NEXT: vpsrld $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x72,0xd0,0x07]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrli_d:
; AVX512: ## %bb.0:
; AVX512-NEXT: vpsrld $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xd0,0x07]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psrli_q:
; SSE: ## %bb.0:
; SSE-NEXT: psrlq $7, %xmm0 ## encoding: [0x66,0x0f,0x73,0xd0,0x07]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrli_q:
; AVX1: ## %bb.0:
; AVX1-NEXT: vpsrlq $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xd0,0x07]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrli_q:
; AVX512: ## %bb.0:
; AVX512-NEXT: vpsrlq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x07]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone

define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse2_psrli_w:
; SSE: ## %bb.0:
; SSE-NEXT: psrlw $7, %xmm0 ## encoding: [0x66,0x0f,0x71,0xd0,0x07]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrli_w:
; AVX1: ## %bb.0:
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x71,0xd0,0x07]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrli_w:
; AVX512: ## %bb.0:
; AVX512-NEXT: vpsrlw $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xd0,0x07]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone

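; The ucomi*.sd tests below cover the unordered scalar double-precision
; comparisons. Note that the eq/neq variants also check PF (setnp/setp) so
; that a NaN operand is not reported as equal.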
define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomieq_sd:
; SSE: ## %bb.0:
; SSE-NEXT: ucomisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT: setnp %al ## encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT: sete %cl ## encoding: [0x0f,0x94,0xc1]
; SSE-NEXT: andb %al, %cl ## encoding: [0x20,0xc1]
; SSE-NEXT: movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_ucomieq_sd:
; AVX1: ## %bb.0:
; AVX1-NEXT: vucomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT: setnp %al ## encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT: sete %cl ## encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT: andb %al, %cl ## encoding: [0x20,0xc1]
; AVX1-NEXT: movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_ucomieq_sd:
; AVX512: ## %bb.0:
; AVX512-NEXT: vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT: setnp %al ## encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT: sete %cl ## encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT: andb %al, %cl ## encoding: [0x20,0xc1]
; AVX512-NEXT: movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomige_sd:
; SSE: ## %bb.0:
; SSE-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT: ucomisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_ucomige_sd:
; AVX1: ## %bb.0:
; AVX1-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX1-NEXT: vucomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_ucomige_sd:
; AVX512: ## %bb.0:
; AVX512-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX512-NEXT: vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomigt_sd:
; SSE: ## %bb.0:
; SSE-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT: ucomisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_ucomigt_sd:
; AVX1: ## %bb.0:
; AVX1-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX1-NEXT: vucomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_ucomigt_sd:
; AVX512: ## %bb.0:
; AVX512-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX512-NEXT: vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomile_sd:
; SSE: ## %bb.0:
; SSE-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT: ucomisd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x2e,0xc8]
; SSE-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_ucomile_sd:
; AVX1: ## %bb.0:
; AVX1-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX1-NEXT: vucomisd %xmm0, %xmm1 ## encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX1-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_ucomile_sd:
; AVX512: ## %bb.0:
; AVX512-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX512-NEXT: vucomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX512-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomilt_sd:
; SSE: ## %bb.0:
; SSE-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT: ucomisd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x2e,0xc8]
; SSE-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_ucomilt_sd:
; AVX1: ## %bb.0:
; AVX1-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX1-NEXT: vucomisd %xmm0, %xmm1 ## encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX1-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_ucomilt_sd:
; AVX512: ## %bb.0:
; AVX512-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX512-NEXT: vucomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX512-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomineq_sd:
; SSE: ## %bb.0:
; SSE-NEXT: ucomisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
; SSE-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
; SSE-NEXT: movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_ucomineq_sd:
; AVX1: ## %bb.0:
; AVX1-NEXT: vucomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
; AVX1-NEXT: movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_ucomineq_sd:
; AVX512: ## %bb.0:
; AVX512-NEXT: vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
; AVX512-NEXT: movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone

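; The remaining tests exercise the SSE2 hint/fencing intrinsics. pause, lfence
; and mfence lower identically on every configuration and use the common CHECK
; prefix; clflush differs only in how the pointer argument arrives (on the
; stack for x86, in %rdi for x86-64).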
define void @test_x86_sse2_pause() {
; CHECK-LABEL: test_x86_sse2_pause:
; CHECK: ## %bb.0:
; CHECK-NEXT: pause ## encoding: [0xf3,0x90]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
tail call void @llvm.x86.sse2.pause()
ret void
}
declare void @llvm.x86.sse2.pause() nounwind

define void @lfence() nounwind {
; CHECK-LABEL: lfence:
; CHECK: ## %bb.0:
; CHECK-NEXT: lfence ## encoding: [0x0f,0xae,0xe8]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
tail call void @llvm.x86.sse2.lfence()
ret void
}
declare void @llvm.x86.sse2.lfence() nounwind

define void @mfence() nounwind {
; CHECK-LABEL: mfence:
; CHECK: ## %bb.0:
; CHECK-NEXT: mfence ## encoding: [0x0f,0xae,0xf0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
tail call void @llvm.x86.sse2.mfence()
ret void
}
declare void @llvm.x86.sse2.mfence() nounwind

define void @clflush(ptr %p) nounwind {
; X86-LABEL: clflush:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: clflush (%eax) ## encoding: [0x0f,0xae,0x38]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: clflush:
; X64: ## %bb.0:
; X64-NEXT: clflush (%rdi) ## encoding: [0x0f,0xae,0x3f]
; X64-NEXT: retq ## encoding: [0xc3]
tail call void @llvm.x86.sse2.clflush(ptr %p)
ret void
}
declare void @llvm.x86.sse2.clflush(ptr) nounwind