1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,AVX,X86-AVX
3 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,AVX512VL,X86-AVX512VL
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,AVX,X64-AVX
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,AVX512VL,X64-AVX512VL
7 ; We don't check any vinsertf128 variant with immediate 0 because that's just a blend.
9 define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
10 ; AVX-LABEL: test_x86_avx_sqrt_pd_256:
12 ; AVX-NEXT: vsqrtpd %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x51,0xc0]
13 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
15 ; AVX512VL-LABEL: test_x86_avx_sqrt_pd_256:
17 ; AVX512VL-NEXT: vsqrtpd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x51,0xc0]
18 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
19 %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
22 declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
24 define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
25 ; AVX-LABEL: test_x86_avx_sqrt_ps_256:
27 ; AVX-NEXT: vsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x51,0xc0]
28 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
30 ; AVX512VL-LABEL: test_x86_avx_sqrt_ps_256:
32 ; AVX512VL-NEXT: vsqrtps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x51,0xc0]
33 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
34 %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
37 declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
39 define <4 x double> @test_x86_avx_vinsertf128_pd_256_1(<4 x double> %a0, <2 x double> %a1) {
40 ; AVX-LABEL: test_x86_avx_vinsertf128_pd_256_1:
42 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
43 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
45 ; AVX512VL-LABEL: test_x86_avx_vinsertf128_pd_256_1:
47 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
48 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
49 %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 1)
52 declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
54 define <8 x float> @test_x86_avx_vinsertf128_ps_256_1(<8 x float> %a0, <4 x float> %a1) {
55 ; AVX-LABEL: test_x86_avx_vinsertf128_ps_256_1:
57 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
58 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
60 ; AVX512VL-LABEL: test_x86_avx_vinsertf128_ps_256_1:
62 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
63 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
64 %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 1)
67 declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
69 define <8 x i32> @test_x86_avx_vinsertf128_si_256_1(<8 x i32> %a0, <4 x i32> %a1) {
70 ; AVX-LABEL: test_x86_avx_vinsertf128_si_256_1:
72 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
73 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
75 ; AVX512VL-LABEL: test_x86_avx_vinsertf128_si_256_1:
77 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
78 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
79 %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 1)
83 ; Verify that high bits of the immediate are masked off. This should be the equivalent
84 ; of a vinsertf128 $0 which should be optimized into a blend, so just check that it's
85 ; not a vinsertf128 $1.
86 define <8 x i32> @test_x86_avx_vinsertf128_si_256_2(<8 x i32> %a0, <4 x i32> %a1) {
87 ; CHECK-LABEL: test_x86_avx_vinsertf128_si_256_2:
89 ; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
90 ; CHECK-NEXT: vblendps $15, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0x0f]
91 ; CHECK-NEXT: # ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
92 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
93 %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 2)
96 declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone
98 ; We don't check any vextractf128 variant with immediate 0 because that's just a move.
100 define <2 x double> @test_x86_avx_vextractf128_pd_256_1(<4 x double> %a0) {
101 ; AVX-LABEL: test_x86_avx_vextractf128_pd_256_1:
103 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
104 ; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
105 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
107 ; AVX512VL-LABEL: test_x86_avx_vextractf128_pd_256_1:
109 ; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
110 ; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
111 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
112 %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 1)
113 ret <2 x double> %res
115 declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
117 define <4 x float> @test_x86_avx_vextractf128_ps_256_1(<8 x float> %a0) {
118 ; AVX-LABEL: test_x86_avx_vextractf128_ps_256_1:
120 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
121 ; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
122 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
124 ; AVX512VL-LABEL: test_x86_avx_vextractf128_ps_256_1:
126 ; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
127 ; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
128 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
129 %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 1)
132 declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
134 define <4 x i32> @test_x86_avx_vextractf128_si_256_1(<8 x i32> %a0) {
135 ; AVX-LABEL: test_x86_avx_vextractf128_si_256_1:
137 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
138 ; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
139 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
141 ; AVX512VL-LABEL: test_x86_avx_vextractf128_si_256_1:
143 ; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
144 ; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
145 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
146 %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 1)
149 declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone
151 ; Verify that high bits of the immediate are masked off. This should be the equivalent
152 ; of a vextractf128 $0 which should be optimized away, so just check that it's
153 ; not a vextractf128 of any kind.
154 define <2 x double> @test_x86_avx_extractf128_pd_256_2(<4 x double> %a0) {
155 ; CHECK-LABEL: test_x86_avx_extractf128_pd_256_2:
157 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
158 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
159 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
160 %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 2)
161 ret <2 x double> %res
165 define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
166 ; X86-AVX-LABEL: test_x86_avx_vbroadcastf128_pd_256:
168 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
169 ; X86-AVX-NEXT: vbroadcastf128 (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
170 ; X86-AVX-NEXT: # ymm0 = mem[0,1,0,1]
171 ; X86-AVX-NEXT: retl # encoding: [0xc3]
173 ; X86-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_pd_256:
174 ; X86-AVX512VL: # %bb.0:
175 ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
176 ; X86-AVX512VL-NEXT: vbroadcastf128 (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
177 ; X86-AVX512VL-NEXT: # ymm0 = mem[0,1,0,1]
178 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
180 ; X64-AVX-LABEL: test_x86_avx_vbroadcastf128_pd_256:
182 ; X64-AVX-NEXT: vbroadcastf128 (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
183 ; X64-AVX-NEXT: # ymm0 = mem[0,1,0,1]
184 ; X64-AVX-NEXT: retq # encoding: [0xc3]
186 ; X64-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_pd_256:
187 ; X64-AVX512VL: # %bb.0:
188 ; X64-AVX512VL-NEXT: vbroadcastf128 (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
189 ; X64-AVX512VL-NEXT: # ymm0 = mem[0,1,0,1]
190 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
191 %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
192 ret <4 x double> %res
194 declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly
197 define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
198 ; X86-AVX-LABEL: test_x86_avx_vbroadcastf128_ps_256:
200 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
201 ; X86-AVX-NEXT: vbroadcastf128 (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
202 ; X86-AVX-NEXT: # ymm0 = mem[0,1,0,1]
203 ; X86-AVX-NEXT: retl # encoding: [0xc3]
205 ; X86-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_ps_256:
206 ; X86-AVX512VL: # %bb.0:
207 ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
208 ; X86-AVX512VL-NEXT: vbroadcastf128 (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
209 ; X86-AVX512VL-NEXT: # ymm0 = mem[0,1,0,1]
210 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
212 ; X64-AVX-LABEL: test_x86_avx_vbroadcastf128_ps_256:
214 ; X64-AVX-NEXT: vbroadcastf128 (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
215 ; X64-AVX-NEXT: # ymm0 = mem[0,1,0,1]
216 ; X64-AVX-NEXT: retq # encoding: [0xc3]
218 ; X64-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_ps_256:
219 ; X64-AVX512VL: # %bb.0:
220 ; X64-AVX512VL-NEXT: vbroadcastf128 (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
221 ; X64-AVX512VL-NEXT: # ymm0 = mem[0,1,0,1]
222 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
223 %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
226 declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
229 define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
230 ; CHECK-LABEL: test_x86_avx_blend_pd_256:
232 ; CHECK-NEXT: vblendps $192, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x0c,0xc0,0xc0]
233 ; CHECK-NEXT: # ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
234 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
235 %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
236 ret <4 x double> %res
238 declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone
241 define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
242 ; CHECK-LABEL: test_x86_avx_blend_ps_256:
244 ; CHECK-NEXT: vblendps $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0x07]
245 ; CHECK-NEXT: # ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
246 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
247 %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
250 declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
253 define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
254 ; CHECK-LABEL: test_x86_avx_dp_ps_256:
256 ; CHECK-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x40,0xc1,0x07]
257 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
258 %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
261 declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
264 define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
265 ; AVX-LABEL: test_x86_sse2_psll_dq:
267 ; AVX-NEXT: vpslldq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x01]
268 ; AVX-NEXT: # xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
269 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
271 ; AVX512VL-LABEL: test_x86_sse2_psll_dq:
273 ; AVX512VL-NEXT: vpslldq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x01]
274 ; AVX512VL-NEXT: # xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
275 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
276 %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
279 declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
282 define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
283 ; AVX-LABEL: test_x86_sse2_psrl_dq:
285 ; AVX-NEXT: vpsrldq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x01]
286 ; AVX-NEXT: # xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
287 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
289 ; AVX512VL-LABEL: test_x86_sse2_psrl_dq:
291 ; AVX512VL-NEXT: vpsrldq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x01]
292 ; AVX512VL-NEXT: # xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
293 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
294 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
297 declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
300 define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
301 ; CHECK-LABEL: test_x86_sse41_blendpd:
303 ; CHECK-NEXT: vblendps $3, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x03]
304 ; CHECK-NEXT: # xmm0 = xmm0[0,1],xmm1[2,3]
305 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
306 %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 2) ; <<2 x double>> [#uses=1]
307 ret <2 x double> %res
309 declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
312 define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
313 ; CHECK-LABEL: test_x86_sse41_blendps:
315 ; CHECK-NEXT: vblendps $8, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x08]
316 ; CHECK-NEXT: # xmm0 = xmm1[0,1,2],xmm0[3]
317 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
318 %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
321 declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
324 define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
325 ; CHECK-LABEL: test_x86_sse41_pblendw:
327 ; CHECK-NEXT: vpblendw $7, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x07]
328 ; CHECK-NEXT: # xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
329 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
330 %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
333 declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
336 define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
337 ; AVX-LABEL: test_x86_sse41_pmovsxbd:
339 ; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x21,0xc0]
340 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
342 ; AVX512VL-LABEL: test_x86_sse41_pmovsxbd:
344 ; AVX512VL-NEXT: vpmovsxbd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0xc0]
345 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
346 %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
349 declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
352 define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
353 ; AVX-LABEL: test_x86_sse41_pmovsxbq:
355 ; AVX-NEXT: vpmovsxbq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x22,0xc0]
356 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
358 ; AVX512VL-LABEL: test_x86_sse41_pmovsxbq:
360 ; AVX512VL-NEXT: vpmovsxbq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0xc0]
361 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
362 %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
365 declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
368 define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
369 ; AVX-LABEL: test_x86_sse41_pmovsxbw:
371 ; AVX-NEXT: vpmovsxbw %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x20,0xc0]
372 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
374 ; AVX512VL-LABEL: test_x86_sse41_pmovsxbw:
376 ; AVX512VL-NEXT: vpmovsxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0xc0]
377 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
378 %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
381 declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
384 define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
385 ; AVX-LABEL: test_x86_sse41_pmovsxdq:
387 ; AVX-NEXT: vpmovsxdq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x25,0xc0]
388 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
390 ; AVX512VL-LABEL: test_x86_sse41_pmovsxdq:
392 ; AVX512VL-NEXT: vpmovsxdq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x25,0xc0]
393 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
394 %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
397 declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
400 define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
401 ; AVX-LABEL: test_x86_sse41_pmovsxwd:
403 ; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x23,0xc0]
404 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
406 ; AVX512VL-LABEL: test_x86_sse41_pmovsxwd:
408 ; AVX512VL-NEXT: vpmovsxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x23,0xc0]
409 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
410 %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
413 declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
416 define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
417 ; AVX-LABEL: test_x86_sse41_pmovsxwq:
419 ; AVX-NEXT: vpmovsxwq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x24,0xc0]
420 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
422 ; AVX512VL-LABEL: test_x86_sse41_pmovsxwq:
424 ; AVX512VL-NEXT: vpmovsxwq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x24,0xc0]
425 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
426 %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
429 declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
432 define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
433 ; AVX-LABEL: test_x86_sse41_pmovzxbd:
435 ; AVX-NEXT: vpmovzxbd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x31,0xc0]
436 ; AVX-NEXT: # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
437 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
439 ; AVX512VL-LABEL: test_x86_sse41_pmovzxbd:
441 ; AVX512VL-NEXT: vpmovzxbd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x31,0xc0]
442 ; AVX512VL-NEXT: # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
443 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
444 %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
447 declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
450 define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
451 ; AVX-LABEL: test_x86_sse41_pmovzxbq:
453 ; AVX-NEXT: vpmovzxbq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x32,0xc0]
454 ; AVX-NEXT: # xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
455 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
457 ; AVX512VL-LABEL: test_x86_sse41_pmovzxbq:
459 ; AVX512VL-NEXT: vpmovzxbq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xc0]
460 ; AVX512VL-NEXT: # xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
461 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
462 %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
465 declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
468 define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
469 ; AVX-LABEL: test_x86_sse41_pmovzxbw:
471 ; AVX-NEXT: vpmovzxbw %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x30,0xc0]
472 ; AVX-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
473 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
475 ; AVX512VL-LABEL: test_x86_sse41_pmovzxbw:
477 ; AVX512VL-NEXT: vpmovzxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x30,0xc0]
478 ; AVX512VL-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
479 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
480 %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
483 declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
486 define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
487 ; AVX-LABEL: test_x86_sse41_pmovzxdq:
489 ; AVX-NEXT: vpmovzxdq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x35,0xc0]
490 ; AVX-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero
491 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
493 ; AVX512VL-LABEL: test_x86_sse41_pmovzxdq:
495 ; AVX512VL-NEXT: vpmovzxdq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x35,0xc0]
496 ; AVX512VL-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero
497 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
498 %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
501 declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
504 define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
505 ; AVX-LABEL: test_x86_sse41_pmovzxwd:
507 ; AVX-NEXT: vpmovzxwd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x33,0xc0]
508 ; AVX-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
509 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
511 ; AVX512VL-LABEL: test_x86_sse41_pmovzxwd:
513 ; AVX512VL-NEXT: vpmovzxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0]
514 ; AVX512VL-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
515 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
516 %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
519 declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
522 define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
523 ; AVX-LABEL: test_x86_sse41_pmovzxwq:
525 ; AVX-NEXT: vpmovzxwq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x34,0xc0]
526 ; AVX-NEXT: # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
527 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
529 ; AVX512VL-LABEL: test_x86_sse41_pmovzxwq:
531 ; AVX512VL-NEXT: vpmovzxwq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xc0]
532 ; AVX512VL-NEXT: # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
533 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
534 %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
537 declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
540 define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
541 ; AVX-LABEL: test_x86_sse2_cvtdq2pd:
543 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xe6,0xc0]
544 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
546 ; AVX512VL-LABEL: test_x86_sse2_cvtdq2pd:
548 ; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0]
549 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
550 %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
551 ret <2 x double> %res
553 declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
556 define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
557 ; AVX-LABEL: test_x86_avx_cvtdq2_pd_256:
559 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 # encoding: [0xc5,0xfe,0xe6,0xc0]
560 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
562 ; AVX512VL-LABEL: test_x86_avx_cvtdq2_pd_256:
564 ; AVX512VL-NEXT: vcvtdq2pd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0xe6,0xc0]
565 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
566 %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
567 ret <4 x double> %res
569 declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
572 define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
573 ; AVX-LABEL: test_x86_sse2_cvtps2pd:
575 ; AVX-NEXT: vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0]
576 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
578 ; AVX512VL-LABEL: test_x86_sse2_cvtps2pd:
580 ; AVX512VL-NEXT: vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
581 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
582 %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
583 ret <2 x double> %res
585 declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
588 define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
589 ; AVX-LABEL: test_x86_avx_cvt_ps2_pd_256:
591 ; AVX-NEXT: vcvtps2pd %xmm0, %ymm0 # encoding: [0xc5,0xfc,0x5a,0xc0]
592 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
594 ; AVX512VL-LABEL: test_x86_avx_cvt_ps2_pd_256:
596 ; AVX512VL-NEXT: vcvtps2pd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0xc0]
597 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
598 %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
599 ret <4 x double> %res
601 declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
604 define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
605 ; The add forces the integer execution domain, so the store is emitted as vmovdqu.
606 ; X86-AVX-LABEL: test_x86_sse2_storeu_dq:
608 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
609 ; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
610 ; X86-AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
611 ; X86-AVX-NEXT: vmovdqu %xmm0, (%eax) # encoding: [0xc5,0xfa,0x7f,0x00]
612 ; X86-AVX-NEXT: retl # encoding: [0xc3]
614 ; X86-AVX512VL-LABEL: test_x86_sse2_storeu_dq:
615 ; X86-AVX512VL: # %bb.0:
616 ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
617 ; X86-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
618 ; X86-AVX512VL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
619 ; X86-AVX512VL-NEXT: vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
620 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
622 ; X64-AVX-LABEL: test_x86_sse2_storeu_dq:
624 ; X64-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
625 ; X64-AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
626 ; X64-AVX-NEXT: vmovdqu %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x7f,0x07]
627 ; X64-AVX-NEXT: retq # encoding: [0xc3]
629 ; X64-AVX512VL-LABEL: test_x86_sse2_storeu_dq:
630 ; X64-AVX512VL: # %bb.0:
631 ; X64-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
632 ; X64-AVX512VL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
633 ; X64-AVX512VL-NEXT: vmovdqu %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x07]
634 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
635 %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
636 call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
639 declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
642 define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
643 ; The fadd forces the double-precision execution domain, so the store is emitted as vmovupd.
644 ; X86-AVX-LABEL: test_x86_sse2_storeu_pd:
646 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
647 ; X86-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
648 ; X86-AVX-NEXT: vmovhpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
649 ; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
650 ; X86-AVX-NEXT: # xmm1 = xmm1[0],mem[0]
651 ; X86-AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
652 ; X86-AVX-NEXT: vmovupd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x11,0x00]
653 ; X86-AVX-NEXT: retl # encoding: [0xc3]
655 ; X86-AVX512VL-LABEL: test_x86_sse2_storeu_pd:
656 ; X86-AVX512VL: # %bb.0:
657 ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
658 ; X86-AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
659 ; X86-AVX512VL-NEXT: vmovhpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
660 ; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
661 ; X86-AVX512VL-NEXT: # xmm1 = xmm1[0],mem[0]
662 ; X86-AVX512VL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
663 ; X86-AVX512VL-NEXT: vmovupd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x00]
664 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
666 ; X64-AVX-LABEL: test_x86_sse2_storeu_pd:
668 ; X64-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
669 ; X64-AVX-NEXT: vmovhpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
670 ; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
671 ; X64-AVX-NEXT: # xmm1 = xmm1[0],mem[0]
672 ; X64-AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
673 ; X64-AVX-NEXT: vmovupd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x11,0x07]
674 ; X64-AVX-NEXT: retq # encoding: [0xc3]
676 ; X64-AVX512VL-LABEL: test_x86_sse2_storeu_pd:
677 ; X64-AVX512VL: # %bb.0:
678 ; X64-AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
679 ; X64-AVX512VL-NEXT: vmovhpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
680 ; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
681 ; X64-AVX512VL-NEXT: # xmm1 = xmm1[0],mem[0]
682 ; X64-AVX512VL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
683 ; X64-AVX512VL-NEXT: vmovupd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x07]
684 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
685 %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
686 call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
689 declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
; Passes %a1 unchanged to llvm.x86.sse.storeu.ps. Every run is expected to emit
; a single vmovups of %xmm0 to the pointer argument. The i686 runs first load
; the pointer from the stack into %eax, the x86_64 runs store through %rdi.
692 define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
693 ; X86-AVX-LABEL: test_x86_sse_storeu_ps:
695 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
696 ; X86-AVX-NEXT: vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00]
697 ; X86-AVX-NEXT: retl # encoding: [0xc3]
699 ; X86-AVX512VL-LABEL: test_x86_sse_storeu_ps:
700 ; X86-AVX512VL: # %bb.0:
701 ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
702 ; X86-AVX512VL-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
703 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
705 ; X64-AVX-LABEL: test_x86_sse_storeu_ps:
707 ; X64-AVX-NEXT: vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07]
708 ; X64-AVX-NEXT: retq # encoding: [0xc3]
710 ; X64-AVX512VL-LABEL: test_x86_sse_storeu_ps:
711 ; X64-AVX512VL: # %bb.0:
712 ; X64-AVX512VL-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
713 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
714 call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
717 declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
; Adds 1 to every byte of %a1 and stores the result with
; llvm.x86.avx.storeu.dq.256. The +avx runs are expected to split the work
; into two 128-bit halves (vextractf128, two vpsubb, two vmovdqu stores at
; offsets 0 and 16). The +avx512vl runs are expected to use one 256-bit vpsubb
; and one 256-bit vmovdqu. In both cases the increment is checked as a
; vpcmpeqd of a register with itself followed by vpsubb (presumably
; subtracting an all-ones vector).
720 define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
721 ; FIXME: unfortunately the execution domain fix pass changes this to vmovups and it's hard to force with no 256-bit integer instructions
; NOTE(review) the assertions below expect vmovdqu rather than vmovups, which
; suggests the add workaround is effective - confirm whether the FIXME above
; still applies.
722 ; add operation forces the execution domain.
723 ; X86-AVX-LABEL: test_x86_avx_storeu_dq_256:
725 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
726 ; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
727 ; X86-AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm2 # encoding: [0xc5,0xf9,0xf8,0xd1]
728 ; X86-AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
729 ; X86-AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
730 ; X86-AVX-NEXT: vmovdqu %xmm0, 16(%eax) # encoding: [0xc5,0xfa,0x7f,0x40,0x10]
731 ; X86-AVX-NEXT: vmovdqu %xmm2, (%eax) # encoding: [0xc5,0xfa,0x7f,0x10]
732 ; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
733 ; X86-AVX-NEXT: retl # encoding: [0xc3]
735 ; X86-AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
736 ; X86-AVX512VL: # %bb.0:
737 ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
738 ; X86-AVX512VL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9]
739 ; X86-AVX512VL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf8,0xc1]
740 ; X86-AVX512VL-NEXT: vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
741 ; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
742 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
744 ; X64-AVX-LABEL: test_x86_avx_storeu_dq_256:
746 ; X64-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
747 ; X64-AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm2 # encoding: [0xc5,0xf9,0xf8,0xd1]
748 ; X64-AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
749 ; X64-AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
750 ; X64-AVX-NEXT: vmovdqu %xmm0, 16(%rdi) # encoding: [0xc5,0xfa,0x7f,0x47,0x10]
751 ; X64-AVX-NEXT: vmovdqu %xmm2, (%rdi) # encoding: [0xc5,0xfa,0x7f,0x17]
752 ; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
753 ; X64-AVX-NEXT: retq # encoding: [0xc3]
755 ; X64-AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
756 ; X64-AVX512VL: # %bb.0:
757 ; X64-AVX512VL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9]
758 ; X64-AVX512VL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf8,0xc1]
759 ; X64-AVX512VL-NEXT: vmovdqu %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x07]
760 ; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
761 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
762 %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
763 call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
766 declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
; Adds an all-zero <4 x double> to %a1 and stores the sum with
; llvm.x86.avx.storeu.pd.256. Every run is expected to zero a register with
; vxorpd, perform a 256-bit vaddpd, store with a 256-bit vmovupd and issue
; vzeroupper before returning.
769 define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
770 ; add operation forces the execution domain.
771 ; X86-AVX-LABEL: test_x86_avx_storeu_pd_256:
773 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
774 ; X86-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
775 ; X86-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
776 ; X86-AVX-NEXT: vmovupd %ymm0, (%eax) # encoding: [0xc5,0xfd,0x11,0x00]
777 ; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
778 ; X86-AVX-NEXT: retl # encoding: [0xc3]
780 ; X86-AVX512VL-LABEL: test_x86_avx_storeu_pd_256:
781 ; X86-AVX512VL: # %bb.0:
782 ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
783 ; X86-AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
784 ; X86-AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
785 ; X86-AVX512VL-NEXT: vmovupd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x00]
786 ; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
787 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
789 ; X64-AVX-LABEL: test_x86_avx_storeu_pd_256:
791 ; X64-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
792 ; X64-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
793 ; X64-AVX-NEXT: vmovupd %ymm0, (%rdi) # encoding: [0xc5,0xfd,0x11,0x07]
794 ; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
795 ; X64-AVX-NEXT: retq # encoding: [0xc3]
797 ; X64-AVX512VL-LABEL: test_x86_avx_storeu_pd_256:
798 ; X64-AVX512VL: # %bb.0:
799 ; X64-AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
800 ; X64-AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
801 ; X64-AVX512VL-NEXT: vmovupd %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x07]
802 ; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
803 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
804 %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
805 call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
808 declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
; Passes %a1 unchanged to llvm.x86.avx.storeu.ps.256. Every run is expected to
; emit a single 256-bit vmovups of %ymm0 to the pointer argument, followed by
; vzeroupper and the return.
811 define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
812 ; X86-AVX-LABEL: test_x86_avx_storeu_ps_256:
814 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
815 ; X86-AVX-NEXT: vmovups %ymm0, (%eax) # encoding: [0xc5,0xfc,0x11,0x00]
816 ; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
817 ; X86-AVX-NEXT: retl # encoding: [0xc3]
819 ; X86-AVX512VL-LABEL: test_x86_avx_storeu_ps_256:
820 ; X86-AVX512VL: # %bb.0:
821 ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
822 ; X86-AVX512VL-NEXT: vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
823 ; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
824 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
826 ; X64-AVX-LABEL: test_x86_avx_storeu_ps_256:
828 ; X64-AVX-NEXT: vmovups %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x11,0x07]
829 ; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
830 ; X64-AVX-NEXT: retq # encoding: [0xc3]
832 ; X64-AVX512VL-LABEL: test_x86_avx_storeu_ps_256:
833 ; X64-AVX512VL: # %bb.0:
834 ; X64-AVX512VL-NEXT: vmovups %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
835 ; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
836 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
837 call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
840 declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
; Calls llvm.x86.avx.vpermil.pd with immediate 1 and returns the result. The
; shared checks (used by both the i686 and x86_64 runs, hence the retl/retq
; pattern) expect vpermilpd $1 with the decoded shuffle xmm0[1,0].
843 define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
844 ; AVX-LABEL: test_x86_avx_vpermil_pd:
846 ; AVX-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
847 ; AVX-NEXT: # xmm0 = xmm0[1,0]
848 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
850 ; AVX512VL-LABEL: test_x86_avx_vpermil_pd:
852 ; AVX512VL-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
853 ; AVX512VL-NEXT: # xmm0 = xmm0[1,0]
854 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
855 %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1]
856 ret <2 x double> %res
858 declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone
; Calls llvm.x86.avx.vpermil.pd.256 with immediate 7 and returns the result.
; Both check groups expect vpermilpd $7 on %ymm0 with the decoded shuffle
; ymm0[1,1,3,2].
861 define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
862 ; AVX-LABEL: test_x86_avx_vpermil_pd_256:
864 ; AVX-NEXT: vpermilpd $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x07]
865 ; AVX-NEXT: # ymm0 = ymm0[1,1,3,2]
866 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
868 ; AVX512VL-LABEL: test_x86_avx_vpermil_pd_256:
870 ; AVX512VL-NEXT: vpermilpd $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x07]
871 ; AVX512VL-NEXT: # ymm0 = ymm0[1,1,3,2]
872 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
873 %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
874 ret <4 x double> %res
876 declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone
; Calls llvm.x86.avx.vpermil.ps with immediate 7. Both check groups expect
; vpermilps $7 on %xmm0 with the decoded shuffle xmm0[3,1,0,0].
879 define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
880 ; AVX-LABEL: test_x86_avx_vpermil_ps:
882 ; AVX-NEXT: vpermilps $7, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x07]
883 ; AVX-NEXT: # xmm0 = xmm0[3,1,0,0]
884 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
886 ; AVX512VL-LABEL: test_x86_avx_vpermil_ps:
888 ; AVX512VL-NEXT: vpermilps $7, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x07]
889 ; AVX512VL-NEXT: # xmm0 = xmm0[3,1,0,0]
890 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
891 %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
894 declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone
; Calls llvm.x86.avx.vpermil.ps.256 with immediate 7. Both check groups expect
; vpermilps $7 on %ymm0 with the decoded shuffle ymm0[3,1,0,0,7,5,4,4], i.e.
; the same selection applied to each group of four elements.
897 define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
898 ; AVX-LABEL: test_x86_avx_vpermil_ps_256:
900 ; AVX-NEXT: vpermilps $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x07]
901 ; AVX-NEXT: # ymm0 = ymm0[3,1,0,0,7,5,4,4]
902 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
904 ; AVX512VL-LABEL: test_x86_avx_vpermil_ps_256:
906 ; AVX512VL-NEXT: vpermilps $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x07]
907 ; AVX512VL-NEXT: # ymm0 = ymm0[3,1,0,0,7,5,4,4]
908 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
909 %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
912 declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone
; Calls llvm.x86.avx.vperm2f128.pd.256 on %a0 and %a1 with immediate 3 and
; returns the result. Note that the expected instruction does not carry the IR
; immediate verbatim - both check groups expect vperm2f128 $33 with the decoded
; shuffle ymm1[2,3],ymm0[0,1].
915 define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
916 ; AVX-LABEL: test_x86_avx_vperm2f128_pd_256:
918 ; AVX-NEXT: vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
919 ; AVX-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1]
920 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
922 ; AVX512VL-LABEL: test_x86_avx_vperm2f128_pd_256:
924 ; AVX512VL-NEXT: vperm2f128 $33, %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
925 ; AVX512VL-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1]
926 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
927 %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3) ; <<4 x double>> [#uses=1]
928 ret <4 x double> %res
930 declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
; Calls llvm.x86.avx.vperm2f128.ps.256 on %a0 and %a1 with immediate 3. Both
; check groups expect vperm2f128 $33 with the decoded shuffle
; ymm1[2,3],ymm0[0,1], so the emitted immediate differs from the IR value.
933 define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
934 ; AVX-LABEL: test_x86_avx_vperm2f128_ps_256:
936 ; AVX-NEXT: vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
937 ; AVX-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1]
938 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
940 ; AVX512VL-LABEL: test_x86_avx_vperm2f128_ps_256:
942 ; AVX512VL-NEXT: vperm2f128 $33, %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
943 ; AVX512VL-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1]
944 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
945 %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 3) ; <<8 x float>> [#uses=1]
948 declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
; Calls llvm.x86.avx.vperm2f128.si.256 on two <8 x i32> values with immediate
; 3. The expected mnemonic depends on the feature set. The +avx runs expect
; vperm2f128 $33 (fourth encoding byte 0x06), while the +avx512vl runs expect
; vperm2i128 $33 (fourth encoding byte 0x46). The decoded shuffle is
; ymm1[2,3],ymm0[0,1] in both cases.
951 define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
952 ; AVX-LABEL: test_x86_avx_vperm2f128_si_256:
954 ; AVX-NEXT: vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
955 ; AVX-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1]
956 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
958 ; AVX512VL-LABEL: test_x86_avx_vperm2f128_si_256:
960 ; AVX512VL-NEXT: vperm2i128 $33, %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x46,0xc0,0x21]
961 ; AVX512VL-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1]
962 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
963 %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 3) ; <<8 x i32>> [#uses=1]
966 declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
; Calls llvm.x86.avx.cvtdq2.ps.256 to turn an <8 x i32> argument into an
; <8 x float> result. Both check groups expect a single vcvtdq2ps on %ymm0
; with identical encoding bytes, followed by the return.
969 define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
970 ; AVX-LABEL: test_x86_avx_cvtdq2_ps_256:
972 ; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x5b,0xc0]
973 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
975 ; AVX512VL-LABEL: test_x86_avx_cvtdq2_ps_256:
977 ; AVX512VL-NEXT: vcvtdq2ps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5b,0xc0]
978 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
979 %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
982 declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone