1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+pclmul,+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX
3 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+pclmul,+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX512VL,X86-AVX512VL
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+pclmul,+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+pclmul,+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX512VL,X64-AVX512VL
; Test for the llvm.x86.avx.addsub.pd.256 intrinsic on <4 x double> operands.
; All four run configurations use the common prefix here and expect a single
; vaddsubpd on ymm registers followed by the return.
7 define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
8 ; CHECK-LABEL: test_x86_avx_addsub_pd_256:
10 ; CHECK-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xd0,0xc1]
11 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
12 %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
15 declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Test for the llvm.x86.avx.addsub.ps.256 intrinsic on <8 x float> operands.
; The common prefix expects a single vaddsubps on ymm registers followed by
; the return.
18 define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
19 ; CHECK-LABEL: test_x86_avx_addsub_ps_256:
21 ; CHECK-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xff,0xd0,0xc1]
22 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
23 %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
26 declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Test for the three-operand llvm.x86.avx.blendv.pd.256 intrinsic. The common
; prefix expects a single vblendvpd that reads ymm2, ymm1 and ymm0 and writes
; ymm0, followed by the return.
29 define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
30 ; CHECK-LABEL: test_x86_avx_blendv_pd_256:
32 ; CHECK-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x4b,0xc1,0x20]
33 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
34 %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
37 declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; Test for the three-operand llvm.x86.avx.blendv.ps.256 intrinsic. The common
; prefix expects a single vblendvps that reads ymm2, ymm1 and ymm0 and writes
; ymm0, followed by the return.
40 define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
41 ; CHECK-LABEL: test_x86_avx_blendv_ps_256:
43 ; CHECK-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x4a,0xc1,0x20]
44 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
45 %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
48 declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
; Test for llvm.x86.avx.cmp.pd.256 with predicate immediate i8 7. The common
; prefix expects the compare to be printed with the vcmpordpd mnemonic and the
; final encoding byte 0x07.
51 define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
52 ; CHECK-LABEL: test_x86_avx_cmp_pd_256:
54 ; CHECK-NEXT: vcmpordpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xc2,0xc1,0x07]
55 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
56 %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
59 declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
; Test for llvm.x86.avx.cmp.ps.256 with predicate immediate i8 7. The common
; prefix expects the compare to be printed with the vcmpordps mnemonic and the
; final encoding byte 0x07.
62 define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
63 ; CHECK-LABEL: test_x86_avx_cmp_ps_256:
65 ; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0xc2,0xc1,0x07]
66 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
67 %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
; Test that every predicate immediate from 0 through 31 of
; llvm.x86.avx.cmp.ps.256 is printed with its own pseudo-op mnemonic. The body
; is a chain of 32 calls, each comparing %a0 against the previous call's
; result, so the expected output is 32 consecutive compares whose last
; encoding byte counts from 0x00 up to 0x1f. Every expected compare except the
; last writes ymm1; the last one writes ymm0.
71 define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
72 ; CHECK-LABEL: test_x86_avx_cmp_ps_256_pseudo_op:
74 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x00]
75 ; CHECK-NEXT: vcmpltps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x01]
76 ; CHECK-NEXT: vcmpleps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x02]
77 ; CHECK-NEXT: vcmpunordps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x03]
78 ; CHECK-NEXT: vcmpneqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x04]
79 ; CHECK-NEXT: vcmpnltps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x05]
80 ; CHECK-NEXT: vcmpnleps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x06]
81 ; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x07]
82 ; CHECK-NEXT: vcmpeq_uqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x08]
83 ; CHECK-NEXT: vcmpngeps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x09]
84 ; CHECK-NEXT: vcmpngtps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x0a]
85 ; CHECK-NEXT: vcmpfalseps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x0b]
86 ; CHECK-NEXT: vcmpneq_oqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x0c]
87 ; CHECK-NEXT: vcmpgeps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x0d]
88 ; CHECK-NEXT: vcmpgtps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x0e]
89 ; CHECK-NEXT: vcmptrueps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x0f]
90 ; CHECK-NEXT: vcmpeq_osps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x10]
91 ; CHECK-NEXT: vcmplt_oqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x11]
92 ; CHECK-NEXT: vcmple_oqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x12]
93 ; CHECK-NEXT: vcmpunord_sps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x13]
94 ; CHECK-NEXT: vcmpneq_usps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x14]
95 ; CHECK-NEXT: vcmpnlt_uqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x15]
96 ; CHECK-NEXT: vcmpnle_uqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x16]
97 ; CHECK-NEXT: vcmpord_sps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x17]
98 ; CHECK-NEXT: vcmpeq_usps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x18]
99 ; CHECK-NEXT: vcmpnge_uqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x19]
100 ; CHECK-NEXT: vcmpngt_uqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x1a]
101 ; CHECK-NEXT: vcmpfalse_osps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x1b]
102 ; CHECK-NEXT: vcmpneq_osps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x1c]
103 ; CHECK-NEXT: vcmpge_oqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x1d]
104 ; CHECK-NEXT: vcmpgt_oqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x1e]
105 ; CHECK-NEXT: vcmptrue_usps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0xc2,0xc1,0x1f]
106 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
107 %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
108 %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
109 %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
110 %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
111 %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
112 %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
113 %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
114 %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
115 %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
116 %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
117 %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
118 %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
119 %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
120 %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
121 %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
122 %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
123 %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
124 %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
125 %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
126 %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
127 %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
128 %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
129 %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
130 %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
131 %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
132 %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
133 %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
134 %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
135 %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
136 %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
137 %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
138 %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
141 declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
; Test for llvm.x86.avx.cvt.pd2.ps.256, which takes <4 x double> and returns
; <4 x float>. The AVX and AVX512VL configurations are checked separately.
; Both expect vcvtpd2ps from ymm0 to xmm0, then vzeroupper, then the return.
; The AVX512VL expectation has the same encoding bytes and additionally
; carries the "EVEX TO VEX Compression" annotation.
144 define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
145 ; AVX-LABEL: test_x86_avx_cvt_pd2_ps_256:
147 ; AVX-NEXT: vcvtpd2ps %ymm0, %xmm0 # encoding: [0xc5,0xfd,0x5a,0xc0]
148 ; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
149 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
151 ; AVX512VL-LABEL: test_x86_avx_cvt_pd2_ps_256:
153 ; AVX512VL-NEXT: vcvtpd2ps %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5a,0xc0]
154 ; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
155 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
156 %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
159 declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone
; Test for llvm.x86.avx.cvt.pd2dq.256, which takes <4 x double> and returns
; <4 x i32>. The AVX and AVX512VL configurations are checked separately. Both
; expect vcvtpd2dq from ymm0 to xmm0, then vzeroupper, then the return. The
; AVX512VL expectation has the same encoding bytes plus the "EVEX TO VEX
; Compression" annotation.
162 define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
163 ; AVX-LABEL: test_x86_avx_cvt_pd2dq_256:
165 ; AVX-NEXT: vcvtpd2dq %ymm0, %xmm0 # encoding: [0xc5,0xff,0xe6,0xc0]
166 ; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
167 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
169 ; AVX512VL-LABEL: test_x86_avx_cvt_pd2dq_256:
171 ; AVX512VL-NEXT: vcvtpd2dq %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xff,0xe6,0xc0]
172 ; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
173 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
174 %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
177 declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
; Test for llvm.x86.avx.cvt.ps2dq.256, which takes <8 x float> and returns
; <8 x i32>. The AVX and AVX512VL configurations are checked separately. Both
; expect a single vcvtps2dq on ymm0 followed by the return. The AVX512VL
; expectation has the same encoding bytes plus the "EVEX TO VEX Compression"
; annotation.
180 define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
181 ; AVX-LABEL: test_x86_avx_cvt_ps2dq_256:
183 ; AVX-NEXT: vcvtps2dq %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x5b,0xc0]
184 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
186 ; AVX512VL-LABEL: test_x86_avx_cvt_ps2dq_256:
188 ; AVX512VL-NEXT: vcvtps2dq %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5b,0xc0]
189 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
190 %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
193 declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
; Test for llvm.x86.avx.cvtt.pd2dq.256, which takes <4 x double> and returns
; <4 x i32>. The AVX and AVX512VL configurations are checked separately. Both
; expect vcvttpd2dq from ymm0 to xmm0, then vzeroupper, then the return. The
; AVX512VL expectation has the same encoding bytes plus the "EVEX TO VEX
; Compression" annotation.
196 define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
197 ; AVX-LABEL: test_x86_avx_cvtt_pd2dq_256:
199 ; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0 # encoding: [0xc5,0xfd,0xe6,0xc0]
200 ; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
201 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
203 ; AVX512VL-LABEL: test_x86_avx_cvtt_pd2dq_256:
205 ; AVX512VL-NEXT: vcvttpd2dq %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xc0]
206 ; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
207 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
208 %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
211 declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
; Test for llvm.x86.avx.cvtt.ps2dq.256, which takes <8 x float> and returns
; <8 x i32>. The AVX and AVX512VL configurations are checked separately. Both
; expect a single vcvttps2dq on ymm0 followed by the return. The AVX512VL
; expectation has the same encoding bytes plus the "EVEX TO VEX Compression"
; annotation.
214 define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
215 ; AVX-LABEL: test_x86_avx_cvtt_ps2dq_256:
217 ; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 # encoding: [0xc5,0xfe,0x5b,0xc0]
218 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
220 ; AVX512VL-LABEL: test_x86_avx_cvtt_ps2dq_256:
222 ; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xc0]
223 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
224 %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
227 declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
; Test for llvm.x86.avx.dp.ps.256 with the i8 immediate 7. The common prefix
; expects a single vdpps carrying $7 as its immediate operand (final encoding
; byte 0x07), followed by the return.
230 define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
231 ; CHECK-LABEL: test_x86_avx_dp_ps_256:
233 ; CHECK-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x40,0xc1,0x07]
234 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
235 %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
238 declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
; Test for the llvm.x86.avx.hadd.pd.256 intrinsic on <4 x double> operands.
; The common prefix expects a single vhaddpd on ymm registers followed by the
; return.
241 define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
242 ; CHECK-LABEL: test_x86_avx_hadd_pd_256:
244 ; CHECK-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x7c,0xc1]
245 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
246 %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
247 ret <4 x double> %res
249 declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Test for the llvm.x86.avx.hadd.ps.256 intrinsic on <8 x float> operands.
; The common prefix expects a single vhaddps on ymm registers followed by the
; return.
252 define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
253 ; CHECK-LABEL: test_x86_avx_hadd_ps_256:
255 ; CHECK-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xff,0x7c,0xc1]
256 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
257 %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
260 declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Test for the llvm.x86.avx.hsub.pd.256 intrinsic on <4 x double> operands.
; The common prefix expects a single vhsubpd on ymm registers followed by the
; return.
263 define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
264 ; CHECK-LABEL: test_x86_avx_hsub_pd_256:
266 ; CHECK-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x7d,0xc1]
267 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
268 %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
269 ret <4 x double> %res
271 declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Test for the llvm.x86.avx.hsub.ps.256 intrinsic on <8 x float> operands.
; The common prefix expects a single vhsubps on ymm registers followed by the
; return.
274 define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
275 ; CHECK-LABEL: test_x86_avx_hsub_ps_256:
277 ; CHECK-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xff,0x7d,0xc1]
278 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
279 %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
282 declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Test for llvm.x86.avx.ldu.dq.256, which loads <32 x i8> through an i8*
; argument. Expectations are split by target. The 32-bit runs first load the
; pointer from the stack into eax and then issue vlddqu from (%eax); the
; 64-bit runs issue vlddqu directly from (%rdi).
285 define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
286 ; X86-LABEL: test_x86_avx_ldu_dq_256:
288 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
289 ; X86-NEXT: vlddqu (%eax), %ymm0 # encoding: [0xc5,0xff,0xf0,0x00]
290 ; X86-NEXT: retl # encoding: [0xc3]
292 ; X64-LABEL: test_x86_avx_ldu_dq_256:
294 ; X64-NEXT: vlddqu (%rdi), %ymm0 # encoding: [0xc5,0xff,0xf0,0x07]
295 ; X64-NEXT: retq # encoding: [0xc3]
296 %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
299 declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
; Test for the 128-bit llvm.x86.avx.maskload.pd intrinsic (i8* pointer plus a
; <2 x i64> mask). Expectations are split by target. The 32-bit runs load the
; pointer from the stack into eax before the vmaskmovpd load; the 64-bit runs
; use (%rdi) directly. In both cases the mask register xmm0 is also the
; destination.
302 define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x i64> %mask) {
303 ; X86-LABEL: test_x86_avx_maskload_pd:
305 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
306 ; X86-NEXT: vmaskmovpd (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2d,0x00]
307 ; X86-NEXT: retl # encoding: [0xc3]
309 ; X64-LABEL: test_x86_avx_maskload_pd:
311 ; X64-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2d,0x07]
312 ; X64-NEXT: retq # encoding: [0xc3]
313 %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %mask) ; <<2 x double>> [#uses=1]
314 ret <2 x double> %res
316 declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>) nounwind readonly
; Test for the 256-bit llvm.x86.avx.maskload.pd.256 intrinsic (i8* pointer
; plus a <4 x i64> mask). Expectations are split by target. The 32-bit runs
; load the pointer from the stack into eax before the vmaskmovpd load; the
; 64-bit runs use (%rdi) directly. In both cases the mask register ymm0 is
; also the destination.
319 define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x i64> %mask) {
320 ; X86-LABEL: test_x86_avx_maskload_pd_256:
322 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
323 ; X86-NEXT: vmaskmovpd (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2d,0x00]
324 ; X86-NEXT: retl # encoding: [0xc3]
326 ; X64-LABEL: test_x86_avx_maskload_pd_256:
328 ; X64-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2d,0x07]
329 ; X64-NEXT: retq # encoding: [0xc3]
330 %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %mask) ; <<4 x double>> [#uses=1]
331 ret <4 x double> %res
333 declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>) nounwind readonly
; Test for the 128-bit llvm.x86.avx.maskload.ps intrinsic (i8* pointer plus a
; <4 x i32> mask). Expectations are split by target. The 32-bit runs load the
; pointer from the stack into eax before the vmaskmovps load; the 64-bit runs
; use (%rdi) directly. In both cases the mask register xmm0 is also the
; destination.
336 define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x i32> %mask) {
337 ; X86-LABEL: test_x86_avx_maskload_ps:
339 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
340 ; X86-NEXT: vmaskmovps (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2c,0x00]
341 ; X86-NEXT: retl # encoding: [0xc3]
343 ; X64-LABEL: test_x86_avx_maskload_ps:
345 ; X64-NEXT: vmaskmovps (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2c,0x07]
346 ; X64-NEXT: retq # encoding: [0xc3]
347 %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %mask) ; <<4 x float>> [#uses=1]
350 declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>) nounwind readonly
; Test for the 256-bit llvm.x86.avx.maskload.ps.256 intrinsic (i8* pointer
; plus a <8 x i32> mask). Expectations are split by target. The 32-bit runs
; load the pointer from the stack into eax before the vmaskmovps load; the
; 64-bit runs use (%rdi) directly. In both cases the mask register ymm0 is
; also the destination.
353 define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x i32> %mask) {
354 ; X86-LABEL: test_x86_avx_maskload_ps_256:
356 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
357 ; X86-NEXT: vmaskmovps (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2c,0x00]
358 ; X86-NEXT: retl # encoding: [0xc3]
360 ; X64-LABEL: test_x86_avx_maskload_ps_256:
362 ; X64-NEXT: vmaskmovps (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2c,0x07]
363 ; X64-NEXT: retq # encoding: [0xc3]
364 %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %mask) ; <<8 x float>> [#uses=1]
367 declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>) nounwind readonly
; Test for the 128-bit llvm.x86.avx.maskstore.pd intrinsic (pointer, mask in
; xmm0, data in xmm1; no return value). Expectations are split by target. The
; 32-bit runs load the pointer from the stack into eax before the vmaskmovpd
; store; the 64-bit runs store through (%rdi) directly.
370 define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2) {
371 ; X86-LABEL: test_x86_avx_maskstore_pd:
373 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
374 ; X86-NEXT: vmaskmovpd %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0x79,0x2f,0x08]
375 ; X86-NEXT: retl # encoding: [0xc3]
377 ; X64-LABEL: test_x86_avx_maskstore_pd:
379 ; X64-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0x79,0x2f,0x0f]
380 ; X64-NEXT: retq # encoding: [0xc3]
381 call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2)
384 declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind
; Test for the 256-bit llvm.x86.avx.maskstore.pd.256 intrinsic (pointer, mask
; in ymm0, data in ymm1; no return value). Expectations are split by target.
; The 32-bit runs load the pointer from the stack into eax first; the 64-bit
; runs store through (%rdi) directly. Both expect a vzeroupper between the
; vmaskmovpd store and the return.
387 define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x i64> %mask, <4 x double> %a2) {
388 ; X86-LABEL: test_x86_avx_maskstore_pd_256:
390 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
391 ; X86-NEXT: vmaskmovpd %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0x7d,0x2f,0x08]
392 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
393 ; X86-NEXT: retl # encoding: [0xc3]
395 ; X64-LABEL: test_x86_avx_maskstore_pd_256:
397 ; X64-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # encoding: [0xc4,0xe2,0x7d,0x2f,0x0f]
398 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
399 ; X64-NEXT: retq # encoding: [0xc3]
400 call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %mask, <4 x double> %a2)
403 declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwind
; Test for the 128-bit llvm.x86.avx.maskstore.ps intrinsic (pointer, mask in
; xmm0, data in xmm1; no return value). Expectations are split by target. The
; 32-bit runs load the pointer from the stack into eax before the vmaskmovps
; store; the 64-bit runs store through (%rdi) directly.
406 define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2) {
407 ; X86-LABEL: test_x86_avx_maskstore_ps:
409 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
410 ; X86-NEXT: vmaskmovps %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0x79,0x2e,0x08]
411 ; X86-NEXT: retl # encoding: [0xc3]
413 ; X64-LABEL: test_x86_avx_maskstore_ps:
415 ; X64-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0x79,0x2e,0x0f]
416 ; X64-NEXT: retq # encoding: [0xc3]
417 call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2)
420 declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind
; Test for the 256-bit llvm.x86.avx.maskstore.ps.256 intrinsic (pointer, mask
; in ymm0, data in ymm1; no return value). Expectations are split by target.
; The 32-bit runs load the pointer from the stack into eax first; the 64-bit
; runs store through (%rdi) directly. Both expect a vzeroupper between the
; vmaskmovps store and the return.
423 define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x i32> %mask, <8 x float> %a2) {
424 ; X86-LABEL: test_x86_avx_maskstore_ps_256:
426 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
427 ; X86-NEXT: vmaskmovps %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0x7d,0x2e,0x08]
428 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
429 ; X86-NEXT: retl # encoding: [0xc3]
431 ; X64-LABEL: test_x86_avx_maskstore_ps_256:
433 ; X64-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # encoding: [0xc4,0xe2,0x7d,0x2e,0x0f]
434 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
435 ; X64-NEXT: retq # encoding: [0xc3]
436 call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %mask, <8 x float> %a2)
439 declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind
; Test for the llvm.x86.avx.max.pd.256 intrinsic. The AVX and AVX512VL
; configurations are checked separately. Both expect a single vmaxpd on ymm
; registers followed by the return. The AVX512VL expectation has the same
; encoding bytes plus the "EVEX TO VEX Compression" annotation.
442 define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
443 ; AVX-LABEL: test_x86_avx_max_pd_256:
445 ; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x5f,0xc1]
446 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
448 ; AVX512VL-LABEL: test_x86_avx_max_pd_256:
450 ; AVX512VL-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5f,0xc1]
451 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
452 %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
453 ret <4 x double> %res
455 declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Test for the llvm.x86.avx.max.ps.256 intrinsic. The AVX and AVX512VL
; configurations are checked separately. Both expect a single vmaxps on ymm
; registers followed by the return. The AVX512VL expectation has the same
; encoding bytes plus the "EVEX TO VEX Compression" annotation.
458 define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
459 ; AVX-LABEL: test_x86_avx_max_ps_256:
461 ; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x5f,0xc1]
462 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
464 ; AVX512VL-LABEL: test_x86_avx_max_ps_256:
466 ; AVX512VL-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5f,0xc1]
467 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
468 %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
471 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Test for the llvm.x86.avx.min.pd.256 intrinsic. The AVX and AVX512VL
; configurations are checked separately. Both expect a single vminpd on ymm
; registers followed by the return. The AVX512VL expectation has the same
; encoding bytes plus the "EVEX TO VEX Compression" annotation.
474 define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
475 ; AVX-LABEL: test_x86_avx_min_pd_256:
477 ; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x5d,0xc1]
478 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
480 ; AVX512VL-LABEL: test_x86_avx_min_pd_256:
482 ; AVX512VL-NEXT: vminpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5d,0xc1]
483 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
484 %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
485 ret <4 x double> %res
487 declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Test for the llvm.x86.avx.min.ps.256 intrinsic. The AVX and AVX512VL
; configurations are checked separately. Both expect a single vminps on ymm
; registers followed by the return. The AVX512VL expectation has the same
; encoding bytes plus the "EVEX TO VEX Compression" annotation.
490 define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
491 ; AVX-LABEL: test_x86_avx_min_ps_256:
493 ; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x5d,0xc1]
494 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
496 ; AVX512VL-LABEL: test_x86_avx_min_ps_256:
498 ; AVX512VL-NEXT: vminps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5d,0xc1]
499 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
500 %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
503 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Test for llvm.x86.avx.movmsk.pd.256, which takes <4 x double> and returns
; i32. The common prefix expects vmovmskpd from ymm0 into eax, then
; vzeroupper, then the return.
506 define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
507 ; CHECK-LABEL: test_x86_avx_movmsk_pd_256:
509 ; CHECK-NEXT: vmovmskpd %ymm0, %eax # encoding: [0xc5,0xfd,0x50,0xc0]
510 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
511 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
512 %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1]
515 declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone
; Test for llvm.x86.avx.movmsk.ps.256, which takes <8 x float> and returns
; i32. The common prefix expects vmovmskps from ymm0 into eax, then
; vzeroupper, then the return.
518 define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
519 ; CHECK-LABEL: test_x86_avx_movmsk_ps_256:
521 ; CHECK-NEXT: vmovmskps %ymm0, %eax # encoding: [0xc5,0xfc,0x50,0xc0]
522 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
523 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
524 %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1]
527 declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
; Test for llvm.x86.avx.ptestc.256, which takes two <4 x i64> operands and
; returns i32. The common prefix expects eax to be zeroed with xorl, a vptest
; of ymm1 against ymm0, setb into al, vzeroupper, and then the return.
530 define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
531 ; CHECK-LABEL: test_x86_avx_ptestc_256:
533 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
534 ; CHECK-NEXT: vptest %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x17,0xc1]
535 ; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
536 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
537 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
538 %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
541 declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone
; Test for llvm.x86.avx.ptestnzc.256, which takes two <4 x i64> operands and
; returns i32. The common prefix expects eax to be zeroed with xorl, a vptest
; of ymm1 against ymm0, seta into al, vzeroupper, and then the return.
544 define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
545 ; CHECK-LABEL: test_x86_avx_ptestnzc_256:
547 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
548 ; CHECK-NEXT: vptest %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x17,0xc1]
549 ; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
550 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
551 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
552 %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
555 declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone
; Test for llvm.x86.avx.ptestz.256, which takes two <4 x i64> operands and
; returns i32. The common prefix expects eax to be zeroed with xorl, a vptest
; of ymm1 against ymm0, sete into al, vzeroupper, and then the return.
558 define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
559 ; CHECK-LABEL: test_x86_avx_ptestz_256:
561 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
562 ; CHECK-NEXT: vptest %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x17,0xc1]
563 ; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
564 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
565 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
566 %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
569 declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone
; Test for the llvm.x86.avx.rcp.ps.256 intrinsic on an <8 x float> operand.
; The common prefix expects a single vrcpps on ymm0 followed by the return.
572 define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
573 ; CHECK-LABEL: test_x86_avx_rcp_ps_256:
575 ; CHECK-NEXT: vrcpps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x53,0xc0]
576 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
577 %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
580 declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
; Test for llvm.x86.avx.round.pd.256 with the i32 immediate 7. The AVX and
; AVX512VL configurations are checked separately. Both expect a single
; vroundpd carrying $7 as its immediate, followed by the return. The AVX512VL
; expectation has the same encoding bytes plus the "EVEX TO VEX Compression"
; annotation.
583 define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
584 ; AVX-LABEL: test_x86_avx_round_pd_256:
586 ; AVX-NEXT: vroundpd $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x09,0xc0,0x07]
587 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
589 ; AVX512VL-LABEL: test_x86_avx_round_pd_256:
591 ; AVX512VL-NEXT: vroundpd $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x09,0xc0,0x07]
592 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
593 %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
594 ret <4 x double> %res
596 declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
; Test for llvm.x86.avx.round.ps.256 with the i32 immediate 7. The AVX and
; AVX512VL configurations are checked separately. Both expect a single
; vroundps carrying $7 as its immediate, followed by the return. The AVX512VL
; expectation has the same encoding bytes plus the "EVEX TO VEX Compression"
; annotation.
599 define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
600 ; AVX-LABEL: test_x86_avx_round_ps_256:
602 ; AVX-NEXT: vroundps $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x08,0xc0,0x07]
603 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
605 ; AVX512VL-LABEL: test_x86_avx_round_ps_256:
607 ; AVX512VL-NEXT: vroundps $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x08,0xc0,0x07]
608 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
609 %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
612 declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
; Test for the llvm.x86.avx.rsqrt.ps.256 intrinsic on an <8 x float> operand.
; The common prefix expects a single vrsqrtps on ymm0 followed by the return.
615 define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
616 ; CHECK-LABEL: test_x86_avx_rsqrt_ps_256:
618 ; CHECK-NEXT: vrsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x52,0xc0]
619 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
620 %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
623 declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
; Test for the 128-bit llvm.x86.avx.vpermilvar.pd intrinsic with a variable
; <2 x i64> selector. The AVX and AVX512VL configurations are checked
; separately. Both expect the register-controlled form of vpermilpd on xmm
; registers followed by the return. The AVX512VL expectation has the same
; encoding bytes plus the "EVEX TO VEX Compression" annotation.
625 define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
626 ; AVX-LABEL: test_x86_avx_vpermilvar_pd:
628 ; AVX-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0d,0xc1]
629 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
631 ; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd:
633 ; AVX512VL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0d,0xc1]
634 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
635 %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
636 ret <2 x double> %res
638 declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone
; Test for the 256-bit llvm.x86.avx.vpermilvar.pd.256 intrinsic with a
; variable <4 x i64> selector. The AVX and AVX512VL configurations are checked
; separately. Both expect the register-controlled form of vpermilpd on ymm
; registers followed by the return. The AVX512VL expectation has the same
; encoding bytes plus the "EVEX TO VEX Compression" annotation.
641 define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
642 ; AVX-LABEL: test_x86_avx_vpermilvar_pd_256:
644 ; AVX-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0d,0xc1]
645 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
647 ; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256:
649 ; AVX512VL-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0d,0xc1]
650 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
651 %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
652 ret <4 x double> %res
654 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
; Test for llvm.x86.avx.vpermilvar.pd.256 called with the constant selector
; <i64 2, i64 0, i64 0, i64 2>. Both the AVX and AVX512VL configurations
; expect the immediate form of the instruction, vpermilpd $9, with the
; decoded shuffle comment ymm0[1,0,2,3], rather than a register-controlled
; permute. The AVX512VL expectation has the same encoding bytes plus the
; "EVEX TO VEX Compression" annotation.
656 define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) {
657 ; AVX-LABEL: test_x86_avx_vpermilvar_pd_256_2:
659 ; AVX-NEXT: vpermilpd $9, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x09]
660 ; AVX-NEXT: # ymm0 = ymm0[1,0,2,3]
661 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
663 ; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2:
665 ; AVX512VL-NEXT: vpermilpd $9, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x09]
666 ; AVX512VL-NEXT: # ymm0 = ymm0[1,0,2,3]
667 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
668 %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]
669 ret <4 x double> %res
; Codegen test for @llvm.x86.avx.vpermilvar.ps (128-bit, variable control
; vector in %a1): a register-register vpermilps on xmm registers is expected.
; The AVX and AVX512VL outputs differ only in the "EVEX TO VEX Compression"
; annotation. The intrinsic is declared after the _load variant of this test.
672 define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
673 ; AVX-LABEL: test_x86_avx_vpermilvar_ps:
675 ; AVX-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0c,0xc1]
676 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
678 ; AVX512VL-LABEL: test_x86_avx_vpermilvar_ps:
680 ; AVX512VL-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0xc1]
681 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
682 %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
; Load-folding variant of the vpermilvar.ps test: the control vector is loaded
; from the pointer %a1 and the checks expect that load to be folded into
; vpermilps as a memory operand. Each triple/feature combination has its own
; prefix group because the i686 output first fetches the pointer from the
; stack into %eax, while the x86_64 output addresses (%rdi) directly.
685 define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
686 ; X86-AVX-LABEL: test_x86_avx_vpermilvar_ps_load:
688 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
689 ; X86-AVX-NEXT: vpermilps (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0c,0x00]
690 ; X86-AVX-NEXT: retl # encoding: [0xc3]
692 ; X86-AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_load:
693 ; X86-AVX512VL: # %bb.0:
694 ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
695 ; X86-AVX512VL-NEXT: vpermilps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0x00]
696 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
698 ; X64-AVX-LABEL: test_x86_avx_vpermilvar_ps_load:
700 ; X64-AVX-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0c,0x07]
701 ; X64-AVX-NEXT: retq # encoding: [0xc3]
703 ; X64-AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_load:
704 ; X64-AVX512VL: # %bb.0:
705 ; X64-AVX512VL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0x07]
706 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
707 %a2 = load <4 x i32>, <4 x i32>* %a1
708 %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
711 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
; Codegen test for @llvm.x86.avx.vpermilvar.ps.256 (256-bit, variable control
; vector in %a1): a register-register vpermilps on ymm registers is expected.
; The AVX and AVX512VL outputs differ only in the "EVEX TO VEX Compression"
; annotation, hence the two separate prefix groups.
714 define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
715 ; AVX-LABEL: test_x86_avx_vpermilvar_ps_256:
717 ; AVX-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0c,0xc1]
718 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
720 ; AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_256:
722 ; AVX512VL-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0c,0xc1]
723 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
724 %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
727 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
; Codegen test for @llvm.x86.avx.vtestc.pd (128-bit): the i32 result is
; expected to be built by zeroing %eax, running vtestpd on the two xmm
; arguments, and reading the flag back with setb. Output is identical for
; every RUN line (shared CHECK prefix).
730 define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
731 ; CHECK-LABEL: test_x86_avx_vtestc_pd:
733 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
734 ; CHECK-NEXT: vtestpd %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0f,0xc1]
735 ; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
736 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
737 %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
740 declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
; Codegen test for @llvm.x86.avx.vtestc.pd.256: same xorl / vtestpd / setb
; sequence as the 128-bit test, but on ymm registers. Because ymm arguments
; are used and the function returns a scalar, a vzeroupper is expected before
; the return.
743 define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
744 ; CHECK-LABEL: test_x86_avx_vtestc_pd_256:
746 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
747 ; CHECK-NEXT: vtestpd %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0f,0xc1]
748 ; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
749 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
750 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
751 %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
754 declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Codegen test for @llvm.x86.avx.vtestc.ps (128-bit): the i32 result is
; expected to be built by zeroing %eax, running vtestps on the two xmm
; arguments, and reading the flag back with setb. Output is identical for
; every RUN line (shared CHECK prefix).
757 define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
758 ; CHECK-LABEL: test_x86_avx_vtestc_ps:
760 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
761 ; CHECK-NEXT: vtestps %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0e,0xc1]
762 ; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
763 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
764 %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
767 declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
; Codegen test for @llvm.x86.avx.vtestc.ps.256: same xorl / vtestps / setb
; sequence as the 128-bit test, but on ymm registers, followed by the
; vzeroupper that is expected before returning the scalar result.
770 define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
771 ; CHECK-LABEL: test_x86_avx_vtestc_ps_256:
773 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
774 ; CHECK-NEXT: vtestps %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0e,0xc1]
775 ; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
776 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
777 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
778 %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
781 declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Codegen test for @llvm.x86.avx.vtestnzc.pd (128-bit): the i32 result is
; expected to be built by zeroing %eax, running vtestpd on the two xmm
; arguments, and reading the flags back with seta. Output is identical for
; every RUN line (shared CHECK prefix).
784 define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
785 ; CHECK-LABEL: test_x86_avx_vtestnzc_pd:
787 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
788 ; CHECK-NEXT: vtestpd %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0f,0xc1]
789 ; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
790 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
791 %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
794 declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone
; Codegen test for @llvm.x86.avx.vtestnzc.pd.256: same xorl / vtestpd / seta
; sequence as the 128-bit test, but on ymm registers, followed by the
; vzeroupper that is expected before returning the scalar result.
797 define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
798 ; CHECK-LABEL: test_x86_avx_vtestnzc_pd_256:
800 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
801 ; CHECK-NEXT: vtestpd %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0f,0xc1]
802 ; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
803 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
804 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
805 %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
808 declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Codegen test for @llvm.x86.avx.vtestnzc.ps (128-bit): the i32 result is
; expected to be built by zeroing %eax, running vtestps on the two xmm
; arguments, and reading the flags back with seta. Output is identical for
; every RUN line (shared CHECK prefix).
811 define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
812 ; CHECK-LABEL: test_x86_avx_vtestnzc_ps:
814 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
815 ; CHECK-NEXT: vtestps %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0e,0xc1]
816 ; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
817 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
818 %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
821 declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone
; Codegen test for @llvm.x86.avx.vtestnzc.ps.256: same xorl / vtestps / seta
; sequence as the 128-bit test, but on ymm registers, followed by the
; vzeroupper that is expected before returning the scalar result.
824 define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
825 ; CHECK-LABEL: test_x86_avx_vtestnzc_ps_256:
827 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
828 ; CHECK-NEXT: vtestps %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0e,0xc1]
829 ; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
830 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
831 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
832 %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
835 declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Codegen test for @llvm.x86.avx.vtestz.pd (128-bit): the i32 result is
; expected to be built by zeroing %eax, running vtestpd on the two xmm
; arguments, and reading the flag back with sete. Output is identical for
; every RUN line (shared CHECK prefix).
838 define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
839 ; CHECK-LABEL: test_x86_avx_vtestz_pd:
841 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
842 ; CHECK-NEXT: vtestpd %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0f,0xc1]
843 ; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
844 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
845 %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
848 declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone
; Codegen test for @llvm.x86.avx.vtestz.pd.256: same xorl / vtestpd / sete
; sequence as the 128-bit test, but on ymm registers, followed by the
; vzeroupper that is expected before returning the scalar result.
851 define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
852 ; CHECK-LABEL: test_x86_avx_vtestz_pd_256:
854 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
855 ; CHECK-NEXT: vtestpd %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0f,0xc1]
856 ; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
857 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
858 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
859 %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
862 declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Codegen test for @llvm.x86.avx.vtestz.ps (128-bit): the i32 result is
; expected to be built by zeroing %eax, running vtestps on the two xmm
; arguments, and reading the flag back with sete. Output is identical for
; every RUN line (shared CHECK prefix).
865 define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
866 ; CHECK-LABEL: test_x86_avx_vtestz_ps:
868 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
869 ; CHECK-NEXT: vtestps %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0e,0xc1]
870 ; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
871 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
872 %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
875 declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone
; Codegen test for @llvm.x86.avx.vtestz.ps.256: same xorl / vtestps / sete
; sequence as the 128-bit test, but on ymm registers, followed by the
; vzeroupper that is expected before returning the scalar result.
878 define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
879 ; CHECK-LABEL: test_x86_avx_vtestz_ps_256:
881 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
882 ; CHECK-NEXT: vtestps %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0e,0xc1]
883 ; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
884 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
885 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
886 %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
889 declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Codegen test for @llvm.x86.avx.vzeroall: the argument-less intrinsic call is
; expected to become a single vzeroall instruction followed by the return,
; for every RUN line (shared CHECK prefix).
892 define void @test_x86_avx_vzeroall() {
893 ; CHECK-LABEL: test_x86_avx_vzeroall:
895 ; CHECK-NEXT: vzeroall # encoding: [0xc5,0xfc,0x77]
896 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
897 call void @llvm.x86.avx.vzeroall()
900 declare void @llvm.x86.avx.vzeroall() nounwind
; Codegen test for @llvm.x86.avx.vzeroupper: the argument-less intrinsic call
; is expected to become exactly one vzeroupper instruction followed by the
; return, for every RUN line (shared CHECK prefix).
903 define void @test_x86_avx_vzeroupper() {
904 ; CHECK-LABEL: test_x86_avx_vzeroupper:
906 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
907 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
908 call void @llvm.x86.avx.vzeroupper()
911 declare void @llvm.x86.avx.vzeroupper() nounwind
; Codegen test for the 256-bit non-temporal integer store intrinsic
; @llvm.x86.avx.movnt.dq.256. The IR adds 1 to each element of the <2 x i64>
; argument, widens the result to <4 x i64> with undef upper elements, and
; stores it through %p. The add is expected to be emitted as vpcmpeqd
; followed by vpsubq, and the store as vmovntdq from %ymm0, with a vzeroupper
; before the return.
; Each triple/feature combination has its own prefix group: the i686 output
; first loads %p from the stack into %eax, and the AVX512VL output carries
; "EVEX TO VEX Compression" annotations.
913 define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
914 ; X86-AVX-LABEL: movnt_dq:
916 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
917 ; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
918 ; X86-AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
919 ; X86-AVX-NEXT: vmovntdq %ymm0, (%eax) # encoding: [0xc5,0xfd,0xe7,0x00]
920 ; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
921 ; X86-AVX-NEXT: retl # encoding: [0xc3]
923 ; X86-AVX512VL-LABEL: movnt_dq:
924 ; X86-AVX512VL: # %bb.0:
925 ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
926 ; X86-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
927 ; X86-AVX512VL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
928 ; X86-AVX512VL-NEXT: vmovntdq %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x00]
929 ; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
930 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
932 ; X64-AVX-LABEL: movnt_dq:
934 ; X64-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
935 ; X64-AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
936 ; X64-AVX-NEXT: vmovntdq %ymm0, (%rdi) # encoding: [0xc5,0xfd,0xe7,0x07]
937 ; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
938 ; X64-AVX-NEXT: retq # encoding: [0xc3]
940 ; X64-AVX512VL-LABEL: movnt_dq:
941 ; X64-AVX512VL: # %bb.0:
942 ; X64-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
943 ; X64-AVX512VL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
944 ; X64-AVX512VL-NEXT: vmovntdq %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x07]
945 ; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
946 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
947 %a2 = add <2 x i64> %a1, <i64 1, i64 1>
948 %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
949 tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a3) nounwind
952 declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
; Codegen test for the 256-bit non-temporal float store intrinsic
; @llvm.x86.avx.movnt.ps.256: the argument is stored through %p unchanged, so
; the checks expect just vmovntps from %ymm0, a vzeroupper, and the return.
; Each triple/feature combination has its own prefix group: the i686 output
; first loads %p from the stack into %eax, and the AVX512VL output carries the
; "EVEX TO VEX Compression" annotation.
954 define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
955 ; X86-AVX-LABEL: movnt_ps:
957 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
958 ; X86-AVX-NEXT: vmovntps %ymm0, (%eax) # encoding: [0xc5,0xfc,0x2b,0x00]
959 ; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
960 ; X86-AVX-NEXT: retl # encoding: [0xc3]
962 ; X86-AVX512VL-LABEL: movnt_ps:
963 ; X86-AVX512VL: # %bb.0:
964 ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
965 ; X86-AVX512VL-NEXT: vmovntps %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x2b,0x00]
966 ; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
967 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
969 ; X64-AVX-LABEL: movnt_ps:
971 ; X64-AVX-NEXT: vmovntps %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x2b,0x07]
972 ; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
973 ; X64-AVX-NEXT: retq # encoding: [0xc3]
975 ; X64-AVX512VL-LABEL: movnt_ps:
976 ; X64-AVX512VL: # %bb.0:
977 ; X64-AVX512VL-NEXT: vmovntps %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x2b,0x07]
978 ; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
979 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
980 tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
983 declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
; Codegen test for the 256-bit non-temporal double store intrinsic
; @llvm.x86.avx.movnt.pd.256. The IR adds a zero vector to %a1 with fadd and
; stores the sum through %p. The checks expect vxorpd (zero register), vaddpd,
; then vmovntpd from %ymm0, with a vzeroupper before the return.
; Each triple/feature combination has its own prefix group: the i686 output
; first loads %p from the stack into %eax, and the AVX512VL output carries
; "EVEX TO VEX Compression" annotations.
985 define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
986 ; add operation forces the execution domain.
987 ; X86-AVX-LABEL: movnt_pd:
989 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
990 ; X86-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
991 ; X86-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
992 ; X86-AVX-NEXT: vmovntpd %ymm0, (%eax) # encoding: [0xc5,0xfd,0x2b,0x00]
993 ; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
994 ; X86-AVX-NEXT: retl # encoding: [0xc3]
996 ; X86-AVX512VL-LABEL: movnt_pd:
997 ; X86-AVX512VL: # %bb.0:
998 ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
999 ; X86-AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
1000 ; X86-AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
1001 ; X86-AVX512VL-NEXT: vmovntpd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x2b,0x00]
1002 ; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1003 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
1005 ; X64-AVX-LABEL: movnt_pd:
1007 ; X64-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
1008 ; X64-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
1009 ; X64-AVX-NEXT: vmovntpd %ymm0, (%rdi) # encoding: [0xc5,0xfd,0x2b,0x07]
1010 ; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1011 ; X64-AVX-NEXT: retq # encoding: [0xc3]
1013 ; X64-AVX512VL-LABEL: movnt_pd:
1014 ; X64-AVX512VL: # %bb.0:
1015 ; X64-AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
1016 ; X64-AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
1017 ; X64-AVX512VL-NEXT: vmovntpd %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x2b,0x07]
1018 ; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1019 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
1020 %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
1021 tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
1024 declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
; Codegen test for @llvm.x86.pclmulqdq when AVX is enabled (every RUN line
; passes +pclmul together with AVX or AVX-512 features): the call with an
; immediate of 0 is expected to become the VEX-encoded three-operand
; vpclmulqdq $0 on xmm registers.
1027 ; Check for pclmulqdq
1028 define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
1029 ; CHECK-LABEL: test_x86_pclmulqdq:
1031 ; CHECK-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x44,0xc1,0x00]
1032 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1033 %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1]
1036 declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone