1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s
; Register form: calls llvm.x86.xop.vpermil2pd on three vector arguments with
; immediate 1; the expected assembly is vpermil2pd $1 with xmm-only operands.
4 define <2 x double> @test_int_x86_xop_vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2) {
5 ; CHECK-LABEL: test_int_x86_xop_vpermil2pd:
7 ; CHECK-NEXT: vpermil2pd $1, %xmm2, %xmm1, %xmm0, %xmm0
9 %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2, i8 1) ; [#uses=1]
; Memory form for the second intrinsic argument: %a1 is loaded from a pointer and
; the load is expected to appear as the (%rdi) operand of vpermil2pd $1.
12 define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, ptr %a1, <2 x i64> %a2) {
13 ; CHECK-LABEL: test_int_x86_xop_vpermil2pd_mr:
15 ; CHECK-NEXT: vpermil2pd $1, %xmm1, (%rdi), %xmm0, %xmm0
17 %vec = load <2 x double>, ptr %a1
18 %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x i64> %a2, i8 1) ; [#uses=1]
; Memory form for the third intrinsic argument: the <2 x i64> operand is loaded
; from %a2 and is expected to appear as the (%rdi) operand of vpermil2pd $1.
21 define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, ptr %a2) {
22 ; CHECK-LABEL: test_int_x86_xop_vpermil2pd_rm:
24 ; CHECK-NEXT: vpermil2pd $1, (%rdi), %xmm1, %xmm0, %xmm0
26 %vec = load <2 x i64>, ptr %a2
27 %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %vec, i8 1) ; [#uses=1]
30 declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8) nounwind readnone
; 256-bit register form: calls llvm.x86.xop.vpermil2pd.256 with immediate 2;
; the expected assembly is vpermil2pd $2 with ymm-only operands.
32 define <4 x double> @test_int_x86_xop_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2) {
33 ; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256:
35 ; CHECK-NEXT: vpermil2pd $2, %ymm2, %ymm1, %ymm0, %ymm0
37 %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2, i8 2) ;
; 256-bit memory form for the second intrinsic argument: the <4 x double> loaded
; from %a1 is expected to appear as the (%rdi) operand of vpermil2pd $2.
40 define <4 x double> @test_int_x86_xop_vpermil2pd_256_mr(<4 x double> %a0, ptr %a1, <4 x i64> %a2) {
41 ; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256_mr:
43 ; CHECK-NEXT: vpermil2pd $2, %ymm1, (%rdi), %ymm0, %ymm0
45 %vec = load <4 x double>, ptr %a1
46 %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x i64> %a2, i8 2) ;
; 256-bit memory form for the third intrinsic argument: the <4 x i64> loaded
; from %a2 is expected to appear as the (%rdi) operand of vpermil2pd $2.
49 define <4 x double> @test_int_x86_xop_vpermil2pd_256_rm(<4 x double> %a0, <4 x double> %a1, ptr %a2) {
50 ; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256_rm:
52 ; CHECK-NEXT: vpermil2pd $2, (%rdi), %ymm1, %ymm0, %ymm0
54 %vec = load <4 x i64>, ptr %a2
55 %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %vec, i8 2) ;
58 declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8) nounwind readnone
; Register form: calls llvm.x86.xop.vpermil2ps on <4 x float>/<4 x i32> arguments
; with immediate 3; the expected assembly is vpermil2ps $3 with xmm-only operands.
60 define <4 x float> @test_int_x86_xop_vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> %a2) {
61 ; CHECK-LABEL: test_int_x86_xop_vpermil2ps:
63 ; CHECK-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0
65 %res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> %a2, i8 3) ;
68 declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x i32>, i8) nounwind readnone
; 256-bit register form: calls llvm.x86.xop.vpermil2ps.256 with immediate 4;
; the expected assembly is vpermil2ps $4 with ymm-only operands.
70 define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x i32> %a2) {
71 ; CHECK-LABEL: test_int_x86_xop_vpermil2ps_256:
73 ; CHECK-NEXT: vpermil2ps $4, %ymm2, %ymm1, %ymm0, %ymm0
75 %res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> %a2, i8 4) ;
78 declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x i32>, i8) nounwind readnone
; Uses no intrinsic: the bitwise select (%a0 & %a2) | (%a1 & ~%a2) is written with
; plain xor/and/or, and the expected assembly is vpcmov on xmm registers.
80 define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
81 ; CHECK-LABEL: test_int_x86_xop_vpcmov:
83 ; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; %1 is the bitwise complement of the mask %a2 (xor with all-ones).
85 %1 = xor <2 x i64> %a2, <i64 -1, i64 -1>
86 %2 = and <2 x i64> %a0, %a2
87 %3 = and <2 x i64> %a1, %1
88 %4 = or <2 x i64> %2, %3
; 256-bit variant of the generic bitwise select (%a0 & %a2) | (%a1 & ~%a2);
; the expected assembly is vpcmov on ymm registers.
92 define <4 x i64> @test_int_x86_xop_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
93 ; CHECK-LABEL: test_int_x86_xop_vpcmov_256:
95 ; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; %1 is the bitwise complement of the mask %a2 (xor with all-ones).
97 %1 = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
98 %2 = and <4 x i64> %a0, %a2
99 %3 = and <4 x i64> %a1, %1
100 %4 = or <4 x i64> %2, %3
; 256-bit bitwise select where the value combined with the inverted mask is loaded
; from %a1; the load is expected to appear as the (%rdi) operand of vpcmov.
103 define <4 x i64> @test_int_x86_xop_vpcmov_256_mr(<4 x i64> %a0, ptr %a1, <4 x i64> %a2) {
104 ; CHECK-LABEL: test_int_x86_xop_vpcmov_256_mr:
106 ; CHECK-NEXT: vpcmov %ymm1, (%rdi), %ymm0, %ymm0
108 %vec = load <4 x i64>, ptr %a1
; %1 is the bitwise complement of the mask %a2 (xor with all-ones).
109 %1 = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
110 %2 = and <4 x i64> %a0, %a2
111 %3 = and <4 x i64> %vec, %1
112 %4 = or <4 x i64> %2, %3
; 256-bit bitwise select where the mask itself is loaded from %a2; the load is
; expected to appear as the (%rdi) operand of vpcmov.
115 define <4 x i64> @test_int_x86_xop_vpcmov_256_rm(<4 x i64> %a0, <4 x i64> %a1, ptr %a2) {
116 ; CHECK-LABEL: test_int_x86_xop_vpcmov_256_rm:
118 ; CHECK-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0
120 %vec = load <4 x i64>, ptr %a2
; %1 is the bitwise complement of the loaded mask (xor with all-ones).
121 %1 = xor <4 x i64> %vec, <i64 -1, i64 -1, i64 -1, i64 -1>
122 %2 = and <4 x i64> %a0, %vec
123 %3 = and <4 x i64> %a1, %1
124 %4 = or <4 x i64> %2, %3
; Calls llvm.x86.xop.vphaddbd (<16 x i8> in, <4 x i32> out); the expected assembly
; is vphaddbd on %xmm0.
128 define <4 x i32> @test_int_x86_xop_vphaddbd(<16 x i8> %a0) {
129 ; CHECK-LABEL: test_int_x86_xop_vphaddbd:
131 ; CHECK-NEXT: vphaddbd %xmm0, %xmm0
133 %res = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %a0) ;
136 declare <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8>) nounwind readnone
; Calls llvm.x86.xop.vphaddbq (<16 x i8> in, <2 x i64> out); the expected assembly
; is vphaddbq on %xmm0.
138 define <2 x i64> @test_int_x86_xop_vphaddbq(<16 x i8> %a0) {
139 ; CHECK-LABEL: test_int_x86_xop_vphaddbq:
141 ; CHECK-NEXT: vphaddbq %xmm0, %xmm0
143 %res = call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %a0) ;
146 declare <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8>) nounwind readnone
; Calls llvm.x86.xop.vphaddbw (<16 x i8> in, <8 x i16> out); the expected assembly
; is vphaddbw on %xmm0.
148 define <8 x i16> @test_int_x86_xop_vphaddbw(<16 x i8> %a0) {
149 ; CHECK-LABEL: test_int_x86_xop_vphaddbw:
151 ; CHECK-NEXT: vphaddbw %xmm0, %xmm0
153 %res = call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %a0) ;
156 declare <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8>) nounwind readnone
; Calls llvm.x86.xop.vphadddq (<4 x i32> in, <2 x i64> out); the expected assembly
; is vphadddq on %xmm0.
158 define <2 x i64> @test_int_x86_xop_vphadddq(<4 x i32> %a0) {
159 ; CHECK-LABEL: test_int_x86_xop_vphadddq:
161 ; CHECK-NEXT: vphadddq %xmm0, %xmm0
163 %res = call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %a0) ;
166 declare <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32>) nounwind readnone
; Calls llvm.x86.xop.vphaddubd (<16 x i8> in, <4 x i32> out); the expected assembly
; is vphaddubd on %xmm0.
168 define <4 x i32> @test_int_x86_xop_vphaddubd(<16 x i8> %a0) {
169 ; CHECK-LABEL: test_int_x86_xop_vphaddubd:
171 ; CHECK-NEXT: vphaddubd %xmm0, %xmm0
173 %res = call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %a0) ;
176 declare <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8>) nounwind readnone
; Calls llvm.x86.xop.vphaddubq (<16 x i8> in, <2 x i64> out); the expected assembly
; is vphaddubq on %xmm0.
178 define <2 x i64> @test_int_x86_xop_vphaddubq(<16 x i8> %a0) {
179 ; CHECK-LABEL: test_int_x86_xop_vphaddubq:
181 ; CHECK-NEXT: vphaddubq %xmm0, %xmm0
183 %res = call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %a0) ;
186 declare <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8>) nounwind readnone
; Calls llvm.x86.xop.vphaddubw (<16 x i8> in, <8 x i16> out); the expected assembly
; is vphaddubw on %xmm0.
188 define <8 x i16> @test_int_x86_xop_vphaddubw(<16 x i8> %a0) {
189 ; CHECK-LABEL: test_int_x86_xop_vphaddubw:
191 ; CHECK-NEXT: vphaddubw %xmm0, %xmm0
193 %res = call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %a0) ;
196 declare <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8>) nounwind readnone
; Calls llvm.x86.xop.vphaddudq (<4 x i32> in, <2 x i64> out); the expected assembly
; is vphaddudq on %xmm0.
198 define <2 x i64> @test_int_x86_xop_vphaddudq(<4 x i32> %a0) {
199 ; CHECK-LABEL: test_int_x86_xop_vphaddudq:
201 ; CHECK-NEXT: vphaddudq %xmm0, %xmm0
203 %res = call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %a0) ;
206 declare <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32>) nounwind readnone
; Calls llvm.x86.xop.vphadduwd (<8 x i16> in, <4 x i32> out); the expected assembly
; is vphadduwd on %xmm0.
208 define <4 x i32> @test_int_x86_xop_vphadduwd(<8 x i16> %a0) {
209 ; CHECK-LABEL: test_int_x86_xop_vphadduwd:
211 ; CHECK-NEXT: vphadduwd %xmm0, %xmm0
213 %res = call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %a0) ;
216 declare <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16>) nounwind readnone
; Calls llvm.x86.xop.vphadduwq (<8 x i16> in, <2 x i64> out); the expected assembly
; is vphadduwq on %xmm0.
218 define <2 x i64> @test_int_x86_xop_vphadduwq(<8 x i16> %a0) {
219 ; CHECK-LABEL: test_int_x86_xop_vphadduwq:
221 ; CHECK-NEXT: vphadduwq %xmm0, %xmm0
223 %res = call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %a0) ;
226 declare <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16>) nounwind readnone
; Calls llvm.x86.xop.vphaddwd (<8 x i16> in, <4 x i32> out); the expected assembly
; is vphaddwd on %xmm0.
228 define <4 x i32> @test_int_x86_xop_vphaddwd(<8 x i16> %a0) {
229 ; CHECK-LABEL: test_int_x86_xop_vphaddwd:
231 ; CHECK-NEXT: vphaddwd %xmm0, %xmm0
233 %res = call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %a0) ;
236 declare <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16>) nounwind readnone
; Calls llvm.x86.xop.vphaddwq (<8 x i16> in, <2 x i64> out); the expected assembly
; is vphaddwq on %xmm0.
238 define <2 x i64> @test_int_x86_xop_vphaddwq(<8 x i16> %a0) {
239 ; CHECK-LABEL: test_int_x86_xop_vphaddwq:
241 ; CHECK-NEXT: vphaddwq %xmm0, %xmm0
243 %res = call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %a0) ;
246 declare <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16>) nounwind readnone
; Calls llvm.x86.xop.vphsubbw (<16 x i8> in, <8 x i16> out); the expected assembly
; is vphsubbw on %xmm0.
248 define <8 x i16> @test_int_x86_xop_vphsubbw(<16 x i8> %a0) {
249 ; CHECK-LABEL: test_int_x86_xop_vphsubbw:
251 ; CHECK-NEXT: vphsubbw %xmm0, %xmm0
253 %res = call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %a0) ;
256 declare <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8>) nounwind readnone
; Calls llvm.x86.xop.vphsubdq (<4 x i32> in, <2 x i64> out) on a register argument;
; the expected assembly is vphsubdq on %xmm0.
258 define <2 x i64> @test_int_x86_xop_vphsubdq(<4 x i32> %a0) {
259 ; CHECK-LABEL: test_int_x86_xop_vphsubdq:
261 ; CHECK-NEXT: vphsubdq %xmm0, %xmm0
263 %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %a0) ;
; Memory form: the <4 x i32> input is loaded from %a0 and the load is expected to
; appear as the (%rdi) source operand of vphsubdq.
266 define <2 x i64> @test_int_x86_xop_vphsubdq_mem(ptr %a0) {
267 ; CHECK-LABEL: test_int_x86_xop_vphsubdq_mem:
269 ; CHECK-NEXT: vphsubdq (%rdi), %xmm0
271 %vec = load <4 x i32>, ptr %a0
272 %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %vec) ;
275 declare <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32>) nounwind readnone
; Calls llvm.x86.xop.vphsubwd (<8 x i16> in, <4 x i32> out) on a register argument;
; the expected assembly is vphsubwd on %xmm0.
277 define <4 x i32> @test_int_x86_xop_vphsubwd(<8 x i16> %a0) {
278 ; CHECK-LABEL: test_int_x86_xop_vphsubwd:
280 ; CHECK-NEXT: vphsubwd %xmm0, %xmm0
282 %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %a0) ;
; Memory form: the <8 x i16> input is loaded from %a0 and the load is expected to
; appear as the (%rdi) source operand of vphsubwd.
285 define <4 x i32> @test_int_x86_xop_vphsubwd_mem(ptr %a0) {
286 ; CHECK-LABEL: test_int_x86_xop_vphsubwd_mem:
288 ; CHECK-NEXT: vphsubwd (%rdi), %xmm0
290 %vec = load <8 x i16>, ptr %a0
291 %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %vec) ;
294 declare <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16>) nounwind readnone
; Calls llvm.x86.xop.vpmacsdd on three <4 x i32> register arguments; the expected
; assembly is vpmacsdd with xmm-only operands.
296 define <4 x i32> @test_int_x86_xop_vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
297 ; CHECK-LABEL: test_int_x86_xop_vpmacsdd:
299 ; CHECK-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
301 %res = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) ;
304 declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
; Calls llvm.x86.xop.vpmacsdqh (two <4 x i32> plus a <2 x i64> argument, <2 x i64>
; result); the expected assembly is vpmacsdqh with xmm-only operands.
306 define <2 x i64> @test_int_x86_xop_vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
307 ; CHECK-LABEL: test_int_x86_xop_vpmacsdqh:
309 ; CHECK-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
311 %res = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
314 declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
; Calls llvm.x86.xop.vpmacsdql (two <4 x i32> plus a <2 x i64> argument, <2 x i64>
; result); the expected assembly is vpmacsdql with xmm-only operands.
316 define <2 x i64> @test_int_x86_xop_vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
317 ; CHECK-LABEL: test_int_x86_xop_vpmacsdql:
319 ; CHECK-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
321 %res = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
324 declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
; Calls llvm.x86.xop.vpmacssdd on three <4 x i32> register arguments; the expected
; assembly is vpmacssdd with xmm-only operands.
326 define <4 x i32> @test_int_x86_xop_vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
327 ; CHECK-LABEL: test_int_x86_xop_vpmacssdd:
329 ; CHECK-NEXT: vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
331 %res = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) ;
334 declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
; Calls llvm.x86.xop.vpmacssdqh (two <4 x i32> plus a <2 x i64> argument, <2 x i64>
; result); the expected assembly is vpmacssdqh with xmm-only operands.
336 define <2 x i64> @test_int_x86_xop_vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
337 ; CHECK-LABEL: test_int_x86_xop_vpmacssdqh:
339 ; CHECK-NEXT: vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
341 %res = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
344 declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
; Calls llvm.x86.xop.vpmacssdql (two <4 x i32> plus a <2 x i64> argument, <2 x i64>
; result); the expected assembly is vpmacssdql with xmm-only operands.
346 define <2 x i64> @test_int_x86_xop_vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
347 ; CHECK-LABEL: test_int_x86_xop_vpmacssdql:
349 ; CHECK-NEXT: vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
351 %res = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
354 declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
; Calls llvm.x86.xop.vpmacsswd (two <8 x i16> plus a <4 x i32> argument, <4 x i32>
; result); the expected assembly is vpmacsswd with xmm-only operands.
356 define <4 x i32> @test_int_x86_xop_vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
357 ; CHECK-LABEL: test_int_x86_xop_vpmacsswd:
359 ; CHECK-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
361 %res = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
364 declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
; Calls llvm.x86.xop.vpmacssww on three <8 x i16> register arguments; the expected
; assembly is vpmacssww with xmm-only operands.
366 define <8 x i16> @test_int_x86_xop_vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
367 ; CHECK-LABEL: test_int_x86_xop_vpmacssww:
369 ; CHECK-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
371 %res = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) ;
374 declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
; Calls llvm.x86.xop.vpmacswd (two <8 x i16> plus a <4 x i32> argument, <4 x i32>
; result); the expected assembly is vpmacswd with xmm-only operands.
376 define <4 x i32> @test_int_x86_xop_vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
377 ; CHECK-LABEL: test_int_x86_xop_vpmacswd:
379 ; CHECK-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
381 %res = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
384 declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
; Calls llvm.x86.xop.vpmacsww on three <8 x i16> register arguments; the expected
; assembly is vpmacsww with xmm-only operands.
386 define <8 x i16> @test_int_x86_xop_vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
387 ; CHECK-LABEL: test_int_x86_xop_vpmacsww:
389 ; CHECK-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
391 %res = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) ;
394 declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
; Calls llvm.x86.xop.vpmadcsswd (two <8 x i16> plus a <4 x i32> argument, <4 x i32>
; result); the expected assembly is vpmadcsswd with xmm-only operands.
396 define <4 x i32> @test_int_x86_xop_vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
397 ; CHECK-LABEL: test_int_x86_xop_vpmadcsswd:
399 ; CHECK-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
401 %res = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
404 declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
; Register form: calls llvm.x86.xop.vpmadcswd (two <8 x i16> plus a <4 x i32>
; argument); the expected assembly is vpmadcswd with xmm-only operands.
406 define <4 x i32> @test_int_x86_xop_vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
407 ; CHECK-LABEL: test_int_x86_xop_vpmadcswd:
409 ; CHECK-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
411 %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
; Memory form: the second <8 x i16> argument is loaded from %a1 and the load is
; expected to appear as the (%rdi) operand of vpmadcswd.
414 define <4 x i32> @test_int_x86_xop_vpmadcswd_mem(<8 x i16> %a0, ptr %a1, <4 x i32> %a2) {
415 ; CHECK-LABEL: test_int_x86_xop_vpmadcswd_mem:
417 ; CHECK-NEXT: vpmadcswd %xmm1, (%rdi), %xmm0, %xmm0
419 %vec = load <8 x i16>, ptr %a1
420 %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %vec, <4 x i32> %a2) ;
423 declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
; Register form: calls llvm.x86.xop.vpperm on three <16 x i8> arguments; the
; expected assembly is vpperm with xmm-only operands.
425 define <16 x i8> @test_int_x86_xop_vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
426 ; CHECK-LABEL: test_int_x86_xop_vpperm:
428 ; CHECK-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0
430 %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ;
; Memory form for the third intrinsic argument: the <16 x i8> loaded from %a2 is
; expected to appear as the (%rdi) operand of vpperm.
433 define <16 x i8> @test_int_x86_xop_vpperm_rm(<16 x i8> %a0, <16 x i8> %a1, ptr %a2) {
434 ; CHECK-LABEL: test_int_x86_xop_vpperm_rm:
436 ; CHECK-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0
438 %vec = load <16 x i8>, ptr %a2
439 %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %vec) ;
; Memory form for the second intrinsic argument: the <16 x i8> loaded from %a1 is
; expected to appear as the (%rdi) operand of vpperm.
442 define <16 x i8> @test_int_x86_xop_vpperm_mr(<16 x i8> %a0, ptr %a1, <16 x i8> %a2) {
443 ; CHECK-LABEL: test_int_x86_xop_vpperm_mr:
445 ; CHECK-NEXT: vpperm %xmm1, (%rdi), %xmm0, %xmm0
447 %vec = load <16 x i8>, ptr %a1
448 %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %vec, <16 x i8> %a2) ;
451 declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
; Calls llvm.x86.xop.vpshab on two <16 x i8> register arguments; the expected
; assembly is vpshab with xmm-only operands.
453 define <16 x i8> @test_int_x86_xop_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
454 ; CHECK-LABEL: test_int_x86_xop_vpshab:
456 ; CHECK-NEXT: vpshab %xmm1, %xmm0, %xmm0
458 %res = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %a0, <16 x i8> %a1) ;
461 declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone
; Calls llvm.x86.xop.vpshad on two <4 x i32> register arguments; the expected
; assembly is vpshad with xmm-only operands.
463 define <4 x i32> @test_int_x86_xop_vpshad(<4 x i32> %a0, <4 x i32> %a1) {
464 ; CHECK-LABEL: test_int_x86_xop_vpshad:
466 ; CHECK-NEXT: vpshad %xmm1, %xmm0, %xmm0
468 %res = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %a0, <4 x i32> %a1) ;
471 declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone
; Calls llvm.x86.xop.vpshaq on two <2 x i64> register arguments; the expected
; assembly is vpshaq with xmm-only operands.
473 define <2 x i64> @test_int_x86_xop_vpshaq(<2 x i64> %a0, <2 x i64> %a1) {
474 ; CHECK-LABEL: test_int_x86_xop_vpshaq:
476 ; CHECK-NEXT: vpshaq %xmm1, %xmm0, %xmm0
478 %res = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a0, <2 x i64> %a1) ;
481 declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone
; Calls llvm.x86.xop.vpshaw on two <8 x i16> register arguments; the expected
; assembly is vpshaw with xmm-only operands.
483 define <8 x i16> @test_int_x86_xop_vpshaw(<8 x i16> %a0, <8 x i16> %a1) {
484 ; CHECK-LABEL: test_int_x86_xop_vpshaw:
486 ; CHECK-NEXT: vpshaw %xmm1, %xmm0, %xmm0
488 %res = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %a0, <8 x i16> %a1) ;
491 declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone
; Calls llvm.x86.xop.vpshlb on two <16 x i8> register arguments; the expected
; assembly is vpshlb with xmm-only operands.
493 define <16 x i8> @test_int_x86_xop_vpshlb(<16 x i8> %a0, <16 x i8> %a1) {
494 ; CHECK-LABEL: test_int_x86_xop_vpshlb:
496 ; CHECK-NEXT: vpshlb %xmm1, %xmm0, %xmm0
498 %res = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %a0, <16 x i8> %a1) ;
501 declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone
; Calls llvm.x86.xop.vpshld on two <4 x i32> register arguments; the expected
; assembly is vpshld with xmm-only operands.
503 define <4 x i32> @test_int_x86_xop_vpshld(<4 x i32> %a0, <4 x i32> %a1) {
504 ; CHECK-LABEL: test_int_x86_xop_vpshld:
506 ; CHECK-NEXT: vpshld %xmm1, %xmm0, %xmm0
508 %res = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %a0, <4 x i32> %a1) ;
511 declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone
; Calls llvm.x86.xop.vpshlq on two <2 x i64> register arguments; the expected
; assembly is vpshlq with xmm-only operands.
513 define <2 x i64> @test_int_x86_xop_vpshlq(<2 x i64> %a0, <2 x i64> %a1) {
514 ; CHECK-LABEL: test_int_x86_xop_vpshlq:
516 ; CHECK-NEXT: vpshlq %xmm1, %xmm0, %xmm0
518 %res = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a0, <2 x i64> %a1) ;
521 declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone
; Register form: calls llvm.x86.xop.vpshlw on two <8 x i16> arguments; the
; expected assembly is vpshlw with xmm-only operands.
523 define <8 x i16> @test_int_x86_xop_vpshlw(<8 x i16> %a0, <8 x i16> %a1) {
524 ; CHECK-LABEL: test_int_x86_xop_vpshlw:
526 ; CHECK-NEXT: vpshlw %xmm1, %xmm0, %xmm0
528 %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %a1) ;
; Memory form for the second intrinsic argument: the <8 x i16> loaded from %a1 is
; expected to appear as the (%rdi) operand of vpshlw.
531 define <8 x i16> @test_int_x86_xop_vpshlw_rm(<8 x i16> %a0, ptr %a1) {
532 ; CHECK-LABEL: test_int_x86_xop_vpshlw_rm:
534 ; CHECK-NEXT: vpshlw (%rdi), %xmm0, %xmm0
536 %vec = load <8 x i16>, ptr %a1
537 %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %vec) ;
; Memory form for the first intrinsic argument: the <8 x i16> loaded from %a0 is
; expected to appear as the (%rdi) operand of vpshlw, with %a1 arriving in %xmm0.
540 define <8 x i16> @test_int_x86_xop_vpshlw_mr(ptr %a0, <8 x i16> %a1) {
541 ; CHECK-LABEL: test_int_x86_xop_vpshlw_mr:
543 ; CHECK-NEXT: vpshlw %xmm0, (%rdi), %xmm0
545 %vec = load <8 x i16>, ptr %a0
546 %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %vec, <8 x i16> %a1) ;
549 declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone
; Register form: calls llvm.x86.xop.vfrcz.ss on a <4 x float> argument; the
; expected assembly is vfrczss on %xmm0.
551 define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0) {
552 ; CHECK-LABEL: test_int_x86_xop_vfrcz_ss:
554 ; CHECK-NEXT: vfrczss %xmm0, %xmm0
556 %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0) ;
; Memory form: a single float is loaded from %a0 and inserted into element 0 of an
; otherwise-undef vector; the scalar load is expected to appear as the (%rdi)
; source operand of vfrczss.
559 define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(ptr %a0) {
560 ; CHECK-LABEL: test_int_x86_xop_vfrcz_ss_mem:
562 ; CHECK-NEXT: vfrczss (%rdi), %xmm0
564 %elem = load float, ptr %a0
565 %vec = insertelement <4 x float> undef, float %elem, i32 0
566 %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %vec) ;
569 declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone
; Register form: calls llvm.x86.xop.vfrcz.sd on a <2 x double> argument and
; returns the result; the expected assembly is vfrczsd on %xmm0.
571 define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0) {
572 ; CHECK-LABEL: test_int_x86_xop_vfrcz_sd:
574 ; CHECK-NEXT: vfrczsd %xmm0, %xmm0
576 %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0) ;
577 ret <2 x double> %res
; Memory form: a single double is loaded from %a0 and inserted into element 0 of
; an otherwise-undef vector; the scalar load is expected to appear as the (%rdi)
; source operand of vfrczsd.
579 define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(ptr %a0) {
580 ; CHECK-LABEL: test_int_x86_xop_vfrcz_sd_mem:
582 ; CHECK-NEXT: vfrczsd (%rdi), %xmm0
584 %elem = load double, ptr %a0
585 %vec = insertelement <2 x double> undef, double %elem, i32 0
586 %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %vec) ;
587 ret <2 x double> %res
589 declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone
; Register form: calls llvm.x86.xop.vfrcz.pd on a <2 x double> argument and
; returns the result; the expected assembly is vfrczpd on %xmm0.
591 define <2 x double> @test_int_x86_xop_vfrcz_pd(<2 x double> %a0) {
592 ; CHECK-LABEL: test_int_x86_xop_vfrcz_pd:
594 ; CHECK-NEXT: vfrczpd %xmm0, %xmm0
596 %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %a0) ;
597 ret <2 x double> %res
; Memory form: the <2 x double> input is loaded from %a0 and the load is expected
; to appear as the (%rdi) source operand of vfrczpd.
599 define <2 x double> @test_int_x86_xop_vfrcz_pd_mem(ptr %a0) {
600 ; CHECK-LABEL: test_int_x86_xop_vfrcz_pd_mem:
602 ; CHECK-NEXT: vfrczpd (%rdi), %xmm0
604 %vec = load <2 x double>, ptr %a0
605 %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %vec) ;
606 ret <2 x double> %res
608 declare <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double>) nounwind readnone
; 256-bit register form: calls llvm.x86.xop.vfrcz.pd.256 on a <4 x double>
; argument and returns the result; the expected assembly is vfrczpd on %ymm0.
610 define <4 x double> @test_int_x86_xop_vfrcz_pd_256(<4 x double> %a0) {
611 ; CHECK-LABEL: test_int_x86_xop_vfrcz_pd_256:
613 ; CHECK-NEXT: vfrczpd %ymm0, %ymm0
615 %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %a0) ;
616 ret <4 x double> %res
; 256-bit memory form: the <4 x double> input is loaded from %a0 and the load is
; expected to appear as the (%rdi) source operand of vfrczpd with a ymm result.
618 define <4 x double> @test_int_x86_xop_vfrcz_pd_256_mem(ptr %a0) {
619 ; CHECK-LABEL: test_int_x86_xop_vfrcz_pd_256_mem:
621 ; CHECK-NEXT: vfrczpd (%rdi), %ymm0
623 %vec = load <4 x double>, ptr %a0
624 %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %vec) ;
625 ret <4 x double> %res
627 declare <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double>) nounwind readnone
; Register form: calls llvm.x86.xop.vfrcz.ps on a <4 x float> argument; the
; expected assembly is vfrczps on %xmm0.
629 define <4 x float> @test_int_x86_xop_vfrcz_ps(<4 x float> %a0) {
630 ; CHECK-LABEL: test_int_x86_xop_vfrcz_ps:
632 ; CHECK-NEXT: vfrczps %xmm0, %xmm0
634 %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %a0) ;
; Memory form: the <4 x float> input is loaded from %a0 and the load is expected
; to appear as the (%rdi) source operand of vfrczps.
637 define <4 x float> @test_int_x86_xop_vfrcz_ps_mem(ptr %a0) {
638 ; CHECK-LABEL: test_int_x86_xop_vfrcz_ps_mem:
640 ; CHECK-NEXT: vfrczps (%rdi), %xmm0
642 %vec = load <4 x float>, ptr %a0
643 %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %vec) ;
646 declare <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float>) nounwind readnone
; 256-bit register form: calls llvm.x86.xop.vfrcz.ps.256 on an <8 x float>
; argument; the expected assembly is vfrczps on %ymm0.
648 define <8 x float> @test_int_x86_xop_vfrcz_ps_256(<8 x float> %a0) {
649 ; CHECK-LABEL: test_int_x86_xop_vfrcz_ps_256:
651 ; CHECK-NEXT: vfrczps %ymm0, %ymm0
653 %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %a0) ;
; 256-bit memory form: the <8 x float> input is loaded from %a0 and the load is
; expected to appear as the (%rdi) source operand of vfrczps with a ymm result.
656 define <8 x float> @test_int_x86_xop_vfrcz_ps_256_mem(ptr %a0) {
657 ; CHECK-LABEL: test_int_x86_xop_vfrcz_ps_256_mem:
659 ; CHECK-NEXT: vfrczps (%rdi), %ymm0
661 %vec = load <8 x float>, ptr %a0
662 %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %vec) ;
665 declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone