; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vbmi | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vbmi-builtins.c
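
; The tests below are meant to match the IR clang emits for the VBMI
; builtins. As a point of reference, test_mm512_mask2_permutex2var_epi8
; corresponds roughly to the following C (a sketch, assuming the
; avx512vbmiintrin.h signatures; f is a hypothetical wrapper, compiled
; with -mavx512vbmi):
;
;   #include <immintrin.h>
;   __m512i f(__m512i A, __m512i I, __mmask64 U, __m512i B) {
;     // Byte-granular two-source shuffle: each result byte is taken from
;     // A or B as selected by the low 7 bits of the matching index byte
;     // in I; lanes whose mask bit is clear keep the byte from I.
;     return _mm512_mask2_permutex2var_epi8(A, I, U, B);
;   }
;
; Note the X86 mask setup in the checks: on 32-bit targets the 64-bit mask
; arrives as two stack words, loaded with two kmovd and glued back together
; with kunpckdq.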
define <8 x i64> @test_mm512_mask2_permutex2var_epi8(<8 x i64> %__A, <8 x i64> %__I, i64 %__U, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask2_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpermi2b %zmm2, %zmm0, %zmm1 {%k1}
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask2_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpermi2b %zmm2, %zmm0, %zmm1 {%k1}
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <64 x i8>
  %1 = bitcast <8 x i64> %__I to <64 x i8>
  %2 = bitcast <8 x i64> %__B to <64 x i8>
  %3 = tail call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %0, <64 x i8> %1, <64 x i8> %2)
  %4 = bitcast i64 %__U to <64 x i1>
  %5 = select <64 x i1> %4, <64 x i8> %3, <64 x i8> %1
  %6 = bitcast <64 x i8> %5 to <8 x i64>
  ret <8 x i64> %6
}
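
; Unmasked variant: with no passthrough lanes to preserve, the compiler is
; free to clobber the index operand, so it selects vpermt2b (indices in
; %zmm1, result overwriting the table in %zmm0) rather than vpermi2b.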
define <8 x i64> @test_mm512_permutex2var_epi8(<8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_permutex2var_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermt2b %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <64 x i8>
  %1 = bitcast <8 x i64> %__I to <64 x i8>
  %2 = bitcast <8 x i64> %__B to <64 x i8>
  %3 = tail call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %0, <64 x i8> %1, <64 x i8> %2)
  %4 = bitcast <64 x i8> %3 to <8 x i64>
  ret <8 x i64> %4
}
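
; Merge-masking variant: lanes with a clear bit in %__U keep their bytes
; from %__A, which is also the destination, so the select against %0 below
; maps to vpermt2b with a plain {%k1} merge.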
define <8 x i64> @test_mm512_mask_permutex2var_epi8(<8 x i64> %__A, i64 %__U, <8 x i64> %__I, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpermt2b %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpermt2b %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <64 x i8>
  %1 = bitcast <8 x i64> %__I to <64 x i8>
  %2 = bitcast <8 x i64> %__B to <64 x i8>
  %3 = tail call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %0, <64 x i8> %1, <64 x i8> %2)
  %4 = bitcast i64 %__U to <64 x i1>
  %5 = select <64 x i1> %4, <64 x i8> %3, <64 x i8> %0
  %6 = bitcast <64 x i8> %5 to <8 x i64>
  ret <8 x i64> %6
}
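
; Zero-masking variant: the select against zeroinitializer maps to the
; {%k1} {z} form of vpermt2b.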
define <8 x i64> @test_mm512_maskz_permutex2var_epi8(i64 %__U, <8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpermt2b %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpermt2b %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <64 x i8>
  %1 = bitcast <8 x i64> %__I to <64 x i8>
  %2 = bitcast <8 x i64> %__B to <64 x i8>
  %3 = tail call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %0, <64 x i8> %1, <64 x i8> %2)
  %4 = bitcast i64 %__U to <64 x i1>
  %5 = select <64 x i1> %4, <64 x i8> %3, <64 x i8> zeroinitializer
  %6 = bitcast <64 x i8> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>)
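
; VPMULTISHIFTQB tests. Each destination byte is an unaligned 8-bit field
; extracted from the corresponding source qword, starting at the bit offset
; (mod 64) given by the matching control byte. The masked test corresponds
; roughly to the following C (a sketch, assuming the avx512vbmiintrin.h
; signatures; g is a hypothetical wrapper):
;
;   #include <immintrin.h>
;   __m512i g(__m512i W, __mmask64 M, __m512i X, __m512i Y) {
;     // X supplies the per-byte bit offsets and Y the qword data;
;     // masked-off lanes keep their bytes from the passthrough W.
;     return _mm512_mask_multishift_epi64_epi8(W, M, X, Y);
;   }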
define <8 x i64> @test_mm512_mask_multishift_epi64_epi8(<8 x i64> %__W, i64 %__M, <8 x i64> %__X, <8 x i64> %__Y) {
; X86-LABEL: test_mm512_mask_multishift_epi64_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpmultishiftqb %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_multishift_epi64_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpmultishiftqb %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__X to <64 x i8>
  %1 = bitcast <8 x i64> %__Y to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.pmultishift.qb.512(<64 x i8> %0, <64 x i8> %1)
  %3 = bitcast <8 x i64> %__W to <64 x i8>
  %4 = bitcast i64 %__M to <64 x i1>
  %5 = select <64 x i1> %4, <64 x i8> %2, <64 x i8> %3
  %6 = bitcast <64 x i8> %5 to <8 x i64>
  ret <8 x i64> %6
}
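
; Zero-masking variant: masked-off result bytes are zeroed ({%k1} {z})
; instead of merged from a passthrough operand.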
define <8 x i64> @test_mm512_maskz_multishift_epi64_epi8(i64 %__M, <8 x i64> %__X, <8 x i64> %__Y) {
; X86-LABEL: test_mm512_maskz_multishift_epi64_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpmultishiftqb %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_multishift_epi64_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpmultishiftqb %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__X to <64 x i8>
  %1 = bitcast <8 x i64> %__Y to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.pmultishift.qb.512(<64 x i8> %0, <64 x i8> %1)
  %3 = bitcast i64 %__M to <64 x i1>
  %4 = select <64 x i1> %3, <64 x i8> %2, <64 x i8> zeroinitializer
  %5 = bitcast <64 x i8> %4 to <8 x i64>
  ret <8 x i64> %5
}
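
; Unmasked variant: no kmov/kunpck mask setup is needed.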
define <8 x i64> @test_mm512_multishift_epi64_epi8(<8 x i64> %__X, <8 x i64> %__Y) {
; CHECK-LABEL: test_mm512_multishift_epi64_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmultishiftqb %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__X to <64 x i8>
  %1 = bitcast <8 x i64> %__Y to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.pmultishift.qb.512(<64 x i8> %0, <64 x i8> %1)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

declare <64 x i8> @llvm.x86.avx512.pmultishift.qb.512(<64 x i8>, <64 x i8>)