1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
5 declare <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
7 define <16 x i8>@test_int_x86_avx512_mask_permvar_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
8 ; X86-LABEL: test_int_x86_avx512_mask_permvar_qi_128:
10 ; X86-NEXT: vpermb %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x8d,0xd8]
11 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
12 ; X86-NEXT: vpermb %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x8d,0xd0]
13 ; X86-NEXT: vpermb %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x8d,0xc0]
14 ; X86-NEXT: vpaddb %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3]
15 ; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
16 ; X86-NEXT: retl # encoding: [0xc3]
18 ; X64-LABEL: test_int_x86_avx512_mask_permvar_qi_128:
20 ; X64-NEXT: vpermb %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x8d,0xd8]
21 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
22 ; X64-NEXT: vpermb %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x8d,0xd0]
23 ; X64-NEXT: vpermb %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x8d,0xc0]
24 ; X64-NEXT: vpaddb %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3]
25 ; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
26 ; X64-NEXT: retq # encoding: [0xc3]
27 %res = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
28 %res1 = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %x3)
29 %res2 = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
30 %res3 = add <16 x i8> %res, %res1
31 %res4 = add <16 x i8> %res3, %res2
35 declare <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
37 define <32 x i8>@test_int_x86_avx512_mask_permvar_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
38 ; X86-LABEL: test_int_x86_avx512_mask_permvar_qi_256:
40 ; X86-NEXT: vpermb %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x8d,0xd8]
41 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
42 ; X86-NEXT: vpermb %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x8d,0xd0]
43 ; X86-NEXT: vpermb %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x8d,0xc0]
44 ; X86-NEXT: vpaddb %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc3]
45 ; X86-NEXT: vpaddb %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
46 ; X86-NEXT: retl # encoding: [0xc3]
48 ; X64-LABEL: test_int_x86_avx512_mask_permvar_qi_256:
50 ; X64-NEXT: vpermb %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x8d,0xd8]
51 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
52 ; X64-NEXT: vpermb %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x8d,0xd0]
53 ; X64-NEXT: vpermb %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x8d,0xc0]
54 ; X64-NEXT: vpaddb %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc3]
55 ; X64-NEXT: vpaddb %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
56 ; X64-NEXT: retq # encoding: [0xc3]
57 %res = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
58 %res1 = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> zeroinitializer, i32 %x3)
59 %res2 = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
60 %res3 = add <32 x i8> %res, %res1
61 %res4 = add <32 x i8> %res3, %res2
65 declare <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
67 define <16 x i8>@test_int_x86_avx512_mask_pmultishift_qb_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
68 ; X86-LABEL: test_int_x86_avx512_mask_pmultishift_qb_128:
70 ; X86-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x83,0xd9]
71 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
72 ; X86-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x83,0xd1]
73 ; X86-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x83,0xc1]
74 ; X86-NEXT: vpaddb %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3]
75 ; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
76 ; X86-NEXT: retl # encoding: [0xc3]
78 ; X64-LABEL: test_int_x86_avx512_mask_pmultishift_qb_128:
80 ; X64-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x83,0xd9]
81 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
82 ; X64-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x83,0xd1]
83 ; X64-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x83,0xc1]
84 ; X64-NEXT: vpaddb %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3]
85 ; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
86 ; X64-NEXT: retq # encoding: [0xc3]
87 %res = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
88 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %x3)
89 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
90 %res3 = add <16 x i8> %res, %res1
91 %res4 = add <16 x i8> %res3, %res2
95 declare <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
97 define <32 x i8>@test_int_x86_avx512_mask_pmultishift_qb_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
98 ; X86-LABEL: test_int_x86_avx512_mask_pmultishift_qb_256:
100 ; X86-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x83,0xd9]
101 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
102 ; X86-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x83,0xd1]
103 ; X86-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x83,0xc1]
104 ; X86-NEXT: vpaddb %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc3]
105 ; X86-NEXT: vpaddb %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
106 ; X86-NEXT: retl # encoding: [0xc3]
108 ; X64-LABEL: test_int_x86_avx512_mask_pmultishift_qb_256:
110 ; X64-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x83,0xd9]
111 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
112 ; X64-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x83,0xd1]
113 ; X64-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x83,0xc1]
114 ; X64-NEXT: vpaddb %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc3]
115 ; X64-NEXT: vpaddb %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
116 ; X64-NEXT: retq # encoding: [0xc3]
117 %res = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
118 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> zeroinitializer, i32 %x3)
119 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
120 %res3 = add <32 x i8> %res, %res1
121 %res4 = add <32 x i8> %res3, %res2
125 declare <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
127 define <16 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
128 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_128:
130 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
131 ; X86-NEXT: vpermt2b %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7d,0xda]
132 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
133 ; X86-NEXT: vpermi2b %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x75,0xca]
134 ; X86-NEXT: vmovdqa %xmm1, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe1]
135 ; X86-NEXT: vpermi2b %xmm2, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x75,0xe2]
136 ; X86-NEXT: vpaddb %xmm3, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc3]
137 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
138 ; X86-NEXT: retl # encoding: [0xc3]
140 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_128:
142 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
143 ; X64-NEXT: vpermt2b %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7d,0xda]
144 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
145 ; X64-NEXT: vpermi2b %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x75,0xca]
146 ; X64-NEXT: vmovdqa %xmm1, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe1]
147 ; X64-NEXT: vpermi2b %xmm2, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x75,0xe2]
148 ; X64-NEXT: vpaddb %xmm3, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc3]
149 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
150 ; X64-NEXT: retq # encoding: [0xc3]
151 %res = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
152 %res1 = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %res, <16 x i8> %x2, i16 %x3)
153 %res2 = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
154 %res3 = add <16 x i8> %res, %res1
155 %res4 = add <16 x i8> %res3, %res2
159 declare <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
161 define <32 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
162 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_256:
164 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
165 ; X86-NEXT: vpermt2b %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7d,0xda]
166 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
167 ; X86-NEXT: vpermi2b %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x75,0xca]
168 ; X86-NEXT: vmovdqa %ymm1, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe1]
169 ; X86-NEXT: vpermi2b %ymm2, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x75,0xe2]
170 ; X86-NEXT: vpaddb %ymm3, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc3]
171 ; X86-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
172 ; X86-NEXT: retl # encoding: [0xc3]
174 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_256:
176 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
177 ; X64-NEXT: vpermt2b %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7d,0xda]
178 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
179 ; X64-NEXT: vpermi2b %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x75,0xca]
180 ; X64-NEXT: vmovdqa %ymm1, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe1]
181 ; X64-NEXT: vpermi2b %ymm2, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x75,0xe2]
182 ; X64-NEXT: vpaddb %ymm3, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc3]
183 ; X64-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
184 ; X64-NEXT: retq # encoding: [0xc3]
185 %res = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
186 %res1 = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %res, <32 x i8> %x2, i32 %x3)
187 %res2 = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
188 %res3 = add <32 x i8> %res, %res1
189 %res4 = add <32 x i8> %res3, %res2
193 declare <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
195 define <16 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
196 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_128:
198 ; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
199 ; X86-NEXT: vpermt2b %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7d,0xda]
200 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
201 ; X86-NEXT: vpermt2b %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7d,0xca]
202 ; X86-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
203 ; X86-NEXT: vpermt2b %xmm2, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7d,0xe2]
204 ; X86-NEXT: vpaddb %xmm3, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc3]
205 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
206 ; X86-NEXT: retl # encoding: [0xc3]
208 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_128:
210 ; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
211 ; X64-NEXT: vpermt2b %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7d,0xda]
212 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
213 ; X64-NEXT: vpermt2b %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7d,0xca]
214 ; X64-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
215 ; X64-NEXT: vpermt2b %xmm2, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7d,0xe2]
216 ; X64-NEXT: vpaddb %xmm3, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc3]
217 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
218 ; X64-NEXT: retq # encoding: [0xc3]
219 %res = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
220 %res1 = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> zeroinitializer, <16 x i8> %x2, i16 %x3)
221 %res2 = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
222 %res3 = add <16 x i8> %res, %res1
223 %res4 = add <16 x i8> %res3, %res2
227 declare <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
229 define <32 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
230 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_256:
232 ; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
233 ; X86-NEXT: vpermt2b %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7d,0xda]
234 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
235 ; X86-NEXT: vpermt2b %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7d,0xca]
236 ; X86-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
237 ; X86-NEXT: vpermt2b %ymm2, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7d,0xe2]
238 ; X86-NEXT: vpaddb %ymm3, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc3]
239 ; X86-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
240 ; X86-NEXT: retl # encoding: [0xc3]
242 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_256:
244 ; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
245 ; X64-NEXT: vpermt2b %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7d,0xda]
246 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
247 ; X64-NEXT: vpermt2b %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7d,0xca]
248 ; X64-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
249 ; X64-NEXT: vpermt2b %ymm2, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7d,0xe2]
250 ; X64-NEXT: vpaddb %ymm3, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc3]
251 ; X64-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
252 ; X64-NEXT: retq # encoding: [0xc3]
253 %res = call <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
254 %res1 = call <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> zeroinitializer, <32 x i8> %x2, i32 %x3)
255 %res2 = call <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
256 %res3 = add <32 x i8> %res, %res1
257 %res4 = add <32 x i8> %res3, %res2
261 declare <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
263 define <16 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
264 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128:
266 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
267 ; X86-NEXT: vpermi2b %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x75,0xc2]
268 ; X86-NEXT: retl # encoding: [0xc3]
270 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128:
272 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
273 ; X64-NEXT: vpermi2b %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x75,0xc2]
274 ; X64-NEXT: retq # encoding: [0xc3]
275 %res = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
279 declare <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
281 define <32 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
282 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_256:
284 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
285 ; X86-NEXT: vpermi2b %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x75,0xc2]
286 ; X86-NEXT: retl # encoding: [0xc3]
288 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_256:
290 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
291 ; X64-NEXT: vpermi2b %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x75,0xc2]
292 ; X64-NEXT: retq # encoding: [0xc3]
293 %res = call <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)