; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl,+gfni,+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+gfni,+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64BW
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl,+gfni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86NOBW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+gfni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64NOBW
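
; The affine-inverse tests below call llvm.x86.vgf2p8affineinvqb.* with three
; different immediates and return the unmasked (imm 3), zero-masked (imm 4),
; and merge-masked (imm 5) results, so all masking forms are covered at each width.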
declare <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8>, <16 x i8>, i8)
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineinvqb_128(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %passthru, i16 %mask) {
; X86BW-LABEL: test_vgf2p8affineinvqb_128:
; X86BW: # %bb.0:
; X86BW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86BW-NEXT: vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xd9,0x03]
; X86BW-NEXT: vgf2p8affineinvqb $4, %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xcf,0xe1,0x04]
; X86BW-NEXT: vgf2p8affineinvqb $5, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xcf,0xd1,0x05]
; X86BW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X86BW-NEXT: vmovdqa %xmm4, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcc]
; X86BW-NEXT: retl # encoding: [0xc3]
;
; X64BW-LABEL: test_vgf2p8affineinvqb_128:
; X64BW: # %bb.0:
; X64BW-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64BW-NEXT: vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xd9,0x03]
; X64BW-NEXT: vgf2p8affineinvqb $4, %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xcf,0xe1,0x04]
; X64BW-NEXT: vgf2p8affineinvqb $5, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xcf,0xd1,0x05]
; X64BW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X64BW-NEXT: vmovdqa %xmm4, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcc]
; X64BW-NEXT: retq # encoding: [0xc3]
;
; X86NOBW-LABEL: test_vgf2p8affineinvqb_128:
; X86NOBW: # %bb.0:
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86NOBW-NEXT: vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xd9,0x03]
; X86NOBW-NEXT: vgf2p8affineinvqb $4, %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xe1,0x04]
; X86NOBW-NEXT: vgf2p8affineinvqb $5, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xc1,0x05]
; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X86NOBW-NEXT: # zmm1 {%k1} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd]
; X86NOBW-NEXT: vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc]
; X86NOBW-NEXT: vpternlogq $184, %xmm0, %xmm5, %xmm2 # encoding: [0x62,0xf3,0xd5,0x08,0x25,0xd0,0xb8]
; X86NOBW-NEXT: # xmm2 = xmm2 ^ (xmm5 & (xmm2 ^ xmm0))
; X86NOBW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X86NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86NOBW-NEXT: retl # encoding: [0xc3]
;
; X64NOBW-LABEL: test_vgf2p8affineinvqb_128:
; X64NOBW: # %bb.0:
; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64NOBW-NEXT: vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xd9,0x03]
; X64NOBW-NEXT: vgf2p8affineinvqb $4, %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xe1,0x04]
; X64NOBW-NEXT: vgf2p8affineinvqb $5, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xc1,0x05]
; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X64NOBW-NEXT: # zmm1 {%k1} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd]
; X64NOBW-NEXT: vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc]
; X64NOBW-NEXT: vpternlogq $184, %xmm0, %xmm5, %xmm2 # encoding: [0x62,0xf3,0xd5,0x08,0x25,0xd0,0xb8]
; X64NOBW-NEXT: # xmm2 = xmm2 ^ (xmm5 & (xmm2 ^ xmm0))
; X64NOBW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X64NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i16 %mask to <16 x i1>
%2 = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 3)
%3 = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 4)
%4 = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 5)
%5 = select <16 x i1> %1, <16 x i8> %3, <16 x i8> zeroinitializer
%6 = select <16 x i1> %1, <16 x i8> %4, <16 x i8> %passthru
%7 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } undef, <16 x i8> %2, 0
%8 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %7, <16 x i8> %5, 1
%9 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %8, <16 x i8> %6, 2
ret { <16 x i8>, <16 x i8>, <16 x i8> } %9
}

declare <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8>, <32 x i8>, i8)
define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineinvqb_256(<32 x i8> %src1, <32 x i8> %src2, <32 x i8> %passthru, i32 %mask) {
; X86BW-LABEL: test_vgf2p8affineinvqb_256:
; X86BW: # %bb.0:
; X86BW-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86BW-NEXT: vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xd9,0x03]
; X86BW-NEXT: vgf2p8affineinvqb $4, %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xcf,0xe1,0x04]
; X86BW-NEXT: vgf2p8affineinvqb $5, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xcf,0xd1,0x05]
; X86BW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X86BW-NEXT: vmovdqa %ymm4, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcc]
; X86BW-NEXT: retl # encoding: [0xc3]
;
; X64BW-LABEL: test_vgf2p8affineinvqb_256:
; X64BW: # %bb.0:
; X64BW-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64BW-NEXT: vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xd9,0x03]
; X64BW-NEXT: vgf2p8affineinvqb $4, %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xcf,0xe1,0x04]
; X64BW-NEXT: vgf2p8affineinvqb $5, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xcf,0xd1,0x05]
; X64BW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X64BW-NEXT: vmovdqa %ymm4, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcc]
; X64BW-NEXT: retq # encoding: [0xc3]
;
; X86NOBW-LABEL: test_vgf2p8affineinvqb_256:
; X86NOBW: # %bb.0:
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x06]
; X86NOBW-NEXT: vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xd9,0x03]
; X86NOBW-NEXT: vgf2p8affineinvqb $4, %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xe1,0x04]
; X86NOBW-NEXT: vgf2p8affineinvqb $5, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xc1,0x05]
; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X86NOBW-NEXT: # zmm1 {%k1} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
; X86NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z} # encoding: [0x62,0xf3,0x55,0xca,0x25,0xed,0xff]
; X86NOBW-NEXT: # zmm5 {%k2} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
; X86NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01]
; X86NOBW-NEXT: vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc]
; X86NOBW-NEXT: vpternlogq $184, %ymm0, %ymm5, %ymm2 # encoding: [0x62,0xf3,0xd5,0x28,0x25,0xd0,0xb8]
; X86NOBW-NEXT: # ymm2 = ymm2 ^ (ymm5 & (ymm2 ^ ymm0))
; X86NOBW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X86NOBW-NEXT: retl # encoding: [0xc3]
;
; X64NOBW-LABEL: test_vgf2p8affineinvqb_256:
; X64NOBW: # %bb.0:
; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64NOBW-NEXT: shrl $16, %edi # encoding: [0xc1,0xef,0x10]
; X64NOBW-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
; X64NOBW-NEXT: vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xd9,0x03]
; X64NOBW-NEXT: vgf2p8affineinvqb $4, %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xe1,0x04]
; X64NOBW-NEXT: vgf2p8affineinvqb $5, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xc1,0x05]
; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X64NOBW-NEXT: # zmm1 {%k1} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
; X64NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z} # encoding: [0x62,0xf3,0x55,0xca,0x25,0xed,0xff]
; X64NOBW-NEXT: # zmm5 {%k2} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
; X64NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01]
; X64NOBW-NEXT: vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc]
; X64NOBW-NEXT: vpternlogq $184, %ymm0, %ymm5, %ymm2 # encoding: [0x62,0xf3,0xd5,0x28,0x25,0xd0,0xb8]
; X64NOBW-NEXT: # ymm2 = ymm2 ^ (ymm5 & (ymm2 ^ ymm0))
; X64NOBW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i32 %mask to <32 x i1>
%2 = call <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 3)
%3 = call <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 4)
%4 = call <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 5)
%5 = select <32 x i1> %1, <32 x i8> %3, <32 x i8> zeroinitializer
%6 = select <32 x i1> %1, <32 x i8> %4, <32 x i8> %passthru
%7 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } undef, <32 x i8> %2, 0
%8 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %7, <32 x i8> %5, 1
%9 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %8, <32 x i8> %6, 2
ret { <32 x i8>, <32 x i8>, <32 x i8> } %9
}

declare <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8>, <64 x i8>, i8)
define { <64 x i8>, <64 x i8>, <64 x i8> } @test_vgf2p8affineinvqb_512(<64 x i8> %src1, <64 x i8> %src2, <64 x i8> %passthru, i64 %mask) {
; X86BW-LABEL: test_vgf2p8affineinvqb_512:
; X86BW: # %bb.0:
; X86BW-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86BW-NEXT: vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xd9,0x03]
; X86BW-NEXT: vgf2p8affineinvqb $4, %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xcf,0xe1,0x04]
; X86BW-NEXT: vgf2p8affineinvqb $5, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xcf,0xd1,0x05]
; X86BW-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
; X86BW-NEXT: vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc]
; X86BW-NEXT: retl # encoding: [0xc3]
;
; X64BW-LABEL: test_vgf2p8affineinvqb_512:
; X64BW: # %bb.0:
; X64BW-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64BW-NEXT: vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xd9,0x03]
; X64BW-NEXT: vgf2p8affineinvqb $4, %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xcf,0xe1,0x04]
; X64BW-NEXT: vgf2p8affineinvqb $5, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xcf,0xd1,0x05]
; X64BW-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
; X64BW-NEXT: vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc]
; X64BW-NEXT: retq # encoding: [0xc3]
;
; X86NOBW-LABEL: test_vgf2p8affineinvqb_512:
; X86NOBW: # %bb.0:
; X86NOBW-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x04]
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x06]
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k3 # encoding: [0xc5,0xf8,0x90,0x5c,0x24,0x0a]
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k4 # encoding: [0xc5,0xf8,0x90,0x64,0x24,0x08]
; X86NOBW-NEXT: vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xc1,0x03]
; X86NOBW-NEXT: vgf2p8affineinvqb $4, %zmm1, %zmm3, %zmm4 # encoding: [0x62,0xf3,0xe5,0x48,0xcf,0xe1,0x04]
; X86NOBW-NEXT: vgf2p8affineinvqb $5, %zmm1, %zmm3, %zmm3 # encoding: [0x62,0xf3,0xe5,0x48,0xcf,0xd9,0x05]
; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} # encoding: [0x62,0xf3,0x75,0xcc,0x25,0xc9,0xff]
; X86NOBW-NEXT: # zmm1 {%k4} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
; X86NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k3} {z} # encoding: [0x62,0xf3,0x55,0xcb,0x25,0xed,0xff]
; X86NOBW-NEXT: # zmm5 {%k3} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
; X86NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xcd,0x01]
; X86NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z} # encoding: [0x62,0xf3,0x55,0xca,0x25,0xed,0xff]
; X86NOBW-NEXT: # zmm5 {%k2} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
; X86NOBW-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k1} {z} # encoding: [0x62,0xf3,0x4d,0xc9,0x25,0xf6,0xff]
; X86NOBW-NEXT: # zmm6 {%k1} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm6, %xmm6 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xf6]
; X86NOBW-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x55,0x38,0xee,0x01]
; X86NOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm5, %zmm5 # encoding: [0x62,0xf3,0xd5,0x48,0x3a,0xe9,0x01]
; X86NOBW-NEXT: vpandq %zmm4, %zmm5, %zmm1 # encoding: [0x62,0xf1,0xd5,0x48,0xdb,0xcc]
; X86NOBW-NEXT: vpternlogq $184, %zmm3, %zmm5, %zmm2 # encoding: [0x62,0xf3,0xd5,0x48,0x25,0xd3,0xb8]
; X86NOBW-NEXT: # zmm2 = zmm2 ^ (zmm5 & (zmm2 ^ zmm3))
; X86NOBW-NEXT: retl # encoding: [0xc3]
;
; X64NOBW-LABEL: test_vgf2p8affineinvqb_512:
; X64NOBW: # %bb.0:
; X64NOBW-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
; X64NOBW-NEXT: movl %edi, %ecx # encoding: [0x89,0xf9]
; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64NOBW-NEXT: shrq $32, %rdi # encoding: [0x48,0xc1,0xef,0x20]
; X64NOBW-NEXT: shrq $48, %rax # encoding: [0x48,0xc1,0xe8,0x30]
; X64NOBW-NEXT: shrl $16, %ecx # encoding: [0xc1,0xe9,0x10]
; X64NOBW-NEXT: kmovw %ecx, %k2 # encoding: [0xc5,0xf8,0x92,0xd1]
; X64NOBW-NEXT: kmovw %eax, %k3 # encoding: [0xc5,0xf8,0x92,0xd8]
; X64NOBW-NEXT: kmovw %edi, %k4 # encoding: [0xc5,0xf8,0x92,0xe7]
; X64NOBW-NEXT: vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xd9,0x03]
; X64NOBW-NEXT: vgf2p8affineinvqb $4, %zmm1, %zmm0, %zmm4 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xe1,0x04]
; X64NOBW-NEXT: vgf2p8affineinvqb $5, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xc1,0x05]
; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} # encoding: [0x62,0xf3,0x75,0xcc,0x25,0xc9,0xff]
; X64NOBW-NEXT: # zmm1 {%k4} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
; X64NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k3} {z} # encoding: [0x62,0xf3,0x55,0xcb,0x25,0xed,0xff]
; X64NOBW-NEXT: # zmm5 {%k3} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
; X64NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xcd,0x01]
; X64NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z} # encoding: [0x62,0xf3,0x55,0xc9,0x25,0xed,0xff]
; X64NOBW-NEXT: # zmm5 {%k1} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
; X64NOBW-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z} # encoding: [0x62,0xf3,0x4d,0xca,0x25,0xf6,0xff]
; X64NOBW-NEXT: # zmm6 {%k2} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm6, %xmm6 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xf6]
; X64NOBW-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x55,0x38,0xee,0x01]
; X64NOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm5, %zmm5 # encoding: [0x62,0xf3,0xd5,0x48,0x3a,0xe9,0x01]
; X64NOBW-NEXT: vpandq %zmm4, %zmm5, %zmm1 # encoding: [0x62,0xf1,0xd5,0x48,0xdb,0xcc]
; X64NOBW-NEXT: vpternlogq $184, %zmm0, %zmm5, %zmm2 # encoding: [0x62,0xf3,0xd5,0x48,0x25,0xd0,0xb8]
; X64NOBW-NEXT: # zmm2 = zmm2 ^ (zmm5 & (zmm2 ^ zmm0))
; X64NOBW-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i64 %mask to <64 x i1>
%2 = call <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 3)
%3 = call <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 4)
%4 = call <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 5)
%5 = select <64 x i1> %1, <64 x i8> %3, <64 x i8> zeroinitializer
%6 = select <64 x i1> %1, <64 x i8> %4, <64 x i8> %passthru
%7 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } undef, <64 x i8> %2, 0
%8 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %7, <64 x i8> %5, 1
%9 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %8, <64 x i8> %6, 2
ret { <64 x i8>, <64 x i8>, <64 x i8> } %9
}

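; The affine tests below repeat the same pattern for llvm.x86.vgf2p8affineqb.*:
; one unmasked (imm 3), one zero-masked (imm 4), and one merge-masked (imm 5)
; result per vector width.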
declare <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8>, <16 x i8>, i8)
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineqb_128(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %passthru, i16 %mask) {
; X86BW-LABEL: test_vgf2p8affineqb_128:
; X86BW: # %bb.0:
; X86BW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86BW-NEXT: vgf2p8affineqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xd9,0x03]
; X86BW-NEXT: vgf2p8affineqb $4, %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xce,0xe1,0x04]
; X86BW-NEXT: vgf2p8affineqb $5, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xce,0xd1,0x05]
; X86BW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X86BW-NEXT: vmovdqa %xmm4, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcc]
; X86BW-NEXT: retl # encoding: [0xc3]
;
; X64BW-LABEL: test_vgf2p8affineqb_128:
; X64BW: # %bb.0:
; X64BW-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64BW-NEXT: vgf2p8affineqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xd9,0x03]
; X64BW-NEXT: vgf2p8affineqb $4, %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xce,0xe1,0x04]
; X64BW-NEXT: vgf2p8affineqb $5, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xce,0xd1,0x05]
; X64BW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X64BW-NEXT: vmovdqa %xmm4, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcc]
; X64BW-NEXT: retq # encoding: [0xc3]
;
; X86NOBW-LABEL: test_vgf2p8affineqb_128:
; X86NOBW: # %bb.0:
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86NOBW-NEXT: vgf2p8affineqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xd9,0x03]
; X86NOBW-NEXT: vgf2p8affineqb $4, %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xe1,0x04]
; X86NOBW-NEXT: vgf2p8affineqb $5, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xc1,0x05]
; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X86NOBW-NEXT: # zmm1 {%k1} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd]
; X86NOBW-NEXT: vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc]
; X86NOBW-NEXT: vpternlogq $184, %xmm0, %xmm5, %xmm2 # encoding: [0x62,0xf3,0xd5,0x08,0x25,0xd0,0xb8]
; X86NOBW-NEXT: # xmm2 = xmm2 ^ (xmm5 & (xmm2 ^ xmm0))
; X86NOBW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X86NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86NOBW-NEXT: retl # encoding: [0xc3]
;
; X64NOBW-LABEL: test_vgf2p8affineqb_128:
; X64NOBW: # %bb.0:
; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64NOBW-NEXT: vgf2p8affineqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xd9,0x03]
; X64NOBW-NEXT: vgf2p8affineqb $4, %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xe1,0x04]
; X64NOBW-NEXT: vgf2p8affineqb $5, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xc1,0x05]
; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X64NOBW-NEXT: # zmm1 {%k1} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd]
; X64NOBW-NEXT: vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc]
; X64NOBW-NEXT: vpternlogq $184, %xmm0, %xmm5, %xmm2 # encoding: [0x62,0xf3,0xd5,0x08,0x25,0xd0,0xb8]
; X64NOBW-NEXT: # xmm2 = xmm2 ^ (xmm5 & (xmm2 ^ xmm0))
; X64NOBW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X64NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i16 %mask to <16 x i1>
%2 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 3)
%3 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 4)
%4 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 5)
%5 = select <16 x i1> %1, <16 x i8> %3, <16 x i8> zeroinitializer
%6 = select <16 x i1> %1, <16 x i8> %4, <16 x i8> %passthru
%7 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } undef, <16 x i8> %2, 0
%8 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %7, <16 x i8> %5, 1
%9 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %8, <16 x i8> %6, 2
ret { <16 x i8>, <16 x i8>, <16 x i8> } %9
}

declare <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8>, <32 x i8>, i8)
define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineqb_256(<32 x i8> %src1, <32 x i8> %src2, <32 x i8> %passthru, i32 %mask) {
; X86BW-LABEL: test_vgf2p8affineqb_256:
; X86BW: # %bb.0:
; X86BW-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86BW-NEXT: vgf2p8affineqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xd9,0x03]
; X86BW-NEXT: vgf2p8affineqb $4, %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xce,0xe1,0x04]
; X86BW-NEXT: vgf2p8affineqb $5, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xce,0xd1,0x05]
; X86BW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X86BW-NEXT: vmovdqa %ymm4, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcc]
; X86BW-NEXT: retl # encoding: [0xc3]
;
; X64BW-LABEL: test_vgf2p8affineqb_256:
; X64BW: # %bb.0:
; X64BW-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64BW-NEXT: vgf2p8affineqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xd9,0x03]
; X64BW-NEXT: vgf2p8affineqb $4, %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xce,0xe1,0x04]
; X64BW-NEXT: vgf2p8affineqb $5, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xce,0xd1,0x05]
; X64BW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X64BW-NEXT: vmovdqa %ymm4, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcc]
; X64BW-NEXT: retq # encoding: [0xc3]
;
; X86NOBW-LABEL: test_vgf2p8affineqb_256:
; X86NOBW: # %bb.0:
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x06]
; X86NOBW-NEXT: vgf2p8affineqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xd9,0x03]
; X86NOBW-NEXT: vgf2p8affineqb $4, %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xe1,0x04]
; X86NOBW-NEXT: vgf2p8affineqb $5, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xc1,0x05]
; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X86NOBW-NEXT: # zmm1 {%k1} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
; X86NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z} # encoding: [0x62,0xf3,0x55,0xca,0x25,0xed,0xff]
; X86NOBW-NEXT: # zmm5 {%k2} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
; X86NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01]
; X86NOBW-NEXT: vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc]
; X86NOBW-NEXT: vpternlogq $184, %ymm0, %ymm5, %ymm2 # encoding: [0x62,0xf3,0xd5,0x28,0x25,0xd0,0xb8]
; X86NOBW-NEXT: # ymm2 = ymm2 ^ (ymm5 & (ymm2 ^ ymm0))
; X86NOBW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X86NOBW-NEXT: retl # encoding: [0xc3]
;
; X64NOBW-LABEL: test_vgf2p8affineqb_256:
; X64NOBW: # %bb.0:
; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64NOBW-NEXT: shrl $16, %edi # encoding: [0xc1,0xef,0x10]
; X64NOBW-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
; X64NOBW-NEXT: vgf2p8affineqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xd9,0x03]
; X64NOBW-NEXT: vgf2p8affineqb $4, %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xe1,0x04]
; X64NOBW-NEXT: vgf2p8affineqb $5, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xc1,0x05]
; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X64NOBW-NEXT: # zmm1 {%k1} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
; X64NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z} # encoding: [0x62,0xf3,0x55,0xca,0x25,0xed,0xff]
; X64NOBW-NEXT: # zmm5 {%k2} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
; X64NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01]
; X64NOBW-NEXT: vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc]
; X64NOBW-NEXT: vpternlogq $184, %ymm0, %ymm5, %ymm2 # encoding: [0x62,0xf3,0xd5,0x28,0x25,0xd0,0xb8]
; X64NOBW-NEXT: # ymm2 = ymm2 ^ (ymm5 & (ymm2 ^ ymm0))
; X64NOBW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i32 %mask to <32 x i1>
%2 = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 3)
%3 = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 4)
%4 = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 5)
%5 = select <32 x i1> %1, <32 x i8> %3, <32 x i8> zeroinitializer
%6 = select <32 x i1> %1, <32 x i8> %4, <32 x i8> %passthru
%7 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } undef, <32 x i8> %2, 0
%8 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %7, <32 x i8> %5, 1
%9 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %8, <32 x i8> %6, 2
ret { <32 x i8>, <32 x i8>, <32 x i8> } %9
}

declare <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8>, <64 x i8>, i8)
define { <64 x i8>, <64 x i8>, <64 x i8> } @test_vgf2p8affineqb_512(<64 x i8> %src1, <64 x i8> %src2, <64 x i8> %passthru, i64 %mask) {
; X86BW-LABEL: test_vgf2p8affineqb_512:
; X86BW: # %bb.0:
; X86BW-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86BW-NEXT: vgf2p8affineqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xd9,0x03]
; X86BW-NEXT: vgf2p8affineqb $4, %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xce,0xe1,0x04]
; X86BW-NEXT: vgf2p8affineqb $5, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xce,0xd1,0x05]
; X86BW-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
; X86BW-NEXT: vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc]
; X86BW-NEXT: retl # encoding: [0xc3]
;
; X64BW-LABEL: test_vgf2p8affineqb_512:
; X64BW: # %bb.0:
; X64BW-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64BW-NEXT: vgf2p8affineqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xd9,0x03]
; X64BW-NEXT: vgf2p8affineqb $4, %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xce,0xe1,0x04]
; X64BW-NEXT: vgf2p8affineqb $5, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xce,0xd1,0x05]
; X64BW-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
; X64BW-NEXT: vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc]
; X64BW-NEXT: retq # encoding: [0xc3]
;
; X86NOBW-LABEL: test_vgf2p8affineqb_512:
; X86NOBW: # %bb.0:
; X86NOBW-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x04]
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x06]
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k3 # encoding: [0xc5,0xf8,0x90,0x5c,0x24,0x0a]
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k4 # encoding: [0xc5,0xf8,0x90,0x64,0x24,0x08]
; X86NOBW-NEXT: vgf2p8affineqb $3, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xc1,0x03]
; X86NOBW-NEXT: vgf2p8affineqb $4, %zmm1, %zmm3, %zmm4 # encoding: [0x62,0xf3,0xe5,0x48,0xce,0xe1,0x04]
; X86NOBW-NEXT: vgf2p8affineqb $5, %zmm1, %zmm3, %zmm3 # encoding: [0x62,0xf3,0xe5,0x48,0xce,0xd9,0x05]
; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} # encoding: [0x62,0xf3,0x75,0xcc,0x25,0xc9,0xff]
; X86NOBW-NEXT: # zmm1 {%k4} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
; X86NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k3} {z} # encoding: [0x62,0xf3,0x55,0xcb,0x25,0xed,0xff]
; X86NOBW-NEXT: # zmm5 {%k3} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
; X86NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xcd,0x01]
; X86NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z} # encoding: [0x62,0xf3,0x55,0xca,0x25,0xed,0xff]
; X86NOBW-NEXT: # zmm5 {%k2} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
; X86NOBW-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k1} {z} # encoding: [0x62,0xf3,0x4d,0xc9,0x25,0xf6,0xff]
; X86NOBW-NEXT: # zmm6 {%k1} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm6, %xmm6 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xf6]
; X86NOBW-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x55,0x38,0xee,0x01]
; X86NOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm5, %zmm5 # encoding: [0x62,0xf3,0xd5,0x48,0x3a,0xe9,0x01]
; X86NOBW-NEXT: vpandq %zmm4, %zmm5, %zmm1 # encoding: [0x62,0xf1,0xd5,0x48,0xdb,0xcc]
; X86NOBW-NEXT: vpternlogq $184, %zmm3, %zmm5, %zmm2 # encoding: [0x62,0xf3,0xd5,0x48,0x25,0xd3,0xb8]
; X86NOBW-NEXT: # zmm2 = zmm2 ^ (zmm5 & (zmm2 ^ zmm3))
; X86NOBW-NEXT: retl # encoding: [0xc3]
;
; X64NOBW-LABEL: test_vgf2p8affineqb_512:
; X64NOBW: # %bb.0:
; X64NOBW-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
; X64NOBW-NEXT: movl %edi, %ecx # encoding: [0x89,0xf9]
; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64NOBW-NEXT: shrq $32, %rdi # encoding: [0x48,0xc1,0xef,0x20]
; X64NOBW-NEXT: shrq $48, %rax # encoding: [0x48,0xc1,0xe8,0x30]
; X64NOBW-NEXT: shrl $16, %ecx # encoding: [0xc1,0xe9,0x10]
; X64NOBW-NEXT: kmovw %ecx, %k2 # encoding: [0xc5,0xf8,0x92,0xd1]
; X64NOBW-NEXT: kmovw %eax, %k3 # encoding: [0xc5,0xf8,0x92,0xd8]
; X64NOBW-NEXT: kmovw %edi, %k4 # encoding: [0xc5,0xf8,0x92,0xe7]
; X64NOBW-NEXT: vgf2p8affineqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xd9,0x03]
; X64NOBW-NEXT: vgf2p8affineqb $4, %zmm1, %zmm0, %zmm4 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xe1,0x04]
; X64NOBW-NEXT: vgf2p8affineqb $5, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xc1,0x05]
; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} # encoding: [0x62,0xf3,0x75,0xcc,0x25,0xc9,0xff]
; X64NOBW-NEXT: # zmm1 {%k4} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
; X64NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k3} {z} # encoding: [0x62,0xf3,0x55,0xcb,0x25,0xed,0xff]
; X64NOBW-NEXT: # zmm5 {%k3} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
; X64NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xcd,0x01]
; X64NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z} # encoding: [0x62,0xf3,0x55,0xc9,0x25,0xed,0xff]
; X64NOBW-NEXT: # zmm5 {%k1} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
; X64NOBW-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z} # encoding: [0x62,0xf3,0x4d,0xca,0x25,0xf6,0xff]
; X64NOBW-NEXT: # zmm6 {%k2} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm6, %xmm6 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xf6]
; X64NOBW-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x55,0x38,0xee,0x01]
; X64NOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm5, %zmm5 # encoding: [0x62,0xf3,0xd5,0x48,0x3a,0xe9,0x01]
; X64NOBW-NEXT: vpandq %zmm4, %zmm5, %zmm1 # encoding: [0x62,0xf1,0xd5,0x48,0xdb,0xcc]
; X64NOBW-NEXT: vpternlogq $184, %zmm0, %zmm5, %zmm2 # encoding: [0x62,0xf3,0xd5,0x48,0x25,0xd0,0xb8]
; X64NOBW-NEXT: # zmm2 = zmm2 ^ (zmm5 & (zmm2 ^ zmm0))
; X64NOBW-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i64 %mask to <64 x i1>
%2 = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 3)
%3 = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 4)
%4 = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 5)
%5 = select <64 x i1> %1, <64 x i8> %3, <64 x i8> zeroinitializer
%6 = select <64 x i1> %1, <64 x i8> %4, <64 x i8> %passthru
%7 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } undef, <64 x i8> %2, 0
%8 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %7, <64 x i8> %5, 1
%9 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %8, <64 x i8> %6, 2
ret { <64 x i8>, <64 x i8>, <64 x i8> } %9
}

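; The multiply tests below cover llvm.x86.vgf2p8mulb.* with separate unmasked,
; merge-masked (_mask), and zero-masked (_maskz) functions per vector width.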
declare <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8>, <16 x i8>)
define <16 x i8> @test_vgf2p8mulb_128(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: test_vgf2p8mulb_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> %src1, <16 x i8> %src2)
ret <16 x i8> %1
}

define <16 x i8> @test_vgf2p8mulb_128_mask(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %passthru, i16 %mask) {
; X86BW-LABEL: test_vgf2p8mulb_128_mask:
; X86BW: # %bb.0:
; X86BW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86BW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xcf,0xd1]
; X86BW-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86BW-NEXT: retl # encoding: [0xc3]
;
; X64BW-LABEL: test_vgf2p8mulb_128_mask:
; X64BW: # %bb.0:
; X64BW-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64BW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xcf,0xd1]
; X64BW-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64BW-NEXT: retq # encoding: [0xc3]
;
; X86NOBW-LABEL: test_vgf2p8mulb_128_mask:
; X86NOBW: # %bb.0:
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86NOBW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc9]
; X86NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
; X86NOBW-NEXT: # zmm0 {%k1} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
; X86NOBW-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0xca]
; X86NOBW-NEXT: # xmm0 = xmm2 ^ (xmm0 & (xmm1 ^ xmm2))
; X86NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86NOBW-NEXT: retl # encoding: [0xc3]
;
; X64NOBW-LABEL: test_vgf2p8mulb_128_mask:
; X64NOBW: # %bb.0:
; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64NOBW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc9]
; X64NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
; X64NOBW-NEXT: # zmm0 {%k1} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
; X64NOBW-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0xca]
; X64NOBW-NEXT: # xmm0 = xmm2 ^ (xmm0 & (xmm1 ^ xmm2))
; X64NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i16 %mask to <16 x i1>
%2 = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> %src1, <16 x i8> %src2)
%3 = select <16 x i1> %1, <16 x i8> %2, <16 x i8> %passthru
ret <16 x i8> %3
}

define <16 x i8> @test_vgf2p8mulb_128_maskz(<16 x i8> %src1, <16 x i8> %src2, i16 %mask) {
; X86BW-LABEL: test_vgf2p8mulb_128_maskz:
; X86BW: # %bb.0:
; X86BW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86BW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0xcf,0xc1]
; X86BW-NEXT: retl # encoding: [0xc3]
;
; X64BW-LABEL: test_vgf2p8mulb_128_maskz:
; X64BW: # %bb.0:
; X64BW-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64BW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0xcf,0xc1]
; X64BW-NEXT: retq # encoding: [0xc3]
;
; X86NOBW-LABEL: test_vgf2p8mulb_128_maskz:
; X86NOBW: # %bb.0:
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86NOBW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc1]
; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X86NOBW-NEXT: # zmm1 {%k1} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
; X86NOBW-NEXT: vpand %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xdb,0xc0]
; X86NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86NOBW-NEXT: retl # encoding: [0xc3]
;
; X64NOBW-LABEL: test_vgf2p8mulb_128_maskz:
; X64NOBW: # %bb.0:
; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64NOBW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc1]
; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X64NOBW-NEXT: # zmm1 {%k1} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
; X64NOBW-NEXT: vpand %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xdb,0xc0]
; X64NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i16 %mask to <16 x i1>
%2 = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> %src1, <16 x i8> %src2)
%3 = select <16 x i1> %1, <16 x i8> %2, <16 x i8> zeroinitializer
ret <16 x i8> %3
}

declare <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8>, <32 x i8>)
define <32 x i8> @test_vgf2p8mulb_256(<32 x i8> %src1, <32 x i8> %src2) {
; CHECK-LABEL: test_vgf2p8mulb_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = call <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8> %src1, <32 x i8> %src2)
ret <32 x i8> %1
}

define <32 x i8> @test_vgf2p8mulb_256_mask(<32 x i8> %src1, <32 x i8> %src2, <32 x i8> %passthru, i32 %mask) {
; X86BW-LABEL: test_vgf2p8mulb_256_mask:
; X86BW: # %bb.0:
; X86BW-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86BW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xcf,0xd1]
; X86BW-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86BW-NEXT: retl # encoding: [0xc3]
;
; X64BW-LABEL: test_vgf2p8mulb_256_mask:
; X64BW: # %bb.0:
; X64BW-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64BW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xcf,0xd1]
; X64BW-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64BW-NEXT: retq # encoding: [0xc3]
;
; X86NOBW-LABEL: test_vgf2p8mulb_256_mask:
; X86NOBW: # %bb.0:
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x06]
; X86NOBW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc9]
; X86NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
; X86NOBW-NEXT: # zmm0 {%k1} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
; X86NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} # encoding: [0x62,0xf3,0x65,0xca,0x25,0xdb,0xff]
; X86NOBW-NEXT: # zmm3 {%k2} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
; X86NOBW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc3,0x01]
; X86NOBW-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0xca]
; X86NOBW-NEXT: # ymm0 = ymm2 ^ (ymm0 & (ymm1 ^ ymm2))
; X86NOBW-NEXT: retl # encoding: [0xc3]
;
; X64NOBW-LABEL: test_vgf2p8mulb_256_mask:
; X64NOBW: # %bb.0:
; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64NOBW-NEXT: shrl $16, %edi # encoding: [0xc1,0xef,0x10]
; X64NOBW-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
; X64NOBW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc9]
; X64NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
; X64NOBW-NEXT: # zmm0 {%k1} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
; X64NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} # encoding: [0x62,0xf3,0x65,0xca,0x25,0xdb,0xff]
; X64NOBW-NEXT: # zmm3 {%k2} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
; X64NOBW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc3,0x01]
; X64NOBW-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0xca]
; X64NOBW-NEXT: # ymm0 = ymm2 ^ (ymm0 & (ymm1 ^ ymm2))
; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i32 %mask to <32 x i1>
%2 = call <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8> %src1, <32 x i8> %src2)
%3 = select <32 x i1> %1, <32 x i8> %2, <32 x i8> %passthru
ret <32 x i8> %3
}

define <32 x i8> @test_vgf2p8mulb_256_maskz(<32 x i8> %src1, <32 x i8> %src2, i32 %mask) {
; X86BW-LABEL: test_vgf2p8mulb_256_maskz:
; X86BW: # %bb.0:
; X86BW-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86BW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0xcf,0xc1]
; X86BW-NEXT: retl # encoding: [0xc3]
;
; X64BW-LABEL: test_vgf2p8mulb_256_maskz:
; X64BW: # %bb.0:
; X64BW-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64BW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0xcf,0xc1]
; X64BW-NEXT: retq # encoding: [0xc3]
;
; X86NOBW-LABEL: test_vgf2p8mulb_256_maskz:
; X86NOBW: # %bb.0:
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x06]
; X86NOBW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc1]
; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X86NOBW-NEXT: # zmm1 {%k1} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
; X86NOBW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} # encoding: [0x62,0xf3,0x6d,0xca,0x25,0xd2,0xff]
; X86NOBW-NEXT: # zmm2 {%k2} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm2, %xmm2 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xd2]
; X86NOBW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xca,0x01]
; X86NOBW-NEXT: vpand %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xdb,0xc0]
; X86NOBW-NEXT: retl # encoding: [0xc3]
;
; X64NOBW-LABEL: test_vgf2p8mulb_256_maskz:
; X64NOBW: # %bb.0:
; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64NOBW-NEXT: shrl $16, %edi # encoding: [0xc1,0xef,0x10]
; X64NOBW-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
; X64NOBW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc1]
; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X64NOBW-NEXT: # zmm1 {%k1} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
; X64NOBW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} # encoding: [0x62,0xf3,0x6d,0xca,0x25,0xd2,0xff]
; X64NOBW-NEXT: # zmm2 {%k2} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm2, %xmm2 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xd2]
; X64NOBW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xca,0x01]
; X64NOBW-NEXT: vpand %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xdb,0xc0]
; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i32 %mask to <32 x i1>
%2 = call <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8> %src1, <32 x i8> %src2)
%3 = select <32 x i1> %1, <32 x i8> %2, <32 x i8> zeroinitializer
ret <32 x i8> %3
}

declare <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8>, <64 x i8>)
define <64 x i8> @test_vgf2p8mulb_512(<64 x i8> %src1, <64 x i8> %src2) {
; CHECK-LABEL: test_vgf2p8mulb_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0xcf,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = call <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8> %src1, <64 x i8> %src2)
ret <64 x i8> %1
}

define <64 x i8> @test_vgf2p8mulb_512_mask(<64 x i8> %src1, <64 x i8> %src2, <64 x i8> %passthru, i64 %mask) {
; X86BW-LABEL: test_vgf2p8mulb_512_mask:
; X86BW: # %bb.0:
; X86BW-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86BW-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xcf,0xd1]
; X86BW-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86BW-NEXT: retl # encoding: [0xc3]
;
; X64BW-LABEL: test_vgf2p8mulb_512_mask:
; X64BW: # %bb.0:
; X64BW-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64BW-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xcf,0xd1]
; X64BW-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64BW-NEXT: retq # encoding: [0xc3]
;
; X86NOBW-LABEL: test_vgf2p8mulb_512_mask:
; X86NOBW: # %bb.0:
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x06]
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k3 # encoding: [0xc5,0xf8,0x90,0x5c,0x24,0x0a]
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k4 # encoding: [0xc5,0xf8,0x90,0x64,0x24,0x08]
; X86NOBW-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0xcf,0xc9]
; X86NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z} # encoding: [0x62,0xf3,0x7d,0xcc,0x25,0xc0,0xff]
; X86NOBW-NEXT: # zmm0 {%k4} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
; X86NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k3} {z} # encoding: [0x62,0xf3,0x65,0xcb,0x25,0xdb,0xff]
; X86NOBW-NEXT: # zmm3 {%k3} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
; X86NOBW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc3,0x01]
; X86NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x65,0xc9,0x25,0xdb,0xff]
; X86NOBW-NEXT: # zmm3 {%k1} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
; X86NOBW-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k2} {z} # encoding: [0x62,0xf3,0x5d,0xca,0x25,0xe4,0xff]
; X86NOBW-NEXT: # zmm4 {%k2} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm4, %xmm4 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xe4]
; X86NOBW-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x65,0x38,0xdc,0x01]
; X86NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0 # encoding: [0x62,0xf3,0xe5,0x48,0x3a,0xc0,0x01]
; X86NOBW-NEXT: vpternlogq $202, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf3,0xf5,0x48,0x25,0xc2,0xca]
; X86NOBW-NEXT: # zmm0 = zmm2 ^ (zmm0 & (zmm1 ^ zmm2))
; X86NOBW-NEXT: retl # encoding: [0xc3]
;
; X64NOBW-LABEL: test_vgf2p8mulb_512_mask:
; X64NOBW: # %bb.0:
; X64NOBW-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
; X64NOBW-NEXT: movl %edi, %ecx # encoding: [0x89,0xf9]
; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64NOBW-NEXT: shrq $32, %rdi # encoding: [0x48,0xc1,0xef,0x20]
; X64NOBW-NEXT: shrq $48, %rax # encoding: [0x48,0xc1,0xe8,0x30]
; X64NOBW-NEXT: shrl $16, %ecx # encoding: [0xc1,0xe9,0x10]
; X64NOBW-NEXT: kmovw %ecx, %k2 # encoding: [0xc5,0xf8,0x92,0xd1]
; X64NOBW-NEXT: kmovw %eax, %k3 # encoding: [0xc5,0xf8,0x92,0xd8]
; X64NOBW-NEXT: kmovw %edi, %k4 # encoding: [0xc5,0xf8,0x92,0xe7]
; X64NOBW-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0xcf,0xc9]
; X64NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z} # encoding: [0x62,0xf3,0x7d,0xcc,0x25,0xc0,0xff]
; X64NOBW-NEXT: # zmm0 {%k4} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
; X64NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k3} {z} # encoding: [0x62,0xf3,0x65,0xcb,0x25,0xdb,0xff]
; X64NOBW-NEXT: # zmm3 {%k3} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
; X64NOBW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc3,0x01]
; X64NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x65,0xc9,0x25,0xdb,0xff]
; X64NOBW-NEXT: # zmm3 {%k1} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
; X64NOBW-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k2} {z} # encoding: [0x62,0xf3,0x5d,0xca,0x25,0xe4,0xff]
; X64NOBW-NEXT: # zmm4 {%k2} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm4, %xmm4 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xe4]
; X64NOBW-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x65,0x38,0xdc,0x01]
; X64NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0 # encoding: [0x62,0xf3,0xe5,0x48,0x3a,0xc0,0x01]
; X64NOBW-NEXT: vpternlogq $202, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf3,0xf5,0x48,0x25,0xc2,0xca]
; X64NOBW-NEXT: # zmm0 = zmm2 ^ (zmm0 & (zmm1 ^ zmm2))
; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i64 %mask to <64 x i1>
%2 = call <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8> %src1, <64 x i8> %src2)
%3 = select <64 x i1> %1, <64 x i8> %2, <64 x i8> %passthru
ret <64 x i8> %3
}

define <64 x i8> @test_vgf2p8mulb_512_maskz(<64 x i8> %src1, <64 x i8> %src2, i64 %mask) {
; X86BW-LABEL: test_vgf2p8mulb_512_maskz:
; X86BW: # %bb.0:
; X86BW-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86BW-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcf,0xc1]
; X86BW-NEXT: retl # encoding: [0xc3]
;
; X64BW-LABEL: test_vgf2p8mulb_512_maskz:
; X64BW: # %bb.0:
; X64BW-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64BW-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcf,0xc1]
; X64BW-NEXT: retq # encoding: [0xc3]
;
; X86NOBW-LABEL: test_vgf2p8mulb_512_maskz:
; X86NOBW: # %bb.0:
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x06]
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k3 # encoding: [0xc5,0xf8,0x90,0x5c,0x24,0x0a]
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k4 # encoding: [0xc5,0xf8,0x90,0x64,0x24,0x08]
; X86NOBW-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0xcf,0xc1]
; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} # encoding: [0x62,0xf3,0x75,0xcc,0x25,0xc9,0xff]
; X86NOBW-NEXT: # zmm1 {%k4} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
; X86NOBW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k3} {z} # encoding: [0x62,0xf3,0x6d,0xcb,0x25,0xd2,0xff]
; X86NOBW-NEXT: # zmm2 {%k3} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm2, %xmm2 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xd2]
; X86NOBW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xca,0x01]
; X86NOBW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} # encoding: [0x62,0xf3,0x6d,0xc9,0x25,0xd2,0xff]
; X86NOBW-NEXT: # zmm2 {%k1} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm2, %xmm2 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xd2]
; X86NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} # encoding: [0x62,0xf3,0x65,0xca,0x25,0xdb,0xff]
; X86NOBW-NEXT: # zmm3 {%k2} {z} = -1
; X86NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
; X86NOBW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x6d,0x38,0xd3,0x01]
; X86NOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 # encoding: [0x62,0xf3,0xed,0x48,0x3a,0xc9,0x01]
; X86NOBW-NEXT: vpandq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xdb,0xc0]
; X86NOBW-NEXT: retl # encoding: [0xc3]
;
; X64NOBW-LABEL: test_vgf2p8mulb_512_maskz:
; X64NOBW: # %bb.0:
; X64NOBW-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
; X64NOBW-NEXT: movl %edi, %ecx # encoding: [0x89,0xf9]
; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64NOBW-NEXT: shrq $32, %rdi # encoding: [0x48,0xc1,0xef,0x20]
; X64NOBW-NEXT: shrq $48, %rax # encoding: [0x48,0xc1,0xe8,0x30]
; X64NOBW-NEXT: shrl $16, %ecx # encoding: [0xc1,0xe9,0x10]
; X64NOBW-NEXT: kmovw %ecx, %k2 # encoding: [0xc5,0xf8,0x92,0xd1]
; X64NOBW-NEXT: kmovw %eax, %k3 # encoding: [0xc5,0xf8,0x92,0xd8]
; X64NOBW-NEXT: kmovw %edi, %k4 # encoding: [0xc5,0xf8,0x92,0xe7]
; X64NOBW-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0xcf,0xc1]
; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} # encoding: [0x62,0xf3,0x75,0xcc,0x25,0xc9,0xff]
; X64NOBW-NEXT: # zmm1 {%k4} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
; X64NOBW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k3} {z} # encoding: [0x62,0xf3,0x6d,0xcb,0x25,0xd2,0xff]
; X64NOBW-NEXT: # zmm2 {%k3} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm2, %xmm2 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xd2]
; X64NOBW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xca,0x01]
; X64NOBW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} # encoding: [0x62,0xf3,0x6d,0xc9,0x25,0xd2,0xff]
; X64NOBW-NEXT: # zmm2 {%k1} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm2, %xmm2 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xd2]
; X64NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} # encoding: [0x62,0xf3,0x65,0xca,0x25,0xdb,0xff]
; X64NOBW-NEXT: # zmm3 {%k2} {z} = -1
; X64NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
; X64NOBW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x6d,0x38,0xd3,0x01]
; X64NOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 # encoding: [0x62,0xf3,0xed,0x48,0x3a,0xc9,0x01]
; X64NOBW-NEXT: vpandq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xdb,0xc0]
; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i64 %mask to <64 x i1>
%2 = call <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8> %src1, <64 x i8> %src2)
%3 = select <64 x i1> %1, <64 x i8> %2, <64 x i8> zeroinitializer
ret <64 x i8> %3
}