; llvm/test/CodeGen/X86/avx512-gfni-intrinsics.ll
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl,+gfni,+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86BW
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+gfni,+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64BW
4 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl,+gfni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86NOBW
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+gfni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64NOBW
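; The *BW run lines include +avx512bw, so the vXi1 byte masks map directly onto
; k registers and the masked forms are folded into the instruction as {%k1} or
; {%k1} {z}. The *NOBW run lines drop avx512bw, so the byte mask has to be
; emulated: the k register is expanded with vpternlogd $255 ... {%k} {z},
; narrowed to bytes with vpmovdb, and then applied with vpand (zeroing) or a
; vpternlogq bitwise select (merging).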
7 declare <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8>, <16 x i8>, i8)
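; Each affine test below returns a { result, maskz-result, merge-result } struct:
; immediate 3 is the unmasked call, immediate 4 is selected against
; zeroinitializer (zeroing) and immediate 5 against %passthru (merging), so one
; function covers all three masking forms of the intrinsic.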
8 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineinvqb_128(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %passthru, i16 %mask) {
9 ; X86BW-LABEL: test_vgf2p8affineinvqb_128:
10 ; X86BW:       # %bb.0:
11 ; X86BW-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
12 ; X86BW-NEXT:    vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xd9,0x03]
13 ; X86BW-NEXT:    vgf2p8affineinvqb $4, %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xcf,0xe1,0x04]
14 ; X86BW-NEXT:    vgf2p8affineinvqb $5, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xcf,0xd1,0x05]
15 ; X86BW-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
16 ; X86BW-NEXT:    vmovdqa %xmm4, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcc]
17 ; X86BW-NEXT:    retl # encoding: [0xc3]
19 ; X64BW-LABEL: test_vgf2p8affineinvqb_128:
20 ; X64BW:       # %bb.0:
21 ; X64BW-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
22 ; X64BW-NEXT:    vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xd9,0x03]
23 ; X64BW-NEXT:    vgf2p8affineinvqb $4, %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xcf,0xe1,0x04]
24 ; X64BW-NEXT:    vgf2p8affineinvqb $5, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xcf,0xd1,0x05]
25 ; X64BW-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
26 ; X64BW-NEXT:    vmovdqa %xmm4, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcc]
27 ; X64BW-NEXT:    retq # encoding: [0xc3]
29 ; X86NOBW-LABEL: test_vgf2p8affineinvqb_128:
30 ; X86NOBW:       # %bb.0:
31 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
32 ; X86NOBW-NEXT:    vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xd9,0x03]
33 ; X86NOBW-NEXT:    vgf2p8affineinvqb $4, %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xe1,0x04]
34 ; X86NOBW-NEXT:    vgf2p8affineinvqb $5, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xc1,0x05]
35 ; X86NOBW-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
36 ; X86NOBW-NEXT:    # zmm1 {%k1} {z} = -1
37 ; X86NOBW-NEXT:    vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd]
38 ; X86NOBW-NEXT:    vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc]
39 ; X86NOBW-NEXT:    vpternlogq $184, %xmm0, %xmm5, %xmm2 # encoding: [0x62,0xf3,0xd5,0x08,0x25,0xd0,0xb8]
40 ; X86NOBW-NEXT:    # xmm2 = xmm2 ^ (xmm5 & (xmm2 ^ xmm0))
41 ; X86NOBW-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
42 ; X86NOBW-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
43 ; X86NOBW-NEXT:    retl # encoding: [0xc3]
45 ; X64NOBW-LABEL: test_vgf2p8affineinvqb_128:
46 ; X64NOBW:       # %bb.0:
47 ; X64NOBW-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
48 ; X64NOBW-NEXT:    vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xd9,0x03]
49 ; X64NOBW-NEXT:    vgf2p8affineinvqb $4, %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xe1,0x04]
50 ; X64NOBW-NEXT:    vgf2p8affineinvqb $5, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xc1,0x05]
51 ; X64NOBW-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
52 ; X64NOBW-NEXT:    # zmm1 {%k1} {z} = -1
53 ; X64NOBW-NEXT:    vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd]
54 ; X64NOBW-NEXT:    vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc]
55 ; X64NOBW-NEXT:    vpternlogq $184, %xmm0, %xmm5, %xmm2 # encoding: [0x62,0xf3,0xd5,0x08,0x25,0xd0,0xb8]
56 ; X64NOBW-NEXT:    # xmm2 = xmm2 ^ (xmm5 & (xmm2 ^ xmm0))
57 ; X64NOBW-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
58 ; X64NOBW-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
59 ; X64NOBW-NEXT:    retq # encoding: [0xc3]
60   %1 = bitcast i16 %mask to <16 x i1>
61   %2 = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 3)
62   %3 = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 4)
63   %4 = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 5)
64   %5 = select <16 x i1> %1, <16 x i8> %3, <16 x i8> zeroinitializer
65   %6 = select <16 x i1> %1, <16 x i8> %4, <16 x i8> %passthru
66   %7 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } undef, <16 x i8> %2, 0
67   %8 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> }    %7, <16 x i8> %5, 1
68   %9 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> }    %8, <16 x i8> %6, 2
69   ret { <16 x i8>, <16 x i8>, <16 x i8> } %9
70 }
72 declare <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8>, <32 x i8>, i8)
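; 256-bit variant: the i32 mask covers 32 bytes. Without avx512bw it is consumed
; as two 16-bit halves (two kmovw stack loads on X86, kmovw plus shrl $16 on X64),
; each widened via vpternlogd/vpmovdb and rejoined with vinserti128.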
73 define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineinvqb_256(<32 x i8> %src1, <32 x i8> %src2, <32 x i8> %passthru, i32 %mask) {
74 ; X86BW-LABEL: test_vgf2p8affineinvqb_256:
75 ; X86BW:       # %bb.0:
76 ; X86BW-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
77 ; X86BW-NEXT:    vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xd9,0x03]
78 ; X86BW-NEXT:    vgf2p8affineinvqb $4, %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xcf,0xe1,0x04]
79 ; X86BW-NEXT:    vgf2p8affineinvqb $5, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xcf,0xd1,0x05]
80 ; X86BW-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
81 ; X86BW-NEXT:    vmovdqa %ymm4, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcc]
82 ; X86BW-NEXT:    retl # encoding: [0xc3]
84 ; X64BW-LABEL: test_vgf2p8affineinvqb_256:
85 ; X64BW:       # %bb.0:
86 ; X64BW-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
87 ; X64BW-NEXT:    vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xd9,0x03]
88 ; X64BW-NEXT:    vgf2p8affineinvqb $4, %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xcf,0xe1,0x04]
89 ; X64BW-NEXT:    vgf2p8affineinvqb $5, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xcf,0xd1,0x05]
90 ; X64BW-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
91 ; X64BW-NEXT:    vmovdqa %ymm4, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcc]
92 ; X64BW-NEXT:    retq # encoding: [0xc3]
94 ; X86NOBW-LABEL: test_vgf2p8affineinvqb_256:
95 ; X86NOBW:       # %bb.0:
96 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
97 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x06]
98 ; X86NOBW-NEXT:    vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xd9,0x03]
99 ; X86NOBW-NEXT:    vgf2p8affineinvqb $4, %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xe1,0x04]
100 ; X86NOBW-NEXT:    vgf2p8affineinvqb $5, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xc1,0x05]
101 ; X86NOBW-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
102 ; X86NOBW-NEXT:    # zmm1 {%k1} {z} = -1
103 ; X86NOBW-NEXT:    vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
104 ; X86NOBW-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z} # encoding: [0x62,0xf3,0x55,0xca,0x25,0xed,0xff]
105 ; X86NOBW-NEXT:    # zmm5 {%k2} {z} = -1
106 ; X86NOBW-NEXT:    vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
107 ; X86NOBW-NEXT:    vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01]
108 ; X86NOBW-NEXT:    vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc]
109 ; X86NOBW-NEXT:    vpternlogq $184, %ymm0, %ymm5, %ymm2 # encoding: [0x62,0xf3,0xd5,0x28,0x25,0xd0,0xb8]
110 ; X86NOBW-NEXT:    # ymm2 = ymm2 ^ (ymm5 & (ymm2 ^ ymm0))
111 ; X86NOBW-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
112 ; X86NOBW-NEXT:    retl # encoding: [0xc3]
114 ; X64NOBW-LABEL: test_vgf2p8affineinvqb_256:
115 ; X64NOBW:       # %bb.0:
116 ; X64NOBW-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
117 ; X64NOBW-NEXT:    shrl $16, %edi # encoding: [0xc1,0xef,0x10]
118 ; X64NOBW-NEXT:    kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
119 ; X64NOBW-NEXT:    vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xd9,0x03]
120 ; X64NOBW-NEXT:    vgf2p8affineinvqb $4, %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xe1,0x04]
121 ; X64NOBW-NEXT:    vgf2p8affineinvqb $5, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xc1,0x05]
122 ; X64NOBW-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
123 ; X64NOBW-NEXT:    # zmm1 {%k1} {z} = -1
124 ; X64NOBW-NEXT:    vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
125 ; X64NOBW-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z} # encoding: [0x62,0xf3,0x55,0xca,0x25,0xed,0xff]
126 ; X64NOBW-NEXT:    # zmm5 {%k2} {z} = -1
127 ; X64NOBW-NEXT:    vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
128 ; X64NOBW-NEXT:    vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01]
129 ; X64NOBW-NEXT:    vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc]
130 ; X64NOBW-NEXT:    vpternlogq $184, %ymm0, %ymm5, %ymm2 # encoding: [0x62,0xf3,0xd5,0x28,0x25,0xd0,0xb8]
131 ; X64NOBW-NEXT:    # ymm2 = ymm2 ^ (ymm5 & (ymm2 ^ ymm0))
132 ; X64NOBW-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
133 ; X64NOBW-NEXT:    retq # encoding: [0xc3]
134   %1 = bitcast i32 %mask to <32 x i1>
135   %2 = call <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 3)
136   %3 = call <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 4)
137   %4 = call <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 5)
138   %5 = select <32 x i1> %1, <32 x i8> %3, <32 x i8> zeroinitializer
139   %6 = select <32 x i1> %1, <32 x i8> %4, <32 x i8> %passthru
140   %7 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } undef, <32 x i8> %2, 0
141   %8 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> }    %7, <32 x i8> %5, 1
142   %9 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> }    %8, <32 x i8> %6, 2
143   ret { <32 x i8>, <32 x i8>, <32 x i8> } %9
144 }
146 declare <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8>, <64 x i8>, i8)
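; 512-bit variant: the i64 mask is split into four 16-bit chunks (shrq $32,
; shrq $48 and shrl $16 on X64; four kmovw stack loads on X86), and the four
; byte fragments are reassembled with vinserti128 and vinserti64x4 before use.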
147 define { <64 x i8>, <64 x i8>, <64 x i8> } @test_vgf2p8affineinvqb_512(<64 x i8> %src1, <64 x i8> %src2, <64 x i8> %passthru, i64 %mask) {
148 ; X86BW-LABEL: test_vgf2p8affineinvqb_512:
149 ; X86BW:       # %bb.0:
150 ; X86BW-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
151 ; X86BW-NEXT:    vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xd9,0x03]
152 ; X86BW-NEXT:    vgf2p8affineinvqb $4, %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xcf,0xe1,0x04]
153 ; X86BW-NEXT:    vgf2p8affineinvqb $5, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xcf,0xd1,0x05]
154 ; X86BW-NEXT:    vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
155 ; X86BW-NEXT:    vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc]
156 ; X86BW-NEXT:    retl # encoding: [0xc3]
158 ; X64BW-LABEL: test_vgf2p8affineinvqb_512:
159 ; X64BW:       # %bb.0:
160 ; X64BW-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
161 ; X64BW-NEXT:    vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xd9,0x03]
162 ; X64BW-NEXT:    vgf2p8affineinvqb $4, %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xcf,0xe1,0x04]
163 ; X64BW-NEXT:    vgf2p8affineinvqb $5, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xcf,0xd1,0x05]
164 ; X64BW-NEXT:    vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
165 ; X64BW-NEXT:    vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc]
166 ; X64BW-NEXT:    retq # encoding: [0xc3]
168 ; X86NOBW-LABEL: test_vgf2p8affineinvqb_512:
169 ; X86NOBW:       # %bb.0:
170 ; X86NOBW-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
171 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x04]
172 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x06]
173 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k3 # encoding: [0xc5,0xf8,0x90,0x5c,0x24,0x0a]
174 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k4 # encoding: [0xc5,0xf8,0x90,0x64,0x24,0x08]
175 ; X86NOBW-NEXT:    vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xc1,0x03]
176 ; X86NOBW-NEXT:    vgf2p8affineinvqb $4, %zmm1, %zmm3, %zmm4 # encoding: [0x62,0xf3,0xe5,0x48,0xcf,0xe1,0x04]
177 ; X86NOBW-NEXT:    vgf2p8affineinvqb $5, %zmm1, %zmm3, %zmm3 # encoding: [0x62,0xf3,0xe5,0x48,0xcf,0xd9,0x05]
178 ; X86NOBW-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} # encoding: [0x62,0xf3,0x75,0xcc,0x25,0xc9,0xff]
179 ; X86NOBW-NEXT:    # zmm1 {%k4} {z} = -1
180 ; X86NOBW-NEXT:    vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
181 ; X86NOBW-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k3} {z} # encoding: [0x62,0xf3,0x55,0xcb,0x25,0xed,0xff]
182 ; X86NOBW-NEXT:    # zmm5 {%k3} {z} = -1
183 ; X86NOBW-NEXT:    vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
184 ; X86NOBW-NEXT:    vinserti128 $1, %xmm5, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xcd,0x01]
185 ; X86NOBW-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z} # encoding: [0x62,0xf3,0x55,0xca,0x25,0xed,0xff]
186 ; X86NOBW-NEXT:    # zmm5 {%k2} {z} = -1
187 ; X86NOBW-NEXT:    vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
188 ; X86NOBW-NEXT:    vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k1} {z} # encoding: [0x62,0xf3,0x4d,0xc9,0x25,0xf6,0xff]
189 ; X86NOBW-NEXT:    # zmm6 {%k1} {z} = -1
190 ; X86NOBW-NEXT:    vpmovdb %zmm6, %xmm6 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xf6]
191 ; X86NOBW-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x55,0x38,0xee,0x01]
192 ; X86NOBW-NEXT:    vinserti64x4 $1, %ymm1, %zmm5, %zmm5 # encoding: [0x62,0xf3,0xd5,0x48,0x3a,0xe9,0x01]
193 ; X86NOBW-NEXT:    vpandq %zmm4, %zmm5, %zmm1 # encoding: [0x62,0xf1,0xd5,0x48,0xdb,0xcc]
194 ; X86NOBW-NEXT:    vpternlogq $184, %zmm3, %zmm5, %zmm2 # encoding: [0x62,0xf3,0xd5,0x48,0x25,0xd3,0xb8]
195 ; X86NOBW-NEXT:    # zmm2 = zmm2 ^ (zmm5 & (zmm2 ^ zmm3))
196 ; X86NOBW-NEXT:    retl # encoding: [0xc3]
198 ; X64NOBW-LABEL: test_vgf2p8affineinvqb_512:
199 ; X64NOBW:       # %bb.0:
200 ; X64NOBW-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
201 ; X64NOBW-NEXT:    movl %edi, %ecx # encoding: [0x89,0xf9]
202 ; X64NOBW-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
203 ; X64NOBW-NEXT:    shrq $32, %rdi # encoding: [0x48,0xc1,0xef,0x20]
204 ; X64NOBW-NEXT:    shrq $48, %rax # encoding: [0x48,0xc1,0xe8,0x30]
205 ; X64NOBW-NEXT:    shrl $16, %ecx # encoding: [0xc1,0xe9,0x10]
206 ; X64NOBW-NEXT:    kmovw %ecx, %k2 # encoding: [0xc5,0xf8,0x92,0xd1]
207 ; X64NOBW-NEXT:    kmovw %eax, %k3 # encoding: [0xc5,0xf8,0x92,0xd8]
208 ; X64NOBW-NEXT:    kmovw %edi, %k4 # encoding: [0xc5,0xf8,0x92,0xe7]
209 ; X64NOBW-NEXT:    vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xd9,0x03]
210 ; X64NOBW-NEXT:    vgf2p8affineinvqb $4, %zmm1, %zmm0, %zmm4 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xe1,0x04]
211 ; X64NOBW-NEXT:    vgf2p8affineinvqb $5, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xc1,0x05]
212 ; X64NOBW-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} # encoding: [0x62,0xf3,0x75,0xcc,0x25,0xc9,0xff]
213 ; X64NOBW-NEXT:    # zmm1 {%k4} {z} = -1
214 ; X64NOBW-NEXT:    vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
215 ; X64NOBW-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k3} {z} # encoding: [0x62,0xf3,0x55,0xcb,0x25,0xed,0xff]
216 ; X64NOBW-NEXT:    # zmm5 {%k3} {z} = -1
217 ; X64NOBW-NEXT:    vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
218 ; X64NOBW-NEXT:    vinserti128 $1, %xmm5, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xcd,0x01]
219 ; X64NOBW-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z} # encoding: [0x62,0xf3,0x55,0xc9,0x25,0xed,0xff]
220 ; X64NOBW-NEXT:    # zmm5 {%k1} {z} = -1
221 ; X64NOBW-NEXT:    vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
222 ; X64NOBW-NEXT:    vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z} # encoding: [0x62,0xf3,0x4d,0xca,0x25,0xf6,0xff]
223 ; X64NOBW-NEXT:    # zmm6 {%k2} {z} = -1
224 ; X64NOBW-NEXT:    vpmovdb %zmm6, %xmm6 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xf6]
225 ; X64NOBW-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x55,0x38,0xee,0x01]
226 ; X64NOBW-NEXT:    vinserti64x4 $1, %ymm1, %zmm5, %zmm5 # encoding: [0x62,0xf3,0xd5,0x48,0x3a,0xe9,0x01]
227 ; X64NOBW-NEXT:    vpandq %zmm4, %zmm5, %zmm1 # encoding: [0x62,0xf1,0xd5,0x48,0xdb,0xcc]
228 ; X64NOBW-NEXT:    vpternlogq $184, %zmm0, %zmm5, %zmm2 # encoding: [0x62,0xf3,0xd5,0x48,0x25,0xd0,0xb8]
229 ; X64NOBW-NEXT:    # zmm2 = zmm2 ^ (zmm5 & (zmm2 ^ zmm0))
230 ; X64NOBW-NEXT:    vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
231 ; X64NOBW-NEXT:    retq # encoding: [0xc3]
232   %1 = bitcast i64 %mask to <64 x i1>
233   %2 = call <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 3)
234   %3 = call <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 4)
235   %4 = call <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 5)
236   %5 = select <64 x i1> %1, <64 x i8> %3, <64 x i8> zeroinitializer
237   %6 = select <64 x i1> %1, <64 x i8> %4, <64 x i8> %passthru
238   %7 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } undef, <64 x i8> %2, 0
239   %8 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> }    %7, <64 x i8> %5, 1
240   %9 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> }    %8, <64 x i8> %6, 2
241   ret { <64 x i8>, <64 x i8>, <64 x i8> } %9
242 }
244 declare <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8>, <16 x i8>, i8)
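; The vgf2p8affineqb tests mirror the vgf2p8affineinvqb tests above; only the
; opcode changes (0xce instead of 0xcf in the encodings).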
245 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineqb_128(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %passthru, i16 %mask) {
246 ; X86BW-LABEL: test_vgf2p8affineqb_128:
247 ; X86BW:       # %bb.0:
248 ; X86BW-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
249 ; X86BW-NEXT:    vgf2p8affineqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xd9,0x03]
250 ; X86BW-NEXT:    vgf2p8affineqb $4, %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xce,0xe1,0x04]
251 ; X86BW-NEXT:    vgf2p8affineqb $5, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xce,0xd1,0x05]
252 ; X86BW-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
253 ; X86BW-NEXT:    vmovdqa %xmm4, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcc]
254 ; X86BW-NEXT:    retl # encoding: [0xc3]
256 ; X64BW-LABEL: test_vgf2p8affineqb_128:
257 ; X64BW:       # %bb.0:
258 ; X64BW-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
259 ; X64BW-NEXT:    vgf2p8affineqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xd9,0x03]
260 ; X64BW-NEXT:    vgf2p8affineqb $4, %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xce,0xe1,0x04]
261 ; X64BW-NEXT:    vgf2p8affineqb $5, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xce,0xd1,0x05]
262 ; X64BW-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
263 ; X64BW-NEXT:    vmovdqa %xmm4, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcc]
264 ; X64BW-NEXT:    retq # encoding: [0xc3]
266 ; X86NOBW-LABEL: test_vgf2p8affineqb_128:
267 ; X86NOBW:       # %bb.0:
268 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
269 ; X86NOBW-NEXT:    vgf2p8affineqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xd9,0x03]
270 ; X86NOBW-NEXT:    vgf2p8affineqb $4, %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xe1,0x04]
271 ; X86NOBW-NEXT:    vgf2p8affineqb $5, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xc1,0x05]
272 ; X86NOBW-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
273 ; X86NOBW-NEXT:    # zmm1 {%k1} {z} = -1
274 ; X86NOBW-NEXT:    vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd]
275 ; X86NOBW-NEXT:    vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc]
276 ; X86NOBW-NEXT:    vpternlogq $184, %xmm0, %xmm5, %xmm2 # encoding: [0x62,0xf3,0xd5,0x08,0x25,0xd0,0xb8]
277 ; X86NOBW-NEXT:    # xmm2 = xmm2 ^ (xmm5 & (xmm2 ^ xmm0))
278 ; X86NOBW-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
279 ; X86NOBW-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
280 ; X86NOBW-NEXT:    retl # encoding: [0xc3]
282 ; X64NOBW-LABEL: test_vgf2p8affineqb_128:
283 ; X64NOBW:       # %bb.0:
284 ; X64NOBW-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
285 ; X64NOBW-NEXT:    vgf2p8affineqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xd9,0x03]
286 ; X64NOBW-NEXT:    vgf2p8affineqb $4, %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xe1,0x04]
287 ; X64NOBW-NEXT:    vgf2p8affineqb $5, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xc1,0x05]
288 ; X64NOBW-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
289 ; X64NOBW-NEXT:    # zmm1 {%k1} {z} = -1
290 ; X64NOBW-NEXT:    vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd]
291 ; X64NOBW-NEXT:    vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc]
292 ; X64NOBW-NEXT:    vpternlogq $184, %xmm0, %xmm5, %xmm2 # encoding: [0x62,0xf3,0xd5,0x08,0x25,0xd0,0xb8]
293 ; X64NOBW-NEXT:    # xmm2 = xmm2 ^ (xmm5 & (xmm2 ^ xmm0))
294 ; X64NOBW-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
295 ; X64NOBW-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
296 ; X64NOBW-NEXT:    retq # encoding: [0xc3]
297   %1 = bitcast i16 %mask to <16 x i1>
298   %2 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 3)
299   %3 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 4)
300   %4 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 5)
301   %5 = select <16 x i1> %1, <16 x i8> %3, <16 x i8> zeroinitializer
302   %6 = select <16 x i1> %1, <16 x i8> %4, <16 x i8> %passthru
303   %7 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } undef, <16 x i8> %2, 0
304   %8 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> }    %7, <16 x i8> %5, 1
305   %9 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> }    %8, <16 x i8> %6, 2
306   ret { <16 x i8>, <16 x i8>, <16 x i8> } %9
307 }
309 declare <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8>, <32 x i8>, i8)
310 define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineqb_256(<32 x i8> %src1, <32 x i8> %src2, <32 x i8> %passthru, i32 %mask) {
311 ; X86BW-LABEL: test_vgf2p8affineqb_256:
312 ; X86BW:       # %bb.0:
313 ; X86BW-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
314 ; X86BW-NEXT:    vgf2p8affineqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xd9,0x03]
315 ; X86BW-NEXT:    vgf2p8affineqb $4, %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xce,0xe1,0x04]
316 ; X86BW-NEXT:    vgf2p8affineqb $5, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xce,0xd1,0x05]
317 ; X86BW-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
318 ; X86BW-NEXT:    vmovdqa %ymm4, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcc]
319 ; X86BW-NEXT:    retl # encoding: [0xc3]
321 ; X64BW-LABEL: test_vgf2p8affineqb_256:
322 ; X64BW:       # %bb.0:
323 ; X64BW-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
324 ; X64BW-NEXT:    vgf2p8affineqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xd9,0x03]
325 ; X64BW-NEXT:    vgf2p8affineqb $4, %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xce,0xe1,0x04]
326 ; X64BW-NEXT:    vgf2p8affineqb $5, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xce,0xd1,0x05]
327 ; X64BW-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
328 ; X64BW-NEXT:    vmovdqa %ymm4, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcc]
329 ; X64BW-NEXT:    retq # encoding: [0xc3]
331 ; X86NOBW-LABEL: test_vgf2p8affineqb_256:
332 ; X86NOBW:       # %bb.0:
333 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
334 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x06]
335 ; X86NOBW-NEXT:    vgf2p8affineqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xd9,0x03]
336 ; X86NOBW-NEXT:    vgf2p8affineqb $4, %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xe1,0x04]
337 ; X86NOBW-NEXT:    vgf2p8affineqb $5, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xc1,0x05]
338 ; X86NOBW-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
339 ; X86NOBW-NEXT:    # zmm1 {%k1} {z} = -1
340 ; X86NOBW-NEXT:    vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
341 ; X86NOBW-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z} # encoding: [0x62,0xf3,0x55,0xca,0x25,0xed,0xff]
342 ; X86NOBW-NEXT:    # zmm5 {%k2} {z} = -1
343 ; X86NOBW-NEXT:    vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
344 ; X86NOBW-NEXT:    vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01]
345 ; X86NOBW-NEXT:    vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc]
346 ; X86NOBW-NEXT:    vpternlogq $184, %ymm0, %ymm5, %ymm2 # encoding: [0x62,0xf3,0xd5,0x28,0x25,0xd0,0xb8]
347 ; X86NOBW-NEXT:    # ymm2 = ymm2 ^ (ymm5 & (ymm2 ^ ymm0))
348 ; X86NOBW-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
349 ; X86NOBW-NEXT:    retl # encoding: [0xc3]
351 ; X64NOBW-LABEL: test_vgf2p8affineqb_256:
352 ; X64NOBW:       # %bb.0:
353 ; X64NOBW-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
354 ; X64NOBW-NEXT:    shrl $16, %edi # encoding: [0xc1,0xef,0x10]
355 ; X64NOBW-NEXT:    kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
356 ; X64NOBW-NEXT:    vgf2p8affineqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xd9,0x03]
357 ; X64NOBW-NEXT:    vgf2p8affineqb $4, %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xe1,0x04]
358 ; X64NOBW-NEXT:    vgf2p8affineqb $5, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xc1,0x05]
359 ; X64NOBW-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
360 ; X64NOBW-NEXT:    # zmm1 {%k1} {z} = -1
361 ; X64NOBW-NEXT:    vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
362 ; X64NOBW-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z} # encoding: [0x62,0xf3,0x55,0xca,0x25,0xed,0xff]
363 ; X64NOBW-NEXT:    # zmm5 {%k2} {z} = -1
364 ; X64NOBW-NEXT:    vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
365 ; X64NOBW-NEXT:    vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01]
366 ; X64NOBW-NEXT:    vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc]
367 ; X64NOBW-NEXT:    vpternlogq $184, %ymm0, %ymm5, %ymm2 # encoding: [0x62,0xf3,0xd5,0x28,0x25,0xd0,0xb8]
368 ; X64NOBW-NEXT:    # ymm2 = ymm2 ^ (ymm5 & (ymm2 ^ ymm0))
369 ; X64NOBW-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
370 ; X64NOBW-NEXT:    retq # encoding: [0xc3]
371   %1 = bitcast i32 %mask to <32 x i1>
372   %2 = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 3)
373   %3 = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 4)
374   %4 = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 5)
375   %5 = select <32 x i1> %1, <32 x i8> %3, <32 x i8> zeroinitializer
376   %6 = select <32 x i1> %1, <32 x i8> %4, <32 x i8> %passthru
377   %7 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } undef, <32 x i8> %2, 0
378   %8 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> }    %7, <32 x i8> %5, 1
379   %9 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> }    %8, <32 x i8> %6, 2
380   ret { <32 x i8>, <32 x i8>, <32 x i8> } %9
381 }
383 declare <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8>, <64 x i8>, i8)
384 define { <64 x i8>, <64 x i8>, <64 x i8> } @test_vgf2p8affineqb_512(<64 x i8> %src1, <64 x i8> %src2, <64 x i8> %passthru, i64 %mask) {
385 ; X86BW-LABEL: test_vgf2p8affineqb_512:
386 ; X86BW:       # %bb.0:
387 ; X86BW-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
388 ; X86BW-NEXT:    vgf2p8affineqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xd9,0x03]
389 ; X86BW-NEXT:    vgf2p8affineqb $4, %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xce,0xe1,0x04]
390 ; X86BW-NEXT:    vgf2p8affineqb $5, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xce,0xd1,0x05]
391 ; X86BW-NEXT:    vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
392 ; X86BW-NEXT:    vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc]
393 ; X86BW-NEXT:    retl # encoding: [0xc3]
395 ; X64BW-LABEL: test_vgf2p8affineqb_512:
396 ; X64BW:       # %bb.0:
397 ; X64BW-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
398 ; X64BW-NEXT:    vgf2p8affineqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xd9,0x03]
399 ; X64BW-NEXT:    vgf2p8affineqb $4, %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xce,0xe1,0x04]
400 ; X64BW-NEXT:    vgf2p8affineqb $5, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xce,0xd1,0x05]
401 ; X64BW-NEXT:    vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
402 ; X64BW-NEXT:    vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc]
403 ; X64BW-NEXT:    retq # encoding: [0xc3]
405 ; X86NOBW-LABEL: test_vgf2p8affineqb_512:
406 ; X86NOBW:       # %bb.0:
407 ; X86NOBW-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
408 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x04]
409 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x06]
410 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k3 # encoding: [0xc5,0xf8,0x90,0x5c,0x24,0x0a]
411 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k4 # encoding: [0xc5,0xf8,0x90,0x64,0x24,0x08]
412 ; X86NOBW-NEXT:    vgf2p8affineqb $3, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xc1,0x03]
413 ; X86NOBW-NEXT:    vgf2p8affineqb $4, %zmm1, %zmm3, %zmm4 # encoding: [0x62,0xf3,0xe5,0x48,0xce,0xe1,0x04]
414 ; X86NOBW-NEXT:    vgf2p8affineqb $5, %zmm1, %zmm3, %zmm3 # encoding: [0x62,0xf3,0xe5,0x48,0xce,0xd9,0x05]
415 ; X86NOBW-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} # encoding: [0x62,0xf3,0x75,0xcc,0x25,0xc9,0xff]
416 ; X86NOBW-NEXT:    # zmm1 {%k4} {z} = -1
417 ; X86NOBW-NEXT:    vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
418 ; X86NOBW-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k3} {z} # encoding: [0x62,0xf3,0x55,0xcb,0x25,0xed,0xff]
419 ; X86NOBW-NEXT:    # zmm5 {%k3} {z} = -1
420 ; X86NOBW-NEXT:    vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
421 ; X86NOBW-NEXT:    vinserti128 $1, %xmm5, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xcd,0x01]
422 ; X86NOBW-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z} # encoding: [0x62,0xf3,0x55,0xca,0x25,0xed,0xff]
423 ; X86NOBW-NEXT:    # zmm5 {%k2} {z} = -1
424 ; X86NOBW-NEXT:    vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
425 ; X86NOBW-NEXT:    vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k1} {z} # encoding: [0x62,0xf3,0x4d,0xc9,0x25,0xf6,0xff]
426 ; X86NOBW-NEXT:    # zmm6 {%k1} {z} = -1
427 ; X86NOBW-NEXT:    vpmovdb %zmm6, %xmm6 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xf6]
428 ; X86NOBW-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x55,0x38,0xee,0x01]
429 ; X86NOBW-NEXT:    vinserti64x4 $1, %ymm1, %zmm5, %zmm5 # encoding: [0x62,0xf3,0xd5,0x48,0x3a,0xe9,0x01]
430 ; X86NOBW-NEXT:    vpandq %zmm4, %zmm5, %zmm1 # encoding: [0x62,0xf1,0xd5,0x48,0xdb,0xcc]
431 ; X86NOBW-NEXT:    vpternlogq $184, %zmm3, %zmm5, %zmm2 # encoding: [0x62,0xf3,0xd5,0x48,0x25,0xd3,0xb8]
432 ; X86NOBW-NEXT:    # zmm2 = zmm2 ^ (zmm5 & (zmm2 ^ zmm3))
433 ; X86NOBW-NEXT:    retl # encoding: [0xc3]
435 ; X64NOBW-LABEL: test_vgf2p8affineqb_512:
436 ; X64NOBW:       # %bb.0:
437 ; X64NOBW-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
438 ; X64NOBW-NEXT:    movl %edi, %ecx # encoding: [0x89,0xf9]
439 ; X64NOBW-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
440 ; X64NOBW-NEXT:    shrq $32, %rdi # encoding: [0x48,0xc1,0xef,0x20]
441 ; X64NOBW-NEXT:    shrq $48, %rax # encoding: [0x48,0xc1,0xe8,0x30]
442 ; X64NOBW-NEXT:    shrl $16, %ecx # encoding: [0xc1,0xe9,0x10]
443 ; X64NOBW-NEXT:    kmovw %ecx, %k2 # encoding: [0xc5,0xf8,0x92,0xd1]
444 ; X64NOBW-NEXT:    kmovw %eax, %k3 # encoding: [0xc5,0xf8,0x92,0xd8]
445 ; X64NOBW-NEXT:    kmovw %edi, %k4 # encoding: [0xc5,0xf8,0x92,0xe7]
446 ; X64NOBW-NEXT:    vgf2p8affineqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xd9,0x03]
447 ; X64NOBW-NEXT:    vgf2p8affineqb $4, %zmm1, %zmm0, %zmm4 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xe1,0x04]
448 ; X64NOBW-NEXT:    vgf2p8affineqb $5, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xc1,0x05]
449 ; X64NOBW-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} # encoding: [0x62,0xf3,0x75,0xcc,0x25,0xc9,0xff]
450 ; X64NOBW-NEXT:    # zmm1 {%k4} {z} = -1
451 ; X64NOBW-NEXT:    vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
452 ; X64NOBW-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k3} {z} # encoding: [0x62,0xf3,0x55,0xcb,0x25,0xed,0xff]
453 ; X64NOBW-NEXT:    # zmm5 {%k3} {z} = -1
454 ; X64NOBW-NEXT:    vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
455 ; X64NOBW-NEXT:    vinserti128 $1, %xmm5, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xcd,0x01]
456 ; X64NOBW-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z} # encoding: [0x62,0xf3,0x55,0xc9,0x25,0xed,0xff]
457 ; X64NOBW-NEXT:    # zmm5 {%k1} {z} = -1
458 ; X64NOBW-NEXT:    vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
459 ; X64NOBW-NEXT:    vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z} # encoding: [0x62,0xf3,0x4d,0xca,0x25,0xf6,0xff]
460 ; X64NOBW-NEXT:    # zmm6 {%k2} {z} = -1
461 ; X64NOBW-NEXT:    vpmovdb %zmm6, %xmm6 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xf6]
462 ; X64NOBW-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x55,0x38,0xee,0x01]
463 ; X64NOBW-NEXT:    vinserti64x4 $1, %ymm1, %zmm5, %zmm5 # encoding: [0x62,0xf3,0xd5,0x48,0x3a,0xe9,0x01]
464 ; X64NOBW-NEXT:    vpandq %zmm4, %zmm5, %zmm1 # encoding: [0x62,0xf1,0xd5,0x48,0xdb,0xcc]
465 ; X64NOBW-NEXT:    vpternlogq $184, %zmm0, %zmm5, %zmm2 # encoding: [0x62,0xf3,0xd5,0x48,0x25,0xd0,0xb8]
466 ; X64NOBW-NEXT:    # zmm2 = zmm2 ^ (zmm5 & (zmm2 ^ zmm0))
467 ; X64NOBW-NEXT:    vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
468 ; X64NOBW-NEXT:    retq # encoding: [0xc3]
469   %1 = bitcast i64 %mask to <64 x i1>
470   %2 = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 3)
471   %3 = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 4)
472   %4 = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 5)
473   %5 = select <64 x i1> %1, <64 x i8> %3, <64 x i8> zeroinitializer
474   %6 = select <64 x i1> %1, <64 x i8> %4, <64 x i8> %passthru
475   %7 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } undef, <64 x i8> %2, 0
476   %8 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> }    %7, <64 x i8> %5, 1
477   %9 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> }    %8, <64 x i8> %6, 2
478   ret { <64 x i8>, <64 x i8>, <64 x i8> } %9
479 }
481 declare <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8>, <16 x i8>)
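; The vgf2p8mulb tests use a separate function per masking form at each width:
; a plain call, a _mask version selecting against %passthru, and a _maskz
; version selecting against zeroinitializer.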
482 define <16 x i8> @test_vgf2p8mulb_128(<16 x i8> %src1, <16 x i8> %src2) {
483 ; CHECK-LABEL: test_vgf2p8mulb_128:
484 ; CHECK:       # %bb.0:
485 ; CHECK-NEXT:    vgf2p8mulb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc1]
486 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
487   %1 = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> %src1, <16 x i8> %src2)
488   ret <16 x i8> %1
489 }
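; Without avx512bw the merge is a bitwise select: the mask bytes from vpmovdb
; land in xmm0 and vpternlogq $202 computes xmm2 ^ (xmm0 & (xmm1 ^ xmm2)),
; i.e. mask ? result : passthru.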
491 define <16 x i8> @test_vgf2p8mulb_128_mask(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %passthru, i16 %mask) {
492 ; X86BW-LABEL: test_vgf2p8mulb_128_mask:
493 ; X86BW:       # %bb.0:
494 ; X86BW-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
495 ; X86BW-NEXT:    vgf2p8mulb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xcf,0xd1]
496 ; X86BW-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
497 ; X86BW-NEXT:    retl # encoding: [0xc3]
499 ; X64BW-LABEL: test_vgf2p8mulb_128_mask:
500 ; X64BW:       # %bb.0:
501 ; X64BW-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
502 ; X64BW-NEXT:    vgf2p8mulb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xcf,0xd1]
503 ; X64BW-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
504 ; X64BW-NEXT:    retq # encoding: [0xc3]
506 ; X86NOBW-LABEL: test_vgf2p8mulb_128_mask:
507 ; X86NOBW:       # %bb.0:
508 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
509 ; X86NOBW-NEXT:    vgf2p8mulb %xmm1, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc9]
510 ; X86NOBW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
511 ; X86NOBW-NEXT:    # zmm0 {%k1} {z} = -1
512 ; X86NOBW-NEXT:    vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
513 ; X86NOBW-NEXT:    vpternlogq $202, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0xca]
514 ; X86NOBW-NEXT:    # xmm0 = xmm2 ^ (xmm0 & (xmm1 ^ xmm2))
515 ; X86NOBW-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
516 ; X86NOBW-NEXT:    retl # encoding: [0xc3]
518 ; X64NOBW-LABEL: test_vgf2p8mulb_128_mask:
519 ; X64NOBW:       # %bb.0:
520 ; X64NOBW-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
521 ; X64NOBW-NEXT:    vgf2p8mulb %xmm1, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc9]
522 ; X64NOBW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
523 ; X64NOBW-NEXT:    # zmm0 {%k1} {z} = -1
524 ; X64NOBW-NEXT:    vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
525 ; X64NOBW-NEXT:    vpternlogq $202, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0xca]
526 ; X64NOBW-NEXT:    # xmm0 = xmm2 ^ (xmm0 & (xmm1 ^ xmm2))
527 ; X64NOBW-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
528 ; X64NOBW-NEXT:    retq # encoding: [0xc3]
529   %1 = bitcast i16 %mask to <16 x i1>
530   %2 = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> %src1, <16 x i8> %src2)
531   %3 = select <16 x i1> %1, <16 x i8> %2, <16 x i8> %passthru
532   ret <16 x i8> %3
533 }
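; The zeroing form only needs a vpand of the result with the materialized mask
; bytes when avx512bw is unavailable.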
535 define <16 x i8> @test_vgf2p8mulb_128_maskz(<16 x i8> %src1, <16 x i8> %src2, i16 %mask) {
536 ; X86BW-LABEL: test_vgf2p8mulb_128_maskz:
537 ; X86BW:       # %bb.0:
538 ; X86BW-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
539 ; X86BW-NEXT:    vgf2p8mulb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0xcf,0xc1]
540 ; X86BW-NEXT:    retl # encoding: [0xc3]
542 ; X64BW-LABEL: test_vgf2p8mulb_128_maskz:
543 ; X64BW:       # %bb.0:
544 ; X64BW-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
545 ; X64BW-NEXT:    vgf2p8mulb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0xcf,0xc1]
546 ; X64BW-NEXT:    retq # encoding: [0xc3]
548 ; X86NOBW-LABEL: test_vgf2p8mulb_128_maskz:
549 ; X86NOBW:       # %bb.0:
550 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
551 ; X86NOBW-NEXT:    vgf2p8mulb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc1]
552 ; X86NOBW-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
553 ; X86NOBW-NEXT:    # zmm1 {%k1} {z} = -1
554 ; X86NOBW-NEXT:    vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
555 ; X86NOBW-NEXT:    vpand %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xdb,0xc0]
556 ; X86NOBW-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
557 ; X86NOBW-NEXT:    retl # encoding: [0xc3]
559 ; X64NOBW-LABEL: test_vgf2p8mulb_128_maskz:
560 ; X64NOBW:       # %bb.0:
561 ; X64NOBW-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
562 ; X64NOBW-NEXT:    vgf2p8mulb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc1]
563 ; X64NOBW-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
564 ; X64NOBW-NEXT:    # zmm1 {%k1} {z} = -1
565 ; X64NOBW-NEXT:    vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
566 ; X64NOBW-NEXT:    vpand %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xdb,0xc0]
567 ; X64NOBW-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
568 ; X64NOBW-NEXT:    retq # encoding: [0xc3]
569   %1 = bitcast i16 %mask to <16 x i1>
570   %2 = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> %src1, <16 x i8> %src2)
571   %3 = select <16 x i1> %1, <16 x i8> %2, <16 x i8> zeroinitializer
572   ret <16 x i8> %3
573 }
575 declare <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8>, <32 x i8>)
576 define <32 x i8> @test_vgf2p8mulb_256(<32 x i8> %src1, <32 x i8> %src2) {
577 ; CHECK-LABEL: test_vgf2p8mulb_256:
578 ; CHECK:       # %bb.0:
579 ; CHECK-NEXT:    vgf2p8mulb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc1]
580 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
581   %1 = call <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8> %src1, <32 x i8> %src2)
582   ret <32 x i8> %1
583 }
585 define <32 x i8> @test_vgf2p8mulb_256_mask(<32 x i8> %src1, <32 x i8> %src2, <32 x i8> %passthru, i32 %mask) {
586 ; X86BW-LABEL: test_vgf2p8mulb_256_mask:
587 ; X86BW:       # %bb.0:
588 ; X86BW-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
589 ; X86BW-NEXT:    vgf2p8mulb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xcf,0xd1]
590 ; X86BW-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
591 ; X86BW-NEXT:    retl # encoding: [0xc3]
593 ; X64BW-LABEL: test_vgf2p8mulb_256_mask:
594 ; X64BW:       # %bb.0:
595 ; X64BW-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
596 ; X64BW-NEXT:    vgf2p8mulb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xcf,0xd1]
597 ; X64BW-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
598 ; X64BW-NEXT:    retq # encoding: [0xc3]
600 ; X86NOBW-LABEL: test_vgf2p8mulb_256_mask:
601 ; X86NOBW:       # %bb.0:
602 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
603 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x06]
604 ; X86NOBW-NEXT:    vgf2p8mulb %ymm1, %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc9]
605 ; X86NOBW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
606 ; X86NOBW-NEXT:    # zmm0 {%k1} {z} = -1
607 ; X86NOBW-NEXT:    vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
608 ; X86NOBW-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} # encoding: [0x62,0xf3,0x65,0xca,0x25,0xdb,0xff]
609 ; X86NOBW-NEXT:    # zmm3 {%k2} {z} = -1
610 ; X86NOBW-NEXT:    vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
611 ; X86NOBW-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc3,0x01]
612 ; X86NOBW-NEXT:    vpternlogq $202, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0xca]
613 ; X86NOBW-NEXT:    # ymm0 = ymm2 ^ (ymm0 & (ymm1 ^ ymm2))
614 ; X86NOBW-NEXT:    retl # encoding: [0xc3]
616 ; X64NOBW-LABEL: test_vgf2p8mulb_256_mask:
617 ; X64NOBW:       # %bb.0:
618 ; X64NOBW-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
619 ; X64NOBW-NEXT:    shrl $16, %edi # encoding: [0xc1,0xef,0x10]
620 ; X64NOBW-NEXT:    kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
621 ; X64NOBW-NEXT:    vgf2p8mulb %ymm1, %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc9]
622 ; X64NOBW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
623 ; X64NOBW-NEXT:    # zmm0 {%k1} {z} = -1
624 ; X64NOBW-NEXT:    vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
625 ; X64NOBW-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} # encoding: [0x62,0xf3,0x65,0xca,0x25,0xdb,0xff]
626 ; X64NOBW-NEXT:    # zmm3 {%k2} {z} = -1
627 ; X64NOBW-NEXT:    vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
628 ; X64NOBW-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc3,0x01]
629 ; X64NOBW-NEXT:    vpternlogq $202, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0xca]
630 ; X64NOBW-NEXT:    # ymm0 = ymm2 ^ (ymm0 & (ymm1 ^ ymm2))
631 ; X64NOBW-NEXT:    retq # encoding: [0xc3]
632   %1 = bitcast i32 %mask to <32 x i1>
633   %2 = call <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8> %src1, <32 x i8> %src2)
634   %3 = select <32 x i1> %1, <32 x i8> %2, <32 x i8> %passthru
635   ret <32 x i8> %3
636 }
638 define <32 x i8> @test_vgf2p8mulb_256_maskz(<32 x i8> %src1, <32 x i8> %src2, i32 %mask) {
639 ; X86BW-LABEL: test_vgf2p8mulb_256_maskz:
640 ; X86BW:       # %bb.0:
641 ; X86BW-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
642 ; X86BW-NEXT:    vgf2p8mulb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0xcf,0xc1]
643 ; X86BW-NEXT:    retl # encoding: [0xc3]
645 ; X64BW-LABEL: test_vgf2p8mulb_256_maskz:
646 ; X64BW:       # %bb.0:
647 ; X64BW-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
648 ; X64BW-NEXT:    vgf2p8mulb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0xcf,0xc1]
649 ; X64BW-NEXT:    retq # encoding: [0xc3]
651 ; X86NOBW-LABEL: test_vgf2p8mulb_256_maskz:
652 ; X86NOBW:       # %bb.0:
653 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
654 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x06]
655 ; X86NOBW-NEXT:    vgf2p8mulb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc1]
656 ; X86NOBW-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
657 ; X86NOBW-NEXT:    # zmm1 {%k1} {z} = -1
658 ; X86NOBW-NEXT:    vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
659 ; X86NOBW-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} # encoding: [0x62,0xf3,0x6d,0xca,0x25,0xd2,0xff]
660 ; X86NOBW-NEXT:    # zmm2 {%k2} {z} = -1
661 ; X86NOBW-NEXT:    vpmovdb %zmm2, %xmm2 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xd2]
662 ; X86NOBW-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xca,0x01]
663 ; X86NOBW-NEXT:    vpand %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xdb,0xc0]
664 ; X86NOBW-NEXT:    retl # encoding: [0xc3]
666 ; X64NOBW-LABEL: test_vgf2p8mulb_256_maskz:
667 ; X64NOBW:       # %bb.0:
668 ; X64NOBW-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
669 ; X64NOBW-NEXT:    shrl $16, %edi # encoding: [0xc1,0xef,0x10]
670 ; X64NOBW-NEXT:    kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
671 ; X64NOBW-NEXT:    vgf2p8mulb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc1]
672 ; X64NOBW-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
673 ; X64NOBW-NEXT:    # zmm1 {%k1} {z} = -1
674 ; X64NOBW-NEXT:    vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
675 ; X64NOBW-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} # encoding: [0x62,0xf3,0x6d,0xca,0x25,0xd2,0xff]
676 ; X64NOBW-NEXT:    # zmm2 {%k2} {z} = -1
677 ; X64NOBW-NEXT:    vpmovdb %zmm2, %xmm2 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xd2]
678 ; X64NOBW-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xca,0x01]
679 ; X64NOBW-NEXT:    vpand %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xdb,0xc0]
680 ; X64NOBW-NEXT:    retq # encoding: [0xc3]
681   %1 = bitcast i32 %mask to <32 x i1>
682   %2 = call <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8> %src1, <32 x i8> %src2)
683   %3 = select <32 x i1> %1, <32 x i8> %2, <32 x i8> zeroinitializer
684   ret <32 x i8> %3
685 }
687 declare <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8>, <64 x i8>)
688 define <64 x i8> @test_vgf2p8mulb_512(<64 x i8> %src1, <64 x i8> %src2) {
689 ; CHECK-LABEL: test_vgf2p8mulb_512:
690 ; CHECK:       # %bb.0:
691 ; CHECK-NEXT:    vgf2p8mulb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0xcf,0xc1]
692 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
693   %1 = call <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8> %src1, <64 x i8> %src2)
694   ret <64 x i8> %1
695 }
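; 512-bit masked multiply: the full i64 mask is rebuilt as 64 mask bytes (four
; vpternlogd/vpmovdb chunks joined with vinserti128 and vinserti64x4) and the
; merge is again a vpternlogq $202 select, here at zmm width.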
697 define <64 x i8> @test_vgf2p8mulb_512_mask(<64 x i8> %src1, <64 x i8> %src2, <64 x i8> %passthru, i64 %mask) {
698 ; X86BW-LABEL: test_vgf2p8mulb_512_mask:
699 ; X86BW:       # %bb.0:
700 ; X86BW-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
701 ; X86BW-NEXT:    vgf2p8mulb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xcf,0xd1]
702 ; X86BW-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
703 ; X86BW-NEXT:    retl # encoding: [0xc3]
705 ; X64BW-LABEL: test_vgf2p8mulb_512_mask:
706 ; X64BW:       # %bb.0:
707 ; X64BW-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
708 ; X64BW-NEXT:    vgf2p8mulb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xcf,0xd1]
709 ; X64BW-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
710 ; X64BW-NEXT:    retq # encoding: [0xc3]
712 ; X86NOBW-LABEL: test_vgf2p8mulb_512_mask:
713 ; X86NOBW:       # %bb.0:
714 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
715 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x06]
716 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k3 # encoding: [0xc5,0xf8,0x90,0x5c,0x24,0x0a]
717 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k4 # encoding: [0xc5,0xf8,0x90,0x64,0x24,0x08]
718 ; X86NOBW-NEXT:    vgf2p8mulb %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0xcf,0xc9]
719 ; X86NOBW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z} # encoding: [0x62,0xf3,0x7d,0xcc,0x25,0xc0,0xff]
720 ; X86NOBW-NEXT:    # zmm0 {%k4} {z} = -1
721 ; X86NOBW-NEXT:    vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
722 ; X86NOBW-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k3} {z} # encoding: [0x62,0xf3,0x65,0xcb,0x25,0xdb,0xff]
723 ; X86NOBW-NEXT:    # zmm3 {%k3} {z} = -1
724 ; X86NOBW-NEXT:    vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
725 ; X86NOBW-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc3,0x01]
726 ; X86NOBW-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x65,0xc9,0x25,0xdb,0xff]
727 ; X86NOBW-NEXT:    # zmm3 {%k1} {z} = -1
728 ; X86NOBW-NEXT:    vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
729 ; X86NOBW-NEXT:    vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k2} {z} # encoding: [0x62,0xf3,0x5d,0xca,0x25,0xe4,0xff]
730 ; X86NOBW-NEXT:    # zmm4 {%k2} {z} = -1
731 ; X86NOBW-NEXT:    vpmovdb %zmm4, %xmm4 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xe4]
732 ; X86NOBW-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x65,0x38,0xdc,0x01]
733 ; X86NOBW-NEXT:    vinserti64x4 $1, %ymm0, %zmm3, %zmm0 # encoding: [0x62,0xf3,0xe5,0x48,0x3a,0xc0,0x01]
734 ; X86NOBW-NEXT:    vpternlogq $202, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf3,0xf5,0x48,0x25,0xc2,0xca]
735 ; X86NOBW-NEXT:    # zmm0 = zmm2 ^ (zmm0 & (zmm1 ^ zmm2))
736 ; X86NOBW-NEXT:    retl # encoding: [0xc3]
738 ; X64NOBW-LABEL: test_vgf2p8mulb_512_mask:
739 ; X64NOBW:       # %bb.0:
740 ; X64NOBW-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
741 ; X64NOBW-NEXT:    movl %edi, %ecx # encoding: [0x89,0xf9]
742 ; X64NOBW-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
743 ; X64NOBW-NEXT:    shrq $32, %rdi # encoding: [0x48,0xc1,0xef,0x20]
744 ; X64NOBW-NEXT:    shrq $48, %rax # encoding: [0x48,0xc1,0xe8,0x30]
745 ; X64NOBW-NEXT:    shrl $16, %ecx # encoding: [0xc1,0xe9,0x10]
746 ; X64NOBW-NEXT:    kmovw %ecx, %k2 # encoding: [0xc5,0xf8,0x92,0xd1]
747 ; X64NOBW-NEXT:    kmovw %eax, %k3 # encoding: [0xc5,0xf8,0x92,0xd8]
748 ; X64NOBW-NEXT:    kmovw %edi, %k4 # encoding: [0xc5,0xf8,0x92,0xe7]
749 ; X64NOBW-NEXT:    vgf2p8mulb %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0xcf,0xc9]
750 ; X64NOBW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z} # encoding: [0x62,0xf3,0x7d,0xcc,0x25,0xc0,0xff]
751 ; X64NOBW-NEXT:    # zmm0 {%k4} {z} = -1
752 ; X64NOBW-NEXT:    vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
753 ; X64NOBW-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k3} {z} # encoding: [0x62,0xf3,0x65,0xcb,0x25,0xdb,0xff]
754 ; X64NOBW-NEXT:    # zmm3 {%k3} {z} = -1
755 ; X64NOBW-NEXT:    vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
756 ; X64NOBW-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc3,0x01]
757 ; X64NOBW-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x65,0xc9,0x25,0xdb,0xff]
758 ; X64NOBW-NEXT:    # zmm3 {%k1} {z} = -1
759 ; X64NOBW-NEXT:    vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
760 ; X64NOBW-NEXT:    vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k2} {z} # encoding: [0x62,0xf3,0x5d,0xca,0x25,0xe4,0xff]
761 ; X64NOBW-NEXT:    # zmm4 {%k2} {z} = -1
762 ; X64NOBW-NEXT:    vpmovdb %zmm4, %xmm4 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xe4]
763 ; X64NOBW-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x65,0x38,0xdc,0x01]
764 ; X64NOBW-NEXT:    vinserti64x4 $1, %ymm0, %zmm3, %zmm0 # encoding: [0x62,0xf3,0xe5,0x48,0x3a,0xc0,0x01]
765 ; X64NOBW-NEXT:    vpternlogq $202, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf3,0xf5,0x48,0x25,0xc2,0xca]
766 ; X64NOBW-NEXT:    # zmm0 = zmm2 ^ (zmm0 & (zmm1 ^ zmm2))
767 ; X64NOBW-NEXT:    retq # encoding: [0xc3]
768   %1 = bitcast i64 %mask to <64 x i1>
769   %2 = call <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8> %src1, <64 x i8> %src2)
770   %3 = select <64 x i1> %1, <64 x i8> %2, <64 x i8> %passthru
771   ret <64 x i8> %3
772 }
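; Zeroing at 512 bits follows the same mask-assembly sequence and finishes with
; a single vpandq of the 64 mask bytes against the multiply result.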
774 define <64 x i8> @test_vgf2p8mulb_512_maskz(<64 x i8> %src1, <64 x i8> %src2, i64 %mask) {
775 ; X86BW-LABEL: test_vgf2p8mulb_512_maskz:
776 ; X86BW:       # %bb.0:
777 ; X86BW-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
778 ; X86BW-NEXT:    vgf2p8mulb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcf,0xc1]
779 ; X86BW-NEXT:    retl # encoding: [0xc3]
781 ; X64BW-LABEL: test_vgf2p8mulb_512_maskz:
782 ; X64BW:       # %bb.0:
783 ; X64BW-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
784 ; X64BW-NEXT:    vgf2p8mulb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcf,0xc1]
785 ; X64BW-NEXT:    retq # encoding: [0xc3]
787 ; X86NOBW-LABEL: test_vgf2p8mulb_512_maskz:
788 ; X86NOBW:       # %bb.0:
789 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
790 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x06]
791 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k3 # encoding: [0xc5,0xf8,0x90,0x5c,0x24,0x0a]
792 ; X86NOBW-NEXT:    kmovw {{[0-9]+}}(%esp), %k4 # encoding: [0xc5,0xf8,0x90,0x64,0x24,0x08]
793 ; X86NOBW-NEXT:    vgf2p8mulb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0xcf,0xc1]
794 ; X86NOBW-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} # encoding: [0x62,0xf3,0x75,0xcc,0x25,0xc9,0xff]
795 ; X86NOBW-NEXT:    # zmm1 {%k4} {z} = -1
796 ; X86NOBW-NEXT:    vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
797 ; X86NOBW-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k3} {z} # encoding: [0x62,0xf3,0x6d,0xcb,0x25,0xd2,0xff]
798 ; X86NOBW-NEXT:    # zmm2 {%k3} {z} = -1
799 ; X86NOBW-NEXT:    vpmovdb %zmm2, %xmm2 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xd2]
800 ; X86NOBW-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xca,0x01]
801 ; X86NOBW-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} # encoding: [0x62,0xf3,0x6d,0xc9,0x25,0xd2,0xff]
802 ; X86NOBW-NEXT:    # zmm2 {%k1} {z} = -1
803 ; X86NOBW-NEXT:    vpmovdb %zmm2, %xmm2 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xd2]
804 ; X86NOBW-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} # encoding: [0x62,0xf3,0x65,0xca,0x25,0xdb,0xff]
805 ; X86NOBW-NEXT:    # zmm3 {%k2} {z} = -1
806 ; X86NOBW-NEXT:    vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
807 ; X86NOBW-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x6d,0x38,0xd3,0x01]
808 ; X86NOBW-NEXT:    vinserti64x4 $1, %ymm1, %zmm2, %zmm1 # encoding: [0x62,0xf3,0xed,0x48,0x3a,0xc9,0x01]
809 ; X86NOBW-NEXT:    vpandq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xdb,0xc0]
810 ; X86NOBW-NEXT:    retl # encoding: [0xc3]
812 ; X64NOBW-LABEL: test_vgf2p8mulb_512_maskz:
813 ; X64NOBW:       # %bb.0:
814 ; X64NOBW-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
815 ; X64NOBW-NEXT:    movl %edi, %ecx # encoding: [0x89,0xf9]
816 ; X64NOBW-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
817 ; X64NOBW-NEXT:    shrq $32, %rdi # encoding: [0x48,0xc1,0xef,0x20]
818 ; X64NOBW-NEXT:    shrq $48, %rax # encoding: [0x48,0xc1,0xe8,0x30]
819 ; X64NOBW-NEXT:    shrl $16, %ecx # encoding: [0xc1,0xe9,0x10]
820 ; X64NOBW-NEXT:    kmovw %ecx, %k2 # encoding: [0xc5,0xf8,0x92,0xd1]
821 ; X64NOBW-NEXT:    kmovw %eax, %k3 # encoding: [0xc5,0xf8,0x92,0xd8]
822 ; X64NOBW-NEXT:    kmovw %edi, %k4 # encoding: [0xc5,0xf8,0x92,0xe7]
823 ; X64NOBW-NEXT:    vgf2p8mulb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0xcf,0xc1]
824 ; X64NOBW-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} # encoding: [0x62,0xf3,0x75,0xcc,0x25,0xc9,0xff]
825 ; X64NOBW-NEXT:    # zmm1 {%k4} {z} = -1
826 ; X64NOBW-NEXT:    vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
827 ; X64NOBW-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k3} {z} # encoding: [0x62,0xf3,0x6d,0xcb,0x25,0xd2,0xff]
828 ; X64NOBW-NEXT:    # zmm2 {%k3} {z} = -1
829 ; X64NOBW-NEXT:    vpmovdb %zmm2, %xmm2 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xd2]
830 ; X64NOBW-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xca,0x01]
831 ; X64NOBW-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} # encoding: [0x62,0xf3,0x6d,0xc9,0x25,0xd2,0xff]
832 ; X64NOBW-NEXT:    # zmm2 {%k1} {z} = -1
833 ; X64NOBW-NEXT:    vpmovdb %zmm2, %xmm2 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xd2]
834 ; X64NOBW-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} # encoding: [0x62,0xf3,0x65,0xca,0x25,0xdb,0xff]
835 ; X64NOBW-NEXT:    # zmm3 {%k2} {z} = -1
836 ; X64NOBW-NEXT:    vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
837 ; X64NOBW-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x6d,0x38,0xd3,0x01]
838 ; X64NOBW-NEXT:    vinserti64x4 $1, %ymm1, %zmm2, %zmm1 # encoding: [0x62,0xf3,0xed,0x48,0x3a,0xc9,0x01]
839 ; X64NOBW-NEXT:    vpandq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xdb,0xc0]
840 ; X64NOBW-NEXT:    retq # encoding: [0xc3]
841   %1 = bitcast i64 %mask to <64 x i1>
842   %2 = call <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8> %src1, <64 x i8> %src2)
843   %3 = select <64 x i1> %1, <64 x i8> %2, <64 x i8> zeroinitializer
844   ret <64 x i8> %3
845 }