; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avxvnniint8 --show-mc-encoding | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnniint8 --show-mc-encoding | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx10.2-256 --show-mc-encoding | FileCheck %s --check-prefixes=AVX10-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 --show-mc-encoding | FileCheck %s --check-prefixes=AVX10-X64
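
; The functions below exercise the 128-bit and 256-bit VPDPB{SS,SU,UU}D[S]
; dot-product intrinsics, checking the VEX encodings emitted for +avxvnniint8
; (X86/X64 prefixes) and the EVEX encodings emitted for +avx10.2-256
; (AVX10-X86/AVX10-X64 prefixes).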

declare <4 x i32> @llvm.x86.avx2.vpdpbssd.128(<4 x i32>, <4 x i32>, <4 x i32>)

define <4 x i32>@test_int_x86_avx2_vpdpbssd_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbssd_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT: vpdpbssd (%eax), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x73,0x50,0x18]
; X86-NEXT: vpdpbssd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x73,0x50,0xc2]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbssd_128:
; X64: # %bb.0:
; X64-NEXT: vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X64-NEXT: vpdpbssd (%rdi), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x73,0x50,0x1f]
; X64-NEXT: vpdpbssd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x73,0x50,0xc2]
; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
;
; AVX10-X86-LABEL: test_int_x86_avx2_vpdpbssd_128:
; AVX10-X86: # %bb.0:
; AVX10-X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; AVX10-X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; AVX10-X86-NEXT: vpdpbssd (%eax), %xmm1, %xmm3 # encoding: [0x62,0xf2,0x77,0x08,0x50,0x18]
; AVX10-X86-NEXT: vpdpbssd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x77,0x08,0x50,0xc2]
; AVX10-X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; AVX10-X86-NEXT: retl # encoding: [0xc3]
;
; AVX10-X64-LABEL: test_int_x86_avx2_vpdpbssd_128:
; AVX10-X64: # %bb.0:
; AVX10-X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; AVX10-X64-NEXT: vpdpbssd (%rdi), %xmm1, %xmm3 # encoding: [0x62,0xf2,0x77,0x08,0x50,0x1f]
; AVX10-X64-NEXT: vpdpbssd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x77,0x08,0x50,0xc2]
; AVX10-X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; AVX10-X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <4 x i32>, ptr %x2p
  %1 = call <4 x i32> @llvm.x86.avx2.vpdpbssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
  %2 = call <4 x i32> @llvm.x86.avx2.vpdpbssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
  %res = add <4 x i32> %1, %2
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx2.vpdpbssds.128(<4 x i32>, <4 x i32>, <4 x i32>)

define <4 x i32>@test_int_x86_avx2_vpdpbssds_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbssds_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT: vpdpbssds (%eax), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x73,0x51,0x18]
; X86-NEXT: vpdpbssds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x73,0x51,0xc2]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbssds_128:
; X64: # %bb.0:
; X64-NEXT: vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X64-NEXT: vpdpbssds (%rdi), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x73,0x51,0x1f]
; X64-NEXT: vpdpbssds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x73,0x51,0xc2]
; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
;
; AVX10-X86-LABEL: test_int_x86_avx2_vpdpbssds_128:
; AVX10-X86: # %bb.0:
; AVX10-X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; AVX10-X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; AVX10-X86-NEXT: vpdpbssds (%eax), %xmm1, %xmm3 # encoding: [0x62,0xf2,0x77,0x08,0x51,0x18]
; AVX10-X86-NEXT: vpdpbssds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x77,0x08,0x51,0xc2]
; AVX10-X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; AVX10-X86-NEXT: retl # encoding: [0xc3]
;
; AVX10-X64-LABEL: test_int_x86_avx2_vpdpbssds_128:
; AVX10-X64: # %bb.0:
; AVX10-X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; AVX10-X64-NEXT: vpdpbssds (%rdi), %xmm1, %xmm3 # encoding: [0x62,0xf2,0x77,0x08,0x51,0x1f]
; AVX10-X64-NEXT: vpdpbssds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x77,0x08,0x51,0xc2]
; AVX10-X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; AVX10-X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <4 x i32>, ptr %x2p
  %1 = call <4 x i32> @llvm.x86.avx2.vpdpbssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
  %2 = call <4 x i32> @llvm.x86.avx2.vpdpbssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
  %res = add <4 x i32> %1, %2
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx2.vpdpbssd.256(<8 x i32>, <8 x i32>, <8 x i32>)

define <8 x i32>@test_int_x86_avx2_vpdpbssd_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbssd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X86-NEXT: vpdpbssd (%eax), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x77,0x50,0x18]
; X86-NEXT: vpdpbssd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x77,0x50,0xc2]
; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbssd_256:
; X64: # %bb.0:
; X64-NEXT: vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X64-NEXT: vpdpbssd (%rdi), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x77,0x50,0x1f]
; X64-NEXT: vpdpbssd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x77,0x50,0xc2]
; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
;
; AVX10-X86-LABEL: test_int_x86_avx2_vpdpbssd_256:
; AVX10-X86: # %bb.0:
; AVX10-X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; AVX10-X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; AVX10-X86-NEXT: vpdpbssd (%eax), %ymm1, %ymm3 # encoding: [0x62,0xf2,0x77,0x28,0x50,0x18]
; AVX10-X86-NEXT: vpdpbssd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x77,0x28,0x50,0xc2]
; AVX10-X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; AVX10-X86-NEXT: retl # encoding: [0xc3]
;
; AVX10-X64-LABEL: test_int_x86_avx2_vpdpbssd_256:
; AVX10-X64: # %bb.0:
; AVX10-X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; AVX10-X64-NEXT: vpdpbssd (%rdi), %ymm1, %ymm3 # encoding: [0x62,0xf2,0x77,0x28,0x50,0x1f]
; AVX10-X64-NEXT: vpdpbssd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x77,0x28,0x50,0xc2]
; AVX10-X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; AVX10-X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <8 x i32>, ptr %x2p
  %1 = call <8 x i32> @llvm.x86.avx2.vpdpbssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
  %2 = call <8 x i32> @llvm.x86.avx2.vpdpbssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
  %res = add <8 x i32> %1, %2
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx2.vpdpbssds.256(<8 x i32>, <8 x i32>, <8 x i32>)

define <8 x i32>@test_int_x86_avx2_vpdpbssds_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbssds_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X86-NEXT: vpdpbssds (%eax), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x77,0x51,0x18]
; X86-NEXT: vpdpbssds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x77,0x51,0xc2]
; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbssds_256:
; X64: # %bb.0:
; X64-NEXT: vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X64-NEXT: vpdpbssds (%rdi), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x77,0x51,0x1f]
; X64-NEXT: vpdpbssds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x77,0x51,0xc2]
; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
;
; AVX10-X86-LABEL: test_int_x86_avx2_vpdpbssds_256:
; AVX10-X86: # %bb.0:
; AVX10-X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; AVX10-X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; AVX10-X86-NEXT: vpdpbssds (%eax), %ymm1, %ymm3 # encoding: [0x62,0xf2,0x77,0x28,0x51,0x18]
; AVX10-X86-NEXT: vpdpbssds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x77,0x28,0x51,0xc2]
; AVX10-X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; AVX10-X86-NEXT: retl # encoding: [0xc3]
;
; AVX10-X64-LABEL: test_int_x86_avx2_vpdpbssds_256:
; AVX10-X64: # %bb.0:
; AVX10-X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; AVX10-X64-NEXT: vpdpbssds (%rdi), %ymm1, %ymm3 # encoding: [0x62,0xf2,0x77,0x28,0x51,0x1f]
; AVX10-X64-NEXT: vpdpbssds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x77,0x28,0x51,0xc2]
; AVX10-X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; AVX10-X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <8 x i32>, ptr %x2p
  %1 = call <8 x i32> @llvm.x86.avx2.vpdpbssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
  %2 = call <8 x i32> @llvm.x86.avx2.vpdpbssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
  %res = add <8 x i32> %1, %2
  ret <8 x i32> %res
}

declare <4 x i32> @llvm.x86.avx2.vpdpbsud.128(<4 x i32>, <4 x i32>, <4 x i32>)

define <4 x i32>@test_int_x86_avx2_vpdpbsud_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbsud_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT: vpdpbsud (%eax), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x72,0x50,0x18]
; X86-NEXT: vpdpbsud %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x72,0x50,0xc2]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbsud_128:
; X64: # %bb.0:
; X64-NEXT: vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X64-NEXT: vpdpbsud (%rdi), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x72,0x50,0x1f]
; X64-NEXT: vpdpbsud %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x72,0x50,0xc2]
; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
;
; AVX10-X86-LABEL: test_int_x86_avx2_vpdpbsud_128:
; AVX10-X86: # %bb.0:
; AVX10-X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; AVX10-X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; AVX10-X86-NEXT: vpdpbsud (%eax), %xmm1, %xmm3 # encoding: [0x62,0xf2,0x76,0x08,0x50,0x18]
; AVX10-X86-NEXT: vpdpbsud %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x76,0x08,0x50,0xc2]
; AVX10-X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; AVX10-X86-NEXT: retl # encoding: [0xc3]
;
; AVX10-X64-LABEL: test_int_x86_avx2_vpdpbsud_128:
; AVX10-X64: # %bb.0:
; AVX10-X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; AVX10-X64-NEXT: vpdpbsud (%rdi), %xmm1, %xmm3 # encoding: [0x62,0xf2,0x76,0x08,0x50,0x1f]
; AVX10-X64-NEXT: vpdpbsud %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x76,0x08,0x50,0xc2]
; AVX10-X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; AVX10-X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <4 x i32>, ptr %x2p
  %1 = call <4 x i32> @llvm.x86.avx2.vpdpbsud.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
  %2 = call <4 x i32> @llvm.x86.avx2.vpdpbsud.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
  %res = add <4 x i32> %1, %2
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx2.vpdpbsuds.128(<4 x i32>, <4 x i32>, <4 x i32>)

define <4 x i32>@test_int_x86_avx2_vpdpbsuds_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbsuds_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT: vpdpbsuds (%eax), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x72,0x51,0x18]
; X86-NEXT: vpdpbsuds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x72,0x51,0xc2]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbsuds_128:
; X64: # %bb.0:
; X64-NEXT: vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X64-NEXT: vpdpbsuds (%rdi), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x72,0x51,0x1f]
; X64-NEXT: vpdpbsuds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x72,0x51,0xc2]
; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
;
; AVX10-X86-LABEL: test_int_x86_avx2_vpdpbsuds_128:
; AVX10-X86: # %bb.0:
; AVX10-X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; AVX10-X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; AVX10-X86-NEXT: vpdpbsuds (%eax), %xmm1, %xmm3 # encoding: [0x62,0xf2,0x76,0x08,0x51,0x18]
; AVX10-X86-NEXT: vpdpbsuds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x76,0x08,0x51,0xc2]
; AVX10-X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; AVX10-X86-NEXT: retl # encoding: [0xc3]
;
; AVX10-X64-LABEL: test_int_x86_avx2_vpdpbsuds_128:
; AVX10-X64: # %bb.0:
; AVX10-X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; AVX10-X64-NEXT: vpdpbsuds (%rdi), %xmm1, %xmm3 # encoding: [0x62,0xf2,0x76,0x08,0x51,0x1f]
; AVX10-X64-NEXT: vpdpbsuds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x76,0x08,0x51,0xc2]
; AVX10-X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; AVX10-X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <4 x i32>, ptr %x2p
  %1 = call <4 x i32> @llvm.x86.avx2.vpdpbsuds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
  %2 = call <4 x i32> @llvm.x86.avx2.vpdpbsuds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
  %res = add <4 x i32> %1, %2
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx2.vpdpbsud.256(<8 x i32>, <8 x i32>, <8 x i32>)

define <8 x i32>@test_int_x86_avx2_vpdpbsud_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbsud_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X86-NEXT: vpdpbsud (%eax), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x76,0x50,0x18]
; X86-NEXT: vpdpbsud %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x76,0x50,0xc2]
; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbsud_256:
; X64: # %bb.0:
; X64-NEXT: vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X64-NEXT: vpdpbsud (%rdi), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x76,0x50,0x1f]
; X64-NEXT: vpdpbsud %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x76,0x50,0xc2]
; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
;
; AVX10-X86-LABEL: test_int_x86_avx2_vpdpbsud_256:
; AVX10-X86: # %bb.0:
; AVX10-X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; AVX10-X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; AVX10-X86-NEXT: vpdpbsud (%eax), %ymm1, %ymm3 # encoding: [0x62,0xf2,0x76,0x28,0x50,0x18]
; AVX10-X86-NEXT: vpdpbsud %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x76,0x28,0x50,0xc2]
; AVX10-X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; AVX10-X86-NEXT: retl # encoding: [0xc3]
;
; AVX10-X64-LABEL: test_int_x86_avx2_vpdpbsud_256:
; AVX10-X64: # %bb.0:
; AVX10-X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; AVX10-X64-NEXT: vpdpbsud (%rdi), %ymm1, %ymm3 # encoding: [0x62,0xf2,0x76,0x28,0x50,0x1f]
; AVX10-X64-NEXT: vpdpbsud %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x76,0x28,0x50,0xc2]
; AVX10-X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; AVX10-X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <8 x i32>, ptr %x2p
  %1 = call <8 x i32> @llvm.x86.avx2.vpdpbsud.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
  %2 = call <8 x i32> @llvm.x86.avx2.vpdpbsud.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
  %res = add <8 x i32> %1, %2
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx2.vpdpbsuds.256(<8 x i32>, <8 x i32>, <8 x i32>)

define <8 x i32>@test_int_x86_avx2_vpdpbsuds_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbsuds_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X86-NEXT: vpdpbsuds (%eax), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x76,0x51,0x18]
; X86-NEXT: vpdpbsuds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x76,0x51,0xc2]
; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbsuds_256:
; X64: # %bb.0:
; X64-NEXT: vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X64-NEXT: vpdpbsuds (%rdi), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x76,0x51,0x1f]
; X64-NEXT: vpdpbsuds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x76,0x51,0xc2]
; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
;
; AVX10-X86-LABEL: test_int_x86_avx2_vpdpbsuds_256:
; AVX10-X86: # %bb.0:
; AVX10-X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; AVX10-X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; AVX10-X86-NEXT: vpdpbsuds (%eax), %ymm1, %ymm3 # encoding: [0x62,0xf2,0x76,0x28,0x51,0x18]
; AVX10-X86-NEXT: vpdpbsuds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x76,0x28,0x51,0xc2]
; AVX10-X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; AVX10-X86-NEXT: retl # encoding: [0xc3]
;
; AVX10-X64-LABEL: test_int_x86_avx2_vpdpbsuds_256:
; AVX10-X64: # %bb.0:
; AVX10-X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; AVX10-X64-NEXT: vpdpbsuds (%rdi), %ymm1, %ymm3 # encoding: [0x62,0xf2,0x76,0x28,0x51,0x1f]
; AVX10-X64-NEXT: vpdpbsuds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x76,0x28,0x51,0xc2]
; AVX10-X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; AVX10-X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <8 x i32>, ptr %x2p
  %1 = call <8 x i32> @llvm.x86.avx2.vpdpbsuds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
  %2 = call <8 x i32> @llvm.x86.avx2.vpdpbsuds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
  %res = add <8 x i32> %1, %2
  ret <8 x i32> %res
}

declare <4 x i32> @llvm.x86.avx2.vpdpbuud.128(<4 x i32>, <4 x i32>, <4 x i32>)

define <4 x i32>@test_int_x86_avx2_vpdpbuud_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbuud_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT: vpdpbuud (%eax), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x70,0x50,0x18]
; X86-NEXT: vpdpbuud %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x70,0x50,0xc2]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbuud_128:
; X64: # %bb.0:
; X64-NEXT: vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X64-NEXT: vpdpbuud (%rdi), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x70,0x50,0x1f]
; X64-NEXT: vpdpbuud %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x70,0x50,0xc2]
; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
;
; AVX10-X86-LABEL: test_int_x86_avx2_vpdpbuud_128:
; AVX10-X86: # %bb.0:
; AVX10-X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; AVX10-X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; AVX10-X86-NEXT: vpdpbuud (%eax), %xmm1, %xmm3 # encoding: [0x62,0xf2,0x74,0x08,0x50,0x18]
; AVX10-X86-NEXT: vpdpbuud %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x74,0x08,0x50,0xc2]
; AVX10-X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; AVX10-X86-NEXT: retl # encoding: [0xc3]
;
; AVX10-X64-LABEL: test_int_x86_avx2_vpdpbuud_128:
; AVX10-X64: # %bb.0:
; AVX10-X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; AVX10-X64-NEXT: vpdpbuud (%rdi), %xmm1, %xmm3 # encoding: [0x62,0xf2,0x74,0x08,0x50,0x1f]
; AVX10-X64-NEXT: vpdpbuud %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x74,0x08,0x50,0xc2]
; AVX10-X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; AVX10-X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <4 x i32>, ptr %x2p
  %1 = call <4 x i32> @llvm.x86.avx2.vpdpbuud.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
  %2 = call <4 x i32> @llvm.x86.avx2.vpdpbuud.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
  %res = add <4 x i32> %1, %2
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx2.vpdpbuuds.128(<4 x i32>, <4 x i32>, <4 x i32>)

define <4 x i32>@test_int_x86_avx2_vpdpbuuds_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbuuds_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT: vpdpbuuds (%eax), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x70,0x51,0x18]
; X86-NEXT: vpdpbuuds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x70,0x51,0xc2]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbuuds_128:
; X64: # %bb.0:
; X64-NEXT: vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X64-NEXT: vpdpbuuds (%rdi), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x70,0x51,0x1f]
; X64-NEXT: vpdpbuuds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x70,0x51,0xc2]
; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
;
; AVX10-X86-LABEL: test_int_x86_avx2_vpdpbuuds_128:
; AVX10-X86: # %bb.0:
; AVX10-X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; AVX10-X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; AVX10-X86-NEXT: vpdpbuuds (%eax), %xmm1, %xmm3 # encoding: [0x62,0xf2,0x74,0x08,0x51,0x18]
; AVX10-X86-NEXT: vpdpbuuds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x74,0x08,0x51,0xc2]
; AVX10-X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; AVX10-X86-NEXT: retl # encoding: [0xc3]
;
; AVX10-X64-LABEL: test_int_x86_avx2_vpdpbuuds_128:
; AVX10-X64: # %bb.0:
; AVX10-X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; AVX10-X64-NEXT: vpdpbuuds (%rdi), %xmm1, %xmm3 # encoding: [0x62,0xf2,0x74,0x08,0x51,0x1f]
; AVX10-X64-NEXT: vpdpbuuds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x74,0x08,0x51,0xc2]
; AVX10-X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; AVX10-X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <4 x i32>, ptr %x2p
  %1 = call <4 x i32> @llvm.x86.avx2.vpdpbuuds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
  %2 = call <4 x i32> @llvm.x86.avx2.vpdpbuuds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
  %res = add <4 x i32> %1, %2
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx2.vpdpbuud.256(<8 x i32>, <8 x i32>, <8 x i32>)

define <8 x i32>@test_int_x86_avx2_vpdpbuud_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbuud_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X86-NEXT: vpdpbuud (%eax), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x74,0x50,0x18]
; X86-NEXT: vpdpbuud %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x74,0x50,0xc2]
; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbuud_256:
; X64: # %bb.0:
; X64-NEXT: vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X64-NEXT: vpdpbuud (%rdi), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x74,0x50,0x1f]
; X64-NEXT: vpdpbuud %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x74,0x50,0xc2]
; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
;
; AVX10-X86-LABEL: test_int_x86_avx2_vpdpbuud_256:
; AVX10-X86: # %bb.0:
; AVX10-X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; AVX10-X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; AVX10-X86-NEXT: vpdpbuud (%eax), %ymm1, %ymm3 # encoding: [0x62,0xf2,0x74,0x28,0x50,0x18]
; AVX10-X86-NEXT: vpdpbuud %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x74,0x28,0x50,0xc2]
; AVX10-X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; AVX10-X86-NEXT: retl # encoding: [0xc3]
;
; AVX10-X64-LABEL: test_int_x86_avx2_vpdpbuud_256:
; AVX10-X64: # %bb.0:
; AVX10-X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; AVX10-X64-NEXT: vpdpbuud (%rdi), %ymm1, %ymm3 # encoding: [0x62,0xf2,0x74,0x28,0x50,0x1f]
; AVX10-X64-NEXT: vpdpbuud %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x74,0x28,0x50,0xc2]
; AVX10-X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; AVX10-X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <8 x i32>, ptr %x2p
  %1 = call <8 x i32> @llvm.x86.avx2.vpdpbuud.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
  %2 = call <8 x i32> @llvm.x86.avx2.vpdpbuud.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
  %res = add <8 x i32> %1, %2
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx2.vpdpbuuds.256(<8 x i32>, <8 x i32>, <8 x i32>)

define <8 x i32>@test_int_x86_avx2_vpdpbuuds_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbuuds_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X86-NEXT: vpdpbuuds (%eax), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x74,0x51,0x18]
; X86-NEXT: vpdpbuuds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x74,0x51,0xc2]
; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbuuds_256:
; X64: # %bb.0:
; X64-NEXT: vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X64-NEXT: vpdpbuuds (%rdi), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x74,0x51,0x1f]
; X64-NEXT: vpdpbuuds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x74,0x51,0xc2]
; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
;
; AVX10-X86-LABEL: test_int_x86_avx2_vpdpbuuds_256:
; AVX10-X86: # %bb.0:
; AVX10-X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; AVX10-X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; AVX10-X86-NEXT: vpdpbuuds (%eax), %ymm1, %ymm3 # encoding: [0x62,0xf2,0x74,0x28,0x51,0x18]
; AVX10-X86-NEXT: vpdpbuuds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x74,0x28,0x51,0xc2]
; AVX10-X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; AVX10-X86-NEXT: retl # encoding: [0xc3]
;
; AVX10-X64-LABEL: test_int_x86_avx2_vpdpbuuds_256:
; AVX10-X64: # %bb.0:
; AVX10-X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; AVX10-X64-NEXT: vpdpbuuds (%rdi), %ymm1, %ymm3 # encoding: [0x62,0xf2,0x74,0x28,0x51,0x1f]
; AVX10-X64-NEXT: vpdpbuuds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x74,0x28,0x51,0xc2]
; AVX10-X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; AVX10-X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <8 x i32>, ptr %x2p
  %1 = call <8 x i32> @llvm.x86.avx2.vpdpbuuds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
  %2 = call <8 x i32> @llvm.x86.avx2.vpdpbuuds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
  %res = add <8 x i32> %1, %2
  ret <8 x i32> %res
}