; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avxvnniint8 --show-mc-encoding | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnniint8 --show-mc-encoding | FileCheck %s --check-prefixes=X64
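
; This file tests the AVX-VNNI-INT8 dot-product intrinsics in their 128-bit
; and 256-bit forms, checking both instruction selection and the VEX
; encodings on 32-bit and 64-bit targets. Each test calls its intrinsic
; twice, once with a memory operand loaded from %x2p and once with a
; register operand, so both addressing forms of the instruction are covered.
;
; VPDPBSSD multiplies groups of four adjacent pairs of signed bytes in the
; first source with the corresponding signed bytes in the second source and
; accumulates the dword sums into the destination; VPDPBSSDS is the same
; operation with signed saturation on the accumulation.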
declare <4 x i32> @llvm.x86.avx2.vpdpbssd.128(<4 x i32>, <4 x i32>, <4 x i32>)

define <4 x i32>@test_int_x86_avx2_vpdpbssd_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbssd_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT:    vpdpbssd (%eax), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x73,0x50,0x18]
; X86-NEXT:    vpdpbssd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x73,0x50,0xc2]
; X86-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbssd_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X64-NEXT:    vpdpbssd (%rdi), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x73,0x50,0x1f]
; X64-NEXT:    vpdpbssd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x73,0x50,0xc2]
; X64-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <4 x i32>, ptr %x2p
  %1 = call <4 x i32> @llvm.x86.avx2.vpdpbssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
  %2 = call <4 x i32> @llvm.x86.avx2.vpdpbssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
  %res = add <4 x i32> %1, %2
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx2.vpdpbssds.128(<4 x i32>, <4 x i32>, <4 x i32>)

define <4 x i32>@test_int_x86_avx2_vpdpbssds_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbssds_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT:    vpdpbssds (%eax), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x73,0x51,0x18]
; X86-NEXT:    vpdpbssds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x73,0x51,0xc2]
; X86-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbssds_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X64-NEXT:    vpdpbssds (%rdi), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x73,0x51,0x1f]
; X64-NEXT:    vpdpbssds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x73,0x51,0xc2]
; X64-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <4 x i32>, ptr %x2p
  %1 = call <4 x i32> @llvm.x86.avx2.vpdpbssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
  %2 = call <4 x i32> @llvm.x86.avx2.vpdpbssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
  %res = add <4 x i32> %1, %2
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx2.vpdpbssd.256(<8 x i32>, <8 x i32>, <8 x i32>)

define <8 x i32>@test_int_x86_avx2_vpdpbssd_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbssd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X86-NEXT:    vpdpbssd (%eax), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x77,0x50,0x18]
; X86-NEXT:    vpdpbssd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x77,0x50,0xc2]
; X86-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbssd_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X64-NEXT:    vpdpbssd (%rdi), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x77,0x50,0x1f]
; X64-NEXT:    vpdpbssd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x77,0x50,0xc2]
; X64-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i32>, ptr %x2p
  %1 = call <8 x i32> @llvm.x86.avx2.vpdpbssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
  %2 = call <8 x i32> @llvm.x86.avx2.vpdpbssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
  %res = add <8 x i32> %1, %2
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx2.vpdpbssds.256(<8 x i32>, <8 x i32>, <8 x i32>)

define <8 x i32>@test_int_x86_avx2_vpdpbssds_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbssds_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X86-NEXT:    vpdpbssds (%eax), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x77,0x51,0x18]
; X86-NEXT:    vpdpbssds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x77,0x51,0xc2]
; X86-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbssds_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X64-NEXT:    vpdpbssds (%rdi), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x77,0x51,0x1f]
; X64-NEXT:    vpdpbssds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x77,0x51,0xc2]
; X64-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i32>, ptr %x2p
  %1 = call <8 x i32> @llvm.x86.avx2.vpdpbssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
  %2 = call <8 x i32> @llvm.x86.avx2.vpdpbssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
  %res = add <8 x i32> %1, %2
  ret <8 x i32> %res
}

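; VPDPBSUD/VPDPBSUDS perform the same dot-product accumulation but multiply
; signed bytes from the first source with unsigned bytes from the second
; source, the "s" suffix again selecting signed saturation.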
declare <4 x i32> @llvm.x86.avx2.vpdpbsud.128(<4 x i32>, <4 x i32>, <4 x i32>)

define <4 x i32>@test_int_x86_avx2_vpdpbsud_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbsud_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT:    vpdpbsud (%eax), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x72,0x50,0x18]
; X86-NEXT:    vpdpbsud %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x72,0x50,0xc2]
; X86-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbsud_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X64-NEXT:    vpdpbsud (%rdi), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x72,0x50,0x1f]
; X64-NEXT:    vpdpbsud %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x72,0x50,0xc2]
; X64-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <4 x i32>, ptr %x2p
  %1 = call <4 x i32> @llvm.x86.avx2.vpdpbsud.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
  %2 = call <4 x i32> @llvm.x86.avx2.vpdpbsud.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
  %res = add <4 x i32> %1, %2
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx2.vpdpbsuds.128(<4 x i32>, <4 x i32>, <4 x i32>)

define <4 x i32>@test_int_x86_avx2_vpdpbsuds_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbsuds_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT:    vpdpbsuds (%eax), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x72,0x51,0x18]
; X86-NEXT:    vpdpbsuds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x72,0x51,0xc2]
; X86-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbsuds_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X64-NEXT:    vpdpbsuds (%rdi), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x72,0x51,0x1f]
; X64-NEXT:    vpdpbsuds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x72,0x51,0xc2]
; X64-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <4 x i32>, ptr %x2p
  %1 = call <4 x i32> @llvm.x86.avx2.vpdpbsuds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
  %2 = call <4 x i32> @llvm.x86.avx2.vpdpbsuds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
  %res = add <4 x i32> %1, %2
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx2.vpdpbsud.256(<8 x i32>, <8 x i32>, <8 x i32>)

define <8 x i32>@test_int_x86_avx2_vpdpbsud_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbsud_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X86-NEXT:    vpdpbsud (%eax), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x76,0x50,0x18]
; X86-NEXT:    vpdpbsud %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x76,0x50,0xc2]
; X86-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbsud_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X64-NEXT:    vpdpbsud (%rdi), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x76,0x50,0x1f]
; X64-NEXT:    vpdpbsud %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x76,0x50,0xc2]
; X64-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i32>, ptr %x2p
  %1 = call <8 x i32> @llvm.x86.avx2.vpdpbsud.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
  %2 = call <8 x i32> @llvm.x86.avx2.vpdpbsud.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
  %res = add <8 x i32> %1, %2
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx2.vpdpbsuds.256(<8 x i32>, <8 x i32>, <8 x i32>)

define <8 x i32>@test_int_x86_avx2_vpdpbsuds_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbsuds_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X86-NEXT:    vpdpbsuds (%eax), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x76,0x51,0x18]
; X86-NEXT:    vpdpbsuds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x76,0x51,0xc2]
; X86-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbsuds_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X64-NEXT:    vpdpbsuds (%rdi), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x76,0x51,0x1f]
; X64-NEXT:    vpdpbsuds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x76,0x51,0xc2]
; X64-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i32>, ptr %x2p
  %1 = call <8 x i32> @llvm.x86.avx2.vpdpbsuds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
  %2 = call <8 x i32> @llvm.x86.avx2.vpdpbsuds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
  %res = add <8 x i32> %1, %2
  ret <8 x i32> %res
}

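; VPDPBUUD/VPDPBUUDS multiply unsigned bytes from both sources, without and
; with unsigned saturation on the dword accumulation.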
declare <4 x i32> @llvm.x86.avx2.vpdpbuud.128(<4 x i32>, <4 x i32>, <4 x i32>)

define <4 x i32>@test_int_x86_avx2_vpdpbuud_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbuud_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT:    vpdpbuud (%eax), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x70,0x50,0x18]
; X86-NEXT:    vpdpbuud %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x70,0x50,0xc2]
; X86-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbuud_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X64-NEXT:    vpdpbuud (%rdi), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x70,0x50,0x1f]
; X64-NEXT:    vpdpbuud %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x70,0x50,0xc2]
; X64-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <4 x i32>, ptr %x2p
  %1 = call <4 x i32> @llvm.x86.avx2.vpdpbuud.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
  %2 = call <4 x i32> @llvm.x86.avx2.vpdpbuud.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
  %res = add <4 x i32> %1, %2
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx2.vpdpbuuds.128(<4 x i32>, <4 x i32>, <4 x i32>)

define <4 x i32>@test_int_x86_avx2_vpdpbuuds_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbuuds_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT:    vpdpbuuds (%eax), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x70,0x51,0x18]
; X86-NEXT:    vpdpbuuds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x70,0x51,0xc2]
; X86-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbuuds_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; X64-NEXT:    vpdpbuuds (%rdi), %xmm1, %xmm3 # encoding: [0xc4,0xe2,0x70,0x51,0x1f]
; X64-NEXT:    vpdpbuuds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x70,0x51,0xc2]
; X64-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <4 x i32>, ptr %x2p
  %1 = call <4 x i32> @llvm.x86.avx2.vpdpbuuds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
  %2 = call <4 x i32> @llvm.x86.avx2.vpdpbuuds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
  %res = add <4 x i32> %1, %2
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx2.vpdpbuud.256(<8 x i32>, <8 x i32>, <8 x i32>)

define <8 x i32>@test_int_x86_avx2_vpdpbuud_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbuud_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X86-NEXT:    vpdpbuud (%eax), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x74,0x50,0x18]
; X86-NEXT:    vpdpbuud %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x74,0x50,0xc2]
; X86-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbuud_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X64-NEXT:    vpdpbuud (%rdi), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x74,0x50,0x1f]
; X64-NEXT:    vpdpbuud %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x74,0x50,0xc2]
; X64-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i32>, ptr %x2p
  %1 = call <8 x i32> @llvm.x86.avx2.vpdpbuud.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
  %2 = call <8 x i32> @llvm.x86.avx2.vpdpbuud.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
  %res = add <8 x i32> %1, %2
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx2.vpdpbuuds.256(<8 x i32>, <8 x i32>, <8 x i32>)

define <8 x i32>@test_int_x86_avx2_vpdpbuuds_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4) {
; X86-LABEL: test_int_x86_avx2_vpdpbuuds_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X86-NEXT:    vpdpbuuds (%eax), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x74,0x51,0x18]
; X86-NEXT:    vpdpbuuds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x74,0x51,0xc2]
; X86-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx2_vpdpbuuds_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps %ymm0, %ymm3 # encoding: [0xc5,0xfc,0x28,0xd8]
; X64-NEXT:    vpdpbuuds (%rdi), %ymm1, %ymm3 # encoding: [0xc4,0xe2,0x74,0x51,0x1f]
; X64-NEXT:    vpdpbuuds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x74,0x51,0xc2]
; X64-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i32>, ptr %x2p
  %1 = call <8 x i32> @llvm.x86.avx2.vpdpbuuds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
  %2 = call <8 x i32> @llvm.x86.avx2.vpdpbuuds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
  %res = add <8 x i32> %1, %2
  ret <8 x i32> %res
}