1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -O3 -disable-peephole -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avxvnniint16 | FileCheck %s
4 declare <4 x i32> @llvm.x86.avx2.vpdpwsud.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
5 declare <8 x i32> @llvm.x86.avx2.vpdpwsud.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
6 declare <4 x i32> @llvm.x86.avx2.vpdpwsuds.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
7 declare <8 x i32> @llvm.x86.avx2.vpdpwsuds.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
8 declare <4 x i32> @llvm.x86.avx2.vpdpwusd.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
9 declare <8 x i32> @llvm.x86.avx2.vpdpwusd.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
10 declare <4 x i32> @llvm.x86.avx2.vpdpwusds.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
11 declare <8 x i32> @llvm.x86.avx2.vpdpwusds.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
12 declare <4 x i32> @llvm.x86.avx2.vpdpwuud.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
13 declare <8 x i32> @llvm.x86.avx2.vpdpwuud.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
14 declare <4 x i32> @llvm.x86.avx2.vpdpwuuds.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
15 declare <8 x i32> @llvm.x86.avx2.vpdpwuuds.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
17 define <4 x i32> @test_int_x86_avx2_vpdpwsud_128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
18 ; CHECK-LABEL: test_int_x86_avx2_vpdpwsud_128:
20 ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
21 ; CHECK-NEXT: # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
23 ; CHECK-NEXT: nop # encoding: [0x90]
25 ; CHECK-NEXT: vpdpwsud {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
26 ; CHECK-NEXT: # encoding: [0xc4,0xe2,0x72,0xd2,0x44,0x24,0xe8]
27 ; CHECK-NEXT: retq # encoding: [0xc3]
28 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
29 %ret = call <4 x i32> @llvm.x86.avx2.vpdpwsud.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
33 define <8 x i32> @test_int_x86_avx2_vpdpwsud_256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
34 ; CHECK-LABEL: test_int_x86_avx2_vpdpwsud_256:
36 ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
37 ; CHECK-NEXT: # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
39 ; CHECK-NEXT: nop # encoding: [0x90]
41 ; CHECK-NEXT: vpdpwsud {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
42 ; CHECK-NEXT: # encoding: [0xc4,0xe2,0x76,0xd2,0x44,0x24,0xd8]
43 ; CHECK-NEXT: retq # encoding: [0xc3]
44 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
45 %ret = call <8 x i32> @llvm.x86.avx2.vpdpwsud.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
49 define <4 x i32> @test_int_x86_avx2_vpdpwsuds_128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
50 ; CHECK-LABEL: test_int_x86_avx2_vpdpwsuds_128:
52 ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
53 ; CHECK-NEXT: # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
55 ; CHECK-NEXT: nop # encoding: [0x90]
57 ; CHECK-NEXT: vpdpwsuds {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
58 ; CHECK-NEXT: # encoding: [0xc4,0xe2,0x72,0xd3,0x44,0x24,0xe8]
59 ; CHECK-NEXT: retq # encoding: [0xc3]
60 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
61 %ret = call <4 x i32> @llvm.x86.avx2.vpdpwsuds.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
65 define <8 x i32> @test_int_x86_avx2_vpdpwsuds_256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
66 ; CHECK-LABEL: test_int_x86_avx2_vpdpwsuds_256:
68 ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
69 ; CHECK-NEXT: # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
71 ; CHECK-NEXT: nop # encoding: [0x90]
73 ; CHECK-NEXT: vpdpwsuds {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
74 ; CHECK-NEXT: # encoding: [0xc4,0xe2,0x76,0xd3,0x44,0x24,0xd8]
75 ; CHECK-NEXT: retq # encoding: [0xc3]
76 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
77 %ret = call <8 x i32> @llvm.x86.avx2.vpdpwsuds.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
81 define <4 x i32> @test_int_x86_avx2_vpdpwusd_128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
82 ; CHECK-LABEL: test_int_x86_avx2_vpdpwusd_128:
84 ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
85 ; CHECK-NEXT: # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
87 ; CHECK-NEXT: nop # encoding: [0x90]
89 ; CHECK-NEXT: vpdpwusd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
90 ; CHECK-NEXT: # encoding: [0xc4,0xe2,0x71,0xd2,0x44,0x24,0xe8]
91 ; CHECK-NEXT: retq # encoding: [0xc3]
92 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
93 %ret = call <4 x i32> @llvm.x86.avx2.vpdpwusd.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
97 define <8 x i32> @test_int_x86_avx2_vpdpwusd_256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
98 ; CHECK-LABEL: test_int_x86_avx2_vpdpwusd_256:
100 ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
101 ; CHECK-NEXT: # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
103 ; CHECK-NEXT: nop # encoding: [0x90]
104 ; CHECK-NEXT: #NO_APP
105 ; CHECK-NEXT: vpdpwusd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
106 ; CHECK-NEXT: # encoding: [0xc4,0xe2,0x75,0xd2,0x44,0x24,0xd8]
107 ; CHECK-NEXT: retq # encoding: [0xc3]
108 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
109 %ret = call <8 x i32> @llvm.x86.avx2.vpdpwusd.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
113 define <4 x i32> @test_int_x86_avx2_vpdpwusds_128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
114 ; CHECK-LABEL: test_int_x86_avx2_vpdpwusds_128:
116 ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
117 ; CHECK-NEXT: # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
119 ; CHECK-NEXT: nop # encoding: [0x90]
120 ; CHECK-NEXT: #NO_APP
121 ; CHECK-NEXT: vpdpwusds {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
122 ; CHECK-NEXT: # encoding: [0xc4,0xe2,0x71,0xd3,0x44,0x24,0xe8]
123 ; CHECK-NEXT: retq # encoding: [0xc3]
124 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
125 %ret = call <4 x i32> @llvm.x86.avx2.vpdpwusds.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
129 define <8 x i32> @test_int_x86_avx2_vpdpwusds_256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
130 ; CHECK-LABEL: test_int_x86_avx2_vpdpwusds_256:
132 ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
133 ; CHECK-NEXT: # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
135 ; CHECK-NEXT: nop # encoding: [0x90]
136 ; CHECK-NEXT: #NO_APP
137 ; CHECK-NEXT: vpdpwusds {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
138 ; CHECK-NEXT: # encoding: [0xc4,0xe2,0x75,0xd3,0x44,0x24,0xd8]
139 ; CHECK-NEXT: retq # encoding: [0xc3]
140 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
141 %ret = call <8 x i32> @llvm.x86.avx2.vpdpwusds.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
145 define <4 x i32> @test_int_x86_avx2_vpdpwuud_128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
146 ; CHECK-LABEL: test_int_x86_avx2_vpdpwuud_128:
148 ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
149 ; CHECK-NEXT: # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
151 ; CHECK-NEXT: nop # encoding: [0x90]
152 ; CHECK-NEXT: #NO_APP
153 ; CHECK-NEXT: vpdpwuud {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
154 ; CHECK-NEXT: # encoding: [0xc4,0xe2,0x70,0xd2,0x44,0x24,0xe8]
155 ; CHECK-NEXT: retq # encoding: [0xc3]
156 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
157 %ret = call <4 x i32> @llvm.x86.avx2.vpdpwuud.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
161 define <4 x i32> @test_int_x86_avx2_vpdpwuud_128_commuted(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
162 ; CHECK-LABEL: test_int_x86_avx2_vpdpwuud_128_commuted:
164 ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
165 ; CHECK-NEXT: # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
167 ; CHECK-NEXT: nop # encoding: [0x90]
168 ; CHECK-NEXT: #NO_APP
169 ; CHECK-NEXT: vpdpwuud {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
170 ; CHECK-NEXT: # encoding: [0xc4,0xe2,0x70,0xd2,0x44,0x24,0xe8]
171 ; CHECK-NEXT: retq # encoding: [0xc3]
172 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
173 %ret = call <4 x i32> @llvm.x86.avx2.vpdpwuud.128(<4 x i32> %A, <4 x i32> %C, <4 x i32> %B)
177 define <8 x i32> @test_int_x86_avx2_vpdpwuud_256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
178 ; CHECK-LABEL: test_int_x86_avx2_vpdpwuud_256:
180 ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
181 ; CHECK-NEXT: # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
183 ; CHECK-NEXT: nop # encoding: [0x90]
184 ; CHECK-NEXT: #NO_APP
185 ; CHECK-NEXT: vpdpwuud {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
186 ; CHECK-NEXT: # encoding: [0xc4,0xe2,0x74,0xd2,0x44,0x24,0xd8]
187 ; CHECK-NEXT: retq # encoding: [0xc3]
188 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
189 %ret = call <8 x i32> @llvm.x86.avx2.vpdpwuud.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
193 define <8 x i32> @test_int_x86_avx2_vpdpwuud_256_commuted(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
194 ; CHECK-LABEL: test_int_x86_avx2_vpdpwuud_256_commuted:
196 ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
197 ; CHECK-NEXT: # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
199 ; CHECK-NEXT: nop # encoding: [0x90]
200 ; CHECK-NEXT: #NO_APP
201 ; CHECK-NEXT: vpdpwuud {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
202 ; CHECK-NEXT: # encoding: [0xc4,0xe2,0x74,0xd2,0x44,0x24,0xd8]
203 ; CHECK-NEXT: retq # encoding: [0xc3]
204 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
205 %ret = call <8 x i32> @llvm.x86.avx2.vpdpwuud.256(<8 x i32> %A, <8 x i32> %C, <8 x i32> %B)
209 define <4 x i32> @test_int_x86_avx2_vpdpwuuds_128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
210 ; CHECK-LABEL: test_int_x86_avx2_vpdpwuuds_128:
212 ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
213 ; CHECK-NEXT: # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
215 ; CHECK-NEXT: nop # encoding: [0x90]
216 ; CHECK-NEXT: #NO_APP
217 ; CHECK-NEXT: vpdpwuuds {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
218 ; CHECK-NEXT: # encoding: [0xc4,0xe2,0x70,0xd3,0x44,0x24,0xe8]
219 ; CHECK-NEXT: retq # encoding: [0xc3]
220 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
221 %ret = call <4 x i32> @llvm.x86.avx2.vpdpwuuds.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
225 define <4 x i32> @test_int_x86_avx2_vpdpwuuds_128_commuted(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
226 ; CHECK-LABEL: test_int_x86_avx2_vpdpwuuds_128_commuted:
228 ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
229 ; CHECK-NEXT: # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
231 ; CHECK-NEXT: nop # encoding: [0x90]
232 ; CHECK-NEXT: #NO_APP
233 ; CHECK-NEXT: vpdpwuuds {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
234 ; CHECK-NEXT: # encoding: [0xc4,0xe2,0x70,0xd3,0x44,0x24,0xe8]
235 ; CHECK-NEXT: retq # encoding: [0xc3]
236 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
237 %ret = call <4 x i32> @llvm.x86.avx2.vpdpwuuds.128(<4 x i32> %A, <4 x i32> %C, <4 x i32> %B)
241 define <8 x i32> @test_int_x86_avx2_vpdpwuuds_256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
242 ; CHECK-LABEL: test_int_x86_avx2_vpdpwuuds_256:
244 ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
245 ; CHECK-NEXT: # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
247 ; CHECK-NEXT: nop # encoding: [0x90]
248 ; CHECK-NEXT: #NO_APP
249 ; CHECK-NEXT: vpdpwuuds {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
250 ; CHECK-NEXT: # encoding: [0xc4,0xe2,0x74,0xd3,0x44,0x24,0xd8]
251 ; CHECK-NEXT: retq # encoding: [0xc3]
252 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
253 %ret = call <8 x i32> @llvm.x86.avx2.vpdpwuuds.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
257 define <8 x i32> @test_int_x86_avx2_vpdpwuuds_256_commuted(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
258 ; CHECK-LABEL: test_int_x86_avx2_vpdpwuuds_256_commuted:
260 ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
261 ; CHECK-NEXT: # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
263 ; CHECK-NEXT: nop # encoding: [0x90]
264 ; CHECK-NEXT: #NO_APP
265 ; CHECK-NEXT: vpdpwuuds {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
266 ; CHECK-NEXT: # encoding: [0xc4,0xe2,0x74,0xd3,0x44,0x24,0xd8]
267 ; CHECK-NEXT: retq # encoding: [0xc3]
268 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
269 %ret = call <8 x i32> @llvm.x86.avx2.vpdpwuuds.256(<8 x i32> %A, <8 x i32> %C, <8 x i32> %B)