1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512ifma --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
5 declare <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64>, <8 x i64>, <8 x i64>)
; Unmasked high-52-bit multiply-add: the intrinsic should lower to the plain
; (unmasked) vpmadd52huq with all-register zmm operands on both X86 and X64.
7 define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
8 ; CHECK-LABEL: test_int_x86_avx512_vpmadd52h_uq_512:
10 ; CHECK-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0xc2]
11 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13 %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
; Merge-masked form: the select between the intrinsic result and %x0 (the
; accumulator/destination) should fold into {%k1} merge masking on the
; instruction. On X86 the i8 mask arrives on the stack; on X64 in %edi.
17 define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
18 ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512:
20 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
21 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
22 ; X86-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0xc2]
23 ; X86-NEXT: retl # encoding: [0xc3]
25 ; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512:
27 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
28 ; X64-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0xc2]
29 ; X64-NEXT: retq # encoding: [0xc3]
31 %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
32 %2 = bitcast i8 %x3 to <8 x i1>
33 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
; Zero-masked form: selecting against zeroinitializer should fold into
; {%k1} {z} zeroing masking (EVEX.z set, visible in the encoding byte 0xc9).
37 define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
38 ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512:
40 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
41 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
42 ; X86-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xc2]
43 ; X86-NEXT: retl # encoding: [0xc3]
45 ; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512:
47 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
48 ; X64-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xc2]
49 ; X64-NEXT: retq # encoding: [0xc3]
51 %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
52 %2 = bitcast i8 %x3 to <8 x i1>
53 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
57 declare <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64>, <8 x i64>, <8 x i64>)
; Low-52-bit variant, unmasked: lowers to plain vpmadd52luq (opcode 0xb4
; instead of the high variant's 0xb5).
59 define <8 x i64>@test_int_x86_avx512_vpmadd52l_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
60 ; CHECK-LABEL: test_int_x86_avx512_vpmadd52l_uq_512:
62 ; CHECK-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb4,0xc2]
63 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
65 %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
; Low-52-bit variant with merge masking: select against %x0 folds to {%k1}.
69 define <8 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
70 ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_512:
72 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
73 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
74 ; X86-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb4,0xc2]
75 ; X86-NEXT: retl # encoding: [0xc3]
77 ; X64-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_512:
79 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
80 ; X64-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb4,0xc2]
81 ; X64-NEXT: retq # encoding: [0xc3]
83 %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
84 %2 = bitcast i8 %x3 to <8 x i1>
85 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
; Low-52-bit variant with zeroing masking: select against zeroinitializer
; folds to {%k1} {z}.
89 define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
90 ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_512:
92 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
93 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
94 ; X86-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xc2]
95 ; X86-NEXT: retl # encoding: [0xc3]
97 ; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_512:
99 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
100 ; X64-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xc2]
101 ; X64-NEXT: retq # encoding: [0xc3]
103 %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
104 %2 = bitcast i8 %x3 to <8 x i1>
105 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
; Memory-operand folding: a full 512-bit load of the third source (%x2)
; should fold into the instruction's memory operand instead of a separate
; vmovdqa64 load.
109 define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, ptr %x2ptr) {
110 ; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load:
112 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
113 ; X86-NEXT: vpmadd52huq (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x00]
114 ; X86-NEXT: retl # encoding: [0xc3]
116 ; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load:
118 ; X64-NEXT: vpmadd52huq (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x07]
119 ; X64-NEXT: retq # encoding: [0xc3]
121 %x2 = load <8 x i64>, ptr %x2ptr
122 %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
; Embedded-broadcast folding: a scalar i64 load splatted to all 8 lanes
; (insertelement + shufflevector) should fold as a {1to8} broadcast memory
; operand (EVEX.b set, encoding byte 0x58).
126 define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, ptr %x2ptr) {
127 ; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_bcast:
129 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
130 ; X86-NEXT: vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xb5,0x00]
131 ; X86-NEXT: retl # encoding: [0xc3]
133 ; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_bcast:
135 ; X64-NEXT: vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xb5,0x07]
136 ; X64-NEXT: retq # encoding: [0xc3]
138 %x2load = load i64, ptr %x2ptr
139 %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
140 %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
141 %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
; Commuted load folding: here the SECOND source (%x1) comes from memory; the
; two multiplicand operands should be commuted so the load still folds into
; the memory-operand position (same codegen as the non-commuted _load test).
145 define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, ptr %x1ptr, <8 x i64> %x2) {
146 ; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute:
148 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
149 ; X86-NEXT: vpmadd52huq (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x00]
150 ; X86-NEXT: retl # encoding: [0xc3]
152 ; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute:
154 ; X64-NEXT: vpmadd52huq (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x07]
155 ; X64-NEXT: retq # encoding: [0xc3]
157 %x1 = load <8 x i64>, ptr %x1ptr
158 %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
; Commuted + broadcast: a splatted scalar load feeding the second source
; should both commute and fold as a {1to8} embedded-broadcast operand.
162 define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, ptr %x1ptr, <8 x i64> %x2) {
163 ; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast:
165 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
166 ; X86-NEXT: vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xb5,0x00]
167 ; X86-NEXT: retl # encoding: [0xc3]
169 ; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast:
171 ; X64-NEXT: vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xb5,0x07]
172 ; X64-NEXT: retq # encoding: [0xc3]
174 %x1load = load i64, ptr %x1ptr
175 %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
176 %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
177 %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
; Merge masking combined with load folding: both the select-against-%x0 and
; the 512-bit load should fold into one masked memory-operand instruction.
181 define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, ptr %x2ptr, i8 %x3) {
182 ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load:
184 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
185 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
186 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
187 ; X86-NEXT: vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x00]
188 ; X86-NEXT: retl # encoding: [0xc3]
190 ; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load:
192 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
193 ; X64-NEXT: vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x07]
194 ; X64-NEXT: retq # encoding: [0xc3]
196 %x2 = load <8 x i64>, ptr %x2ptr
197 %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
198 %2 = bitcast i8 %x3 to <8 x i1>
199 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
; Merge masking + embedded broadcast: splatted scalar load folds as
; {1to8} while the select folds as {%k1} (encoding byte 0x59 = EVEX.b + mask).
203 define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, ptr %x2ptr, i8 %x3) {
204 ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast:
206 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
207 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
208 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
209 ; X86-NEXT: vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x59,0xb5,0x00]
210 ; X86-NEXT: retl # encoding: [0xc3]
212 ; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast:
214 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
215 ; X64-NEXT: vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x59,0xb5,0x07]
216 ; X64-NEXT: retq # encoding: [0xc3]
218 %x2load = load i64, ptr %x2ptr
219 %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
220 %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
221 %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
222 %2 = bitcast i8 %x3 to <8 x i1>
223 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
; Merge masking + commuted load: commuting the multiplicands is still legal
; under masking because %x0 is the accumulator/passthrough, not a multiplicand.
227 define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, ptr %x1ptr, <8 x i64> %x2, i8 %x3) {
228 ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute:
230 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
231 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
232 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
233 ; X86-NEXT: vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x00]
234 ; X86-NEXT: retl # encoding: [0xc3]
236 ; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute:
238 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
239 ; X64-NEXT: vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x07]
240 ; X64-NEXT: retq # encoding: [0xc3]
242 %x1 = load <8 x i64>, ptr %x1ptr
243 %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
244 %2 = bitcast i8 %x3 to <8 x i1>
245 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
; Merge masking + commuted operand + embedded broadcast, all folded into a
; single {%k1} / {1to8} memory-operand instruction.
249 define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, ptr %x1ptr, <8 x i64> %x2, i8 %x3) {
250 ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast:
252 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
253 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
254 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
255 ; X86-NEXT: vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x59,0xb5,0x00]
256 ; X86-NEXT: retl # encoding: [0xc3]
258 ; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast:
260 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
261 ; X64-NEXT: vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x59,0xb5,0x07]
262 ; X64-NEXT: retq # encoding: [0xc3]
264 %x1load = load i64, ptr %x1ptr
265 %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
266 %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
267 %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
268 %2 = bitcast i8 %x3 to <8 x i1>
269 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
; Zeroing masking combined with load folding: select-against-zero and the
; 512-bit load both fold, producing {%k1} {z} with a memory operand.
273 define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, ptr %x2ptr, i8 %x3) {
274 ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load:
276 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
277 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
278 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
279 ; X86-NEXT: vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x00]
280 ; X86-NEXT: retl # encoding: [0xc3]
282 ; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load:
284 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
285 ; X64-NEXT: vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x07]
286 ; X64-NEXT: retq # encoding: [0xc3]
288 %x2 = load <8 x i64>, ptr %x2ptr
289 %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
290 %2 = bitcast i8 %x3 to <8 x i1>
291 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
; Zeroing masking + embedded broadcast: {%k1} {z} plus {1to8} (encoding byte
; 0xd9 = EVEX.z + EVEX.b + mask).
295 define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, ptr %x2ptr, i8 %x3) {
296 ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast:
298 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
299 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
300 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
301 ; X86-NEXT: vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xd9,0xb5,0x00]
302 ; X86-NEXT: retl # encoding: [0xc3]
304 ; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast:
306 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
307 ; X64-NEXT: vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xd9,0xb5,0x07]
308 ; X64-NEXT: retq # encoding: [0xc3]
310 %x2load = load i64, ptr %x2ptr
311 %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
312 %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
313 %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
314 %2 = bitcast i8 %x3 to <8 x i1>
315 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
; Zeroing masking + commuted load: the memory source feeds %x1 but still
; folds after commuting the multiplicands.
319 define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, ptr %x1ptr, <8 x i64> %x2, i8 %x3) {
320 ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute:
322 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
323 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
324 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
325 ; X86-NEXT: vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x00]
326 ; X86-NEXT: retl # encoding: [0xc3]
328 ; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute:
330 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
331 ; X64-NEXT: vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x07]
332 ; X64-NEXT: retq # encoding: [0xc3]
334 %x1 = load <8 x i64>, ptr %x1ptr
335 %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
336 %2 = bitcast i8 %x3 to <8 x i1>
337 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
; Zeroing masking + commuted operand + embedded broadcast — the most folded
; form: one instruction with {1to8} memory operand, {%k1} and {z}.
341 define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, ptr %x1ptr, <8 x i64> %x2, i8 %x3) {
342 ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast:
344 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
345 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
346 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
347 ; X86-NEXT: vpmadd52huq (%eax){1to8}, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xd9,0xb5,0x00]
348 ; X86-NEXT: retl # encoding: [0xc3]
350 ; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast:
352 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
353 ; X64-NEXT: vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xd9,0xb5,0x07]
354 ; X64-NEXT: retq # encoding: [0xc3]
356 %x1load = load i64, ptr %x1ptr
357 %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
358 %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
359 %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
360 %2 = bitcast i8 %x3 to <8 x i1>
361 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer