1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512ifma,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
; 128-bit vpmadd52h.uq intrinsic: takes three <2 x i64> operands and yields a
; <2 x i64>. The tests in this file expect calls to it to be selected as
; vpmadd52huq on xmm registers.
5 declare <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
; Unmasked 128-bit vpmadd52h.uq: the three vector arguments are passed straight
; to the intrinsic. Both RUN configurations share the same expectation, a single
; unmasked vpmadd52huq on xmm registers followed by the return.
7 define <2 x i64>@test_int_x86_avx512_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
8 ; CHECK-LABEL: test_int_x86_avx512_vpmadd52h_uq_128:
10 ; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0xb5,0xc2]
11 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13 %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
; Merge-masked 128-bit vpmadd52h.uq: the intrinsic result is blended with %x0
; under the i8 mask %x3, and the expected code is one vpmadd52huq predicated on
; {%k1}. The i686 run reads the mask from the stack (movzbl), while the x86_64
; run moves it from %edi; each then copies it into %k1 with kmovw.
17 define <2 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
18 ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_128:
20 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
21 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
22 ; X86-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb5,0xc2]
23 ; X86-NEXT: retl # encoding: [0xc3]
25 ; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_128:
27 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
28 ; X64-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb5,0xc2]
29 ; X64-NEXT: retq # encoding: [0xc3]
31 %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0 and 1, one
; per <2 x i64> element.
32 %2 = bitcast i8 %x3 to <8 x i1>
33 %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
; Lanes with a clear mask bit keep the corresponding element of %x0.
34 %3 = select <2 x i1> %extract2, <2 x i64> %1, <2 x i64> %x0
; 256-bit vpmadd52h.uq intrinsic: takes three <4 x i64> operands and yields a
; <4 x i64>. The tests in this file expect calls to it to be selected as
; vpmadd52huq on ymm registers.
38 declare <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
; Unmasked 256-bit vpmadd52h.uq: the three vector arguments are passed straight
; to the intrinsic. Both RUN configurations share the same expectation, a single
; unmasked vpmadd52huq on ymm registers followed by the return.
40 define <4 x i64>@test_int_x86_avx512_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
41 ; CHECK-LABEL: test_int_x86_avx512_vpmadd52h_uq_256:
43 ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0xb5,0xc2]
44 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
46 %1 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
; Merge-masked 256-bit vpmadd52h.uq: the intrinsic result is blended with %x0
; under the i8 mask %x3, and the expected code is one vpmadd52huq on ymm
; registers predicated on {%k1}. The i686 run reads the mask from the stack
; (movzbl), while the x86_64 run moves it from %edi; each then copies it into
; %k1 with kmovw.
50 define <4 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
51 ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_256:
53 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
54 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
55 ; X86-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb5,0xc2]
56 ; X86-NEXT: retl # encoding: [0xc3]
58 ; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_256:
60 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
61 ; X64-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb5,0xc2]
62 ; X64-NEXT: retq # encoding: [0xc3]
64 %1 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0 through 3,
; one per <4 x i64> element.
65 %2 = bitcast i8 %x3 to <8 x i1>
66 %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Lanes with a clear mask bit keep the corresponding element of %x0.
67 %3 = select <4 x i1> %extract2, <4 x i64> %1, <4 x i64> %x0
; Zero-masked 128-bit vpmadd52h.uq: the intrinsic result is selected against
; zeroinitializer under the i8 mask %x3, and the expected code is one
; vpmadd52huq carrying both {%k1} and {z}. The i686 run reads the mask from the
; stack (movzbl), while the x86_64 run moves it from %edi.
71 define <2 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
72 ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_128:
74 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
75 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
76 ; X86-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb5,0xc2]
77 ; X86-NEXT: retl # encoding: [0xc3]
79 ; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_128:
81 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
82 ; X64-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb5,0xc2]
83 ; X64-NEXT: retq # encoding: [0xc3]
85 %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0 and 1, one
; per <2 x i64> element.
86 %2 = bitcast i8 %x3 to <8 x i1>
87 %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
; Lanes with a clear mask bit become zero instead of keeping an input value.
88 %3 = select <2 x i1> %extract2, <2 x i64> %1, <2 x i64> zeroinitializer
; Zero-masked 256-bit vpmadd52h.uq: the intrinsic result is selected against
; zeroinitializer under the i8 mask %x3, and the expected code is one
; vpmadd52huq on ymm registers carrying both {%k1} and {z}. The i686 run reads
; the mask from the stack (movzbl), while the x86_64 run moves it from %edi.
92 define <4 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
93 ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_256:
95 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
96 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
97 ; X86-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb5,0xc2]
98 ; X86-NEXT: retl # encoding: [0xc3]
100 ; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_256:
102 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
103 ; X64-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb5,0xc2]
104 ; X64-NEXT: retq # encoding: [0xc3]
106 %1 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0 through 3,
; one per <4 x i64> element.
107 %2 = bitcast i8 %x3 to <8 x i1>
108 %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Lanes with a clear mask bit become zero instead of keeping an input value.
109 %3 = select <4 x i1> %extract2, <4 x i64> %1, <4 x i64> zeroinitializer
; 128-bit vpmadd52l.uq intrinsic: takes three <2 x i64> operands and yields a
; <2 x i64>. The tests in this file expect calls to it to be selected as
; vpmadd52luq on xmm registers.
113 declare <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
; Unmasked 128-bit vpmadd52l.uq: the three vector arguments are passed straight
; to the intrinsic. Both RUN configurations share the same expectation, a single
; unmasked vpmadd52luq on xmm registers followed by the return.
115 define <2 x i64>@test_int_x86_avx512_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
116 ; CHECK-LABEL: test_int_x86_avx512_vpmadd52l_uq_128:
118 ; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0xb4,0xc2]
119 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
121 %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
; Merge-masked 128-bit vpmadd52l.uq: the intrinsic result is blended with %x0
; under the i8 mask %x3, and the expected code is one vpmadd52luq predicated on
; {%k1}. The i686 run reads the mask from the stack (movzbl), while the x86_64
; run moves it from %edi; each then copies it into %k1 with kmovw.
125 define <2 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
126 ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_128:
128 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
129 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
130 ; X86-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb4,0xc2]
131 ; X86-NEXT: retl # encoding: [0xc3]
133 ; X64-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_128:
135 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
136 ; X64-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb4,0xc2]
137 ; X64-NEXT: retq # encoding: [0xc3]
139 %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0 and 1, one
; per <2 x i64> element.
140 %2 = bitcast i8 %x3 to <8 x i1>
141 %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
; Lanes with a clear mask bit keep the corresponding element of %x0.
142 %3 = select <2 x i1> %extract2, <2 x i64> %1, <2 x i64> %x0
; 256-bit vpmadd52l.uq intrinsic: takes three <4 x i64> operands and yields a
; <4 x i64>. The tests in this file expect calls to it to be selected as
; vpmadd52luq on ymm registers.
146 declare <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
; Unmasked 256-bit vpmadd52l.uq: the three vector arguments are passed straight
; to the intrinsic. Both RUN configurations share the same expectation, a single
; unmasked vpmadd52luq on ymm registers followed by the return.
148 define <4 x i64>@test_int_x86_avx512_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
149 ; CHECK-LABEL: test_int_x86_avx512_vpmadd52l_uq_256:
151 ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0xb4,0xc2]
152 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
154 %1 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
; Merge-masked 256-bit vpmadd52l.uq: the intrinsic result is blended with %x0
; under the i8 mask %x3, and the expected code is one vpmadd52luq on ymm
; registers predicated on {%k1}. The i686 run reads the mask from the stack
; (movzbl), while the x86_64 run moves it from %edi; each then copies it into
; %k1 with kmovw.
158 define <4 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
159 ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_256:
161 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
162 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
163 ; X86-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb4,0xc2]
164 ; X86-NEXT: retl # encoding: [0xc3]
166 ; X64-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_256:
168 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
169 ; X64-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb4,0xc2]
170 ; X64-NEXT: retq # encoding: [0xc3]
172 %1 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0 through 3,
; one per <4 x i64> element.
173 %2 = bitcast i8 %x3 to <8 x i1>
174 %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Lanes with a clear mask bit keep the corresponding element of %x0.
175 %3 = select <4 x i1> %extract2, <4 x i64> %1, <4 x i64> %x0
; Zero-masked 128-bit vpmadd52l.uq: the intrinsic result is selected against
; zeroinitializer under the i8 mask %x3, and the expected code is one
; vpmadd52luq carrying both {%k1} and {z}. The i686 run reads the mask from the
; stack (movzbl), while the x86_64 run moves it from %edi.
179 define <2 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
180 ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_128:
182 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
183 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
184 ; X86-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb4,0xc2]
185 ; X86-NEXT: retl # encoding: [0xc3]
187 ; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_128:
189 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
190 ; X64-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb4,0xc2]
191 ; X64-NEXT: retq # encoding: [0xc3]
193 %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0 and 1, one
; per <2 x i64> element.
194 %2 = bitcast i8 %x3 to <8 x i1>
195 %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
; Lanes with a clear mask bit become zero instead of keeping an input value.
196 %3 = select <2 x i1> %extract2, <2 x i64> %1, <2 x i64> zeroinitializer
; Zero-masked 256-bit vpmadd52l.uq: the intrinsic result is selected against
; zeroinitializer under the i8 mask %x3, and the expected code is one
; vpmadd52luq on ymm registers carrying both {%k1} and {z}. The i686 run reads
; the mask from the stack (movzbl), while the x86_64 run moves it from %edi.
200 define <4 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
201 ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_256:
203 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
204 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
205 ; X86-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb4,0xc2]
206 ; X86-NEXT: retl # encoding: [0xc3]
208 ; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_256:
210 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
211 ; X64-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb4,0xc2]
212 ; X64-NEXT: retq # encoding: [0xc3]
214 %1 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0 through 3,
; one per <4 x i64> element.
215 %2 = bitcast i8 %x3 to <8 x i1>
216 %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Lanes with a clear mask bit become zero instead of keeping an input value.
217 %3 = select <4 x i1> %extract2, <4 x i64> %1, <4 x i64> zeroinitializer