1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64
3 ; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86
5 declare <16 x bfloat> @llvm.x86.avx10.vminpbf16256(<16 x bfloat>, <16 x bfloat>)
define <16 x bfloat> @test_int_x86_avx10_min_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) {
; CHECK-LABEL: test_int_x86_avx10_min_nepbf16_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vminpbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5d,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call <16 x bfloat> @llvm.x86.avx10.vminpbf16256(<16 x bfloat> %x1, <16 x bfloat> %x2)
  ret <16 x bfloat> %res0
}
define <16 x bfloat> @test_int_x86_avx10_maskz_min_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk) {
; X64-LABEL: test_int_x86_avx10_maskz_min_nepbf16_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vminpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x5d,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx10_maskz_min_nepbf16_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vminpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x5d,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
  %mask = bitcast i16 %msk to <16 x i1>
  %res0 = call <16 x bfloat> @llvm.x86.avx10.vminpbf16256(<16 x bfloat> %x1, <16 x bfloat> %x2)
  %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer
  ret <16 x bfloat> %res1
}
34 declare <8 x bfloat> @llvm.x86.avx10.vminpbf16128(<8 x bfloat>, <8 x bfloat>)
define <8 x bfloat> @test_int_x86_avx10_min_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) {
; CHECK-LABEL: test_int_x86_avx10_min_nepbf16_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vminpbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5d,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call <8 x bfloat> @llvm.x86.avx10.vminpbf16128(<8 x bfloat> %x1, <8 x bfloat> %x2)
  ret <8 x bfloat> %res0
}
define <8 x bfloat> @test_int_x86_avx10_maskz_min_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk) {
; X64-LABEL: test_int_x86_avx10_maskz_min_nepbf16_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vminpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x5d,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx10_maskz_min_nepbf16_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vminpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x5d,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
  %mask = bitcast i8 %msk to <8 x i1>
  %res0 = call <8 x bfloat> @llvm.x86.avx10.vminpbf16128(<8 x bfloat> %x1, <8 x bfloat> %x2)
  %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer
  ret <8 x bfloat> %res1
}
63 declare <16 x bfloat> @llvm.x86.avx10.vmaxpbf16256(<16 x bfloat>, <16 x bfloat>)
define <16 x bfloat> @test_int_x86_avx10_max_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) {
; CHECK-LABEL: test_int_x86_avx10_max_nepbf16_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vmaxpbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5f,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call <16 x bfloat> @llvm.x86.avx10.vmaxpbf16256(<16 x bfloat> %x1, <16 x bfloat> %x2)
  ret <16 x bfloat> %res0
}
define <16 x bfloat> @test_int_x86_avx10_maskz_max_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk) {
; X64-LABEL: test_int_x86_avx10_maskz_max_nepbf16_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vmaxpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x5f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx10_maskz_max_nepbf16_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmaxpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x5f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
  %mask = bitcast i16 %msk to <16 x i1>
  %res0 = call <16 x bfloat> @llvm.x86.avx10.vmaxpbf16256(<16 x bfloat> %x1, <16 x bfloat> %x2)
  %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer
  ret <16 x bfloat> %res1
}
92 declare <8 x bfloat> @llvm.x86.avx10.vmaxpbf16128(<8 x bfloat>, <8 x bfloat>)
define <8 x bfloat> @test_int_x86_avx10_max_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) {
; CHECK-LABEL: test_int_x86_avx10_max_nepbf16_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vmaxpbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5f,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call <8 x bfloat> @llvm.x86.avx10.vmaxpbf16128(<8 x bfloat> %x1, <8 x bfloat> %x2)
  ret <8 x bfloat> %res0
}
define <8 x bfloat> @test_int_x86_avx10_maskz_max_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk) {
; X64-LABEL: test_int_x86_avx10_maskz_max_nepbf16_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vmaxpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x5f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx10_maskz_max_nepbf16_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmaxpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x5f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
  %mask = bitcast i8 %msk to <8 x i1>
  %res0 = call <8 x bfloat> @llvm.x86.avx10.vmaxpbf16128(<8 x bfloat> %x1, <8 x bfloat> %x2)
  %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer
  ret <8 x bfloat> %res1
}
121 declare i32 @llvm.x86.avx10.vcomsbf16eq(<8 x bfloat>, <8 x bfloat>)
122 declare i32 @llvm.x86.avx10.vcomsbf16lt(<8 x bfloat>, <8 x bfloat>)
123 declare i32 @llvm.x86.avx10.vcomsbf16le(<8 x bfloat>, <8 x bfloat>)
124 declare i32 @llvm.x86.avx10.vcomsbf16gt(<8 x bfloat>, <8 x bfloat>)
125 declare i32 @llvm.x86.avx10.vcomsbf16ge(<8 x bfloat>, <8 x bfloat>)
126 declare i32 @llvm.x86.avx10.vcomsbf16neq(<8 x bfloat>, <8 x bfloat>)
define i32 @test_x86_avx10_com_nesbf16_eq(<8 x bfloat> %a0, <8 x bfloat> %a1) {
; CHECK-LABEL: test_x86_avx10_com_nesbf16_eq:
; CHECK: # %bb.0:
; CHECK-NEXT: vcomsbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1]
; CHECK-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; CHECK-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; CHECK-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; CHECK-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx10.vcomsbf16eq(<8 x bfloat> %a0, <8 x bfloat> %a1)
  ret i32 %res
}
define i32 @test_x86_avx10_com_nesbf16_lt(<8 x bfloat> %a0, <8 x bfloat> %a1) {
; CHECK-LABEL: test_x86_avx10_com_nesbf16_lt:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: vcomsbf16 %xmm0, %xmm1 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc8]
; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx10.vcomsbf16lt(<8 x bfloat> %a0, <8 x bfloat> %a1)
  ret i32 %res
}
define i32 @test_x86_avx10_com_nesbf16_le(<8 x bfloat> %a0, <8 x bfloat> %a1) {
; CHECK-LABEL: test_x86_avx10_com_nesbf16_le:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: vcomsbf16 %xmm0, %xmm1 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc8]
; CHECK-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx10.vcomsbf16le(<8 x bfloat> %a0, <8 x bfloat> %a1)
  ret i32 %res
}
define i32 @test_x86_avx10_com_nesbf16_gt(<8 x bfloat> %a0, <8 x bfloat> %a1) {
; NOTE(review): this "gt"-named test invokes @llvm.x86.avx10.vcomsbf16ge, and the
; CHECK lines (setae, non-swapped operands) match ge semantics; the declared
; @llvm.x86.avx10.vcomsbf16gt intrinsic is never exercised in this file — confirm intended.
; CHECK-LABEL: test_x86_avx10_com_nesbf16_gt:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: vcomsbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1]
; CHECK-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx10.vcomsbf16ge(<8 x bfloat> %a0, <8 x bfloat> %a1)
  ret i32 %res
}
define i32 @test_x86_avx10_com_nesbf16_neq(<8 x bfloat> %a0, <8 x bfloat> %a1) {
; CHECK-LABEL: test_x86_avx10_com_nesbf16_neq:
; CHECK: # %bb.0:
; CHECK-NEXT: vcomsbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1]
; CHECK-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; CHECK-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; CHECK-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; CHECK-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx10.vcomsbf16neq(<8 x bfloat> %a0, <8 x bfloat> %a1)
  ret i32 %res
}
187 declare <8 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.128(<8 x bfloat>, <8 x bfloat>, i8)
188 declare <16 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.256(<16 x bfloat>, <16 x bfloat>, i16)
define <8 x bfloat> @test_rsqrt_nepbf16_128(<8 x bfloat> %a0) {
; CHECK-LABEL: test_rsqrt_nepbf16_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vrsqrtpbf16 %xmm0, %xmm0 # encoding: [0x62,0xf6,0x7c,0x08,0x4e,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.128(<8 x bfloat> %a0, <8 x bfloat> zeroinitializer, i8 -1)
  ret <8 x bfloat> %res
}
define <16 x bfloat> @test_rsqrt_nepbf16_256(<16 x bfloat> %a0) {
; CHECK-LABEL: test_rsqrt_nepbf16_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vrsqrtpbf16 %ymm0, %ymm0 # encoding: [0x62,0xf6,0x7c,0x28,0x4e,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.256(<16 x bfloat> %a0, <16 x bfloat> zeroinitializer, i16 -1)
  ret <16 x bfloat> %res
}
208 declare <8 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.128(<8 x bfloat>, <8 x bfloat>, i8)
209 declare <16 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.256(<16 x bfloat>, <16 x bfloat>, i16)
define <8 x bfloat> @test_rcp_nepbf16_128(<8 x bfloat> %a0, <8 x bfloat> %a1, i8 %mask) {
; X64-LABEL: test_rcp_nepbf16_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vrcppbf16 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0x4c,0xc8]
; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
;
; X86-LABEL: test_rcp_nepbf16_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vrcppbf16 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0x4c,0xc8]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
  %res = call <8 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.128(<8 x bfloat> %a0, <8 x bfloat> %a1, i8 %mask)
  ret <8 x bfloat> %res
}
define <16 x bfloat> @test_rcp_nepbf16_256(<16 x bfloat> %a0, <16 x bfloat> %a1, i16 %mask) {
; X64-LABEL: test_rcp_nepbf16_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vrcppbf16 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0x4c,0xc8]
; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
;
; X86-LABEL: test_rcp_nepbf16_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vrcppbf16 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0x4c,0xc8]
; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
  %res = call <16 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.256(<16 x bfloat> %a0, <16 x bfloat> %a1, i16 %mask)
  ret <16 x bfloat> %res
}
247 declare <8 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.128(<8 x bfloat>, i32, <8 x bfloat>, i8)
248 declare <16 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.256(<16 x bfloat>, i32, <16 x bfloat>, i16)
define <8 x bfloat>@test_int_x86_avx512_mask_reduce_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x2, i8 %x3) {
; X64-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vreducenepbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x56,0xc8,0x08]
; X64-NEXT: vreducenepbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x56,0xc0,0x04]
; X64-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vreducenepbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x56,0xc8,0x08]
; X86-NEXT: vreducenepbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x56,0xc0,0x04]
; X86-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
  %res = call <8 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.128(<8 x bfloat> %x0, i32 8, <8 x bfloat> %x2, i8 %x3)
  %res1 = call <8 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.128(<8 x bfloat> %x0, i32 4, <8 x bfloat> %x2, i8 -1)
  %res2 = fadd <8 x bfloat> %res, %res1
  ret <8 x bfloat> %res2
}
define <16 x bfloat>@test_int_x86_avx512_mask_reduce_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x2, i16 %x3) {
; X64-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vreducenepbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x56,0xc8,0x08]
; X64-NEXT: vreducenepbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x56,0xc0,0x04]
; X64-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vreducenepbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x56,0xc8,0x08]
; X86-NEXT: vreducenepbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x56,0xc0,0x04]
; X86-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
  %res = call <16 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.256(<16 x bfloat> %x0, i32 8, <16 x bfloat> %x2, i16 %x3)
  %res1 = call <16 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.256(<16 x bfloat> %x0, i32 4, <16 x bfloat> %x2, i16 -1)
  %res2 = fadd <16 x bfloat> %res, %res1
  ret <16 x bfloat> %res2
}
294 declare <8 x i1> @llvm.x86.avx10.fpclass.nepbf16.128(<8 x bfloat>, i32)
295 declare <16 x i1> @llvm.x86.avx10.fpclass.nepbf16.256(<16 x bfloat>, i32)
define i8 @test_int_x86_avx512_fpclass_nepbf16_128(<8 x bfloat> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_nepbf16_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclasspbf16 $2, %xmm0, %k1 # encoding: [0x62,0xf3,0x7f,0x08,0x66,0xc8,0x02]
; CHECK-NEXT: # k1 = isPositiveZero(xmm0)
; CHECK-NEXT: vfpclasspbf16 $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x66,0xc0,0x04]
; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i1> @llvm.x86.avx10.fpclass.nepbf16.128(<8 x bfloat> %x0, i32 4)
  %res1 = call <8 x i1> @llvm.x86.avx10.fpclass.nepbf16.128(<8 x bfloat> %x0, i32 2)
  %1 = and <8 x i1> %res1, %res
  %2 = bitcast <8 x i1> %1 to i8
  ret i8 %2
}
define i16 @test_int_x86_avx512_fpclass_nepbf16_256(<16 x bfloat> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_nepbf16_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclasspbf16 $2, %ymm0, %k1 # encoding: [0x62,0xf3,0x7f,0x28,0x66,0xc8,0x02]
; CHECK-NEXT: # k1 = isPositiveZero(ymm0)
; CHECK-NEXT: vfpclasspbf16 $4, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x66,0xc0,0x04]
; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i1> @llvm.x86.avx10.fpclass.nepbf16.256(<16 x bfloat> %x0, i32 4)
  %res1 = call <16 x i1> @llvm.x86.avx10.fpclass.nepbf16.256(<16 x bfloat> %x0, i32 2)
  %1 = and <16 x i1> %res1, %res
  %2 = bitcast <16 x i1> %1 to i16
  ret i16 %2
}
330 declare <8 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.128(<8 x bfloat>, <8 x bfloat>, i8)
331 declare <16 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.256(<16 x bfloat>, <16 x bfloat>, i16)
define <8 x bfloat>@test_int_x86_avx512_getexp_nepbf16_128(<8 x bfloat> %x0) {
; CHECK-LABEL: test_int_x86_avx512_getexp_nepbf16_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vgetexppbf16 %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x42,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> zeroinitializer, i8 -1)
  ret <8 x bfloat> %res
}
define <8 x bfloat>@test_int_x86_avx512_mask_getexp_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x1, i8 %x2) {
; X64-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vgetexppbf16 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x42,0xc8]
; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vgetexppbf16 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x42,0xc8]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
  %res = call <8 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> %x1, i8 %x2)
  ret <8 x bfloat> %res
}
define <8 x bfloat>@test_int_x86_avx512_maskz_getexp_nepbf16_128(<8 x bfloat> %x0, i8 %x2) {
; X64-LABEL: test_int_x86_avx512_maskz_getexp_nepbf16_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vgetexppbf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x42,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx512_maskz_getexp_nepbf16_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vgetexppbf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x42,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
  %res = call <8 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> zeroinitializer, i8 %x2)
  ret <8 x bfloat> %res
}
define <16 x bfloat>@test_int_x86_avx512_getexp_nepbf16_256(<16 x bfloat> %x0) {
; CHECK-LABEL: test_int_x86_avx512_getexp_nepbf16_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vgetexppbf16 %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x42,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> zeroinitializer, i16 -1)
  ret <16 x bfloat> %res
}
define <16 x bfloat>@test_int_x86_avx512_mask_getexp_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x1, i16 %x2) {
; X64-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vgetexppbf16 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x42,0xc8]
; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vgetexppbf16 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x42,0xc8]
; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
  %res = call <16 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> %x1, i16 %x2)
  ret <16 x bfloat> %res
}
define <16 x bfloat>@test_int_x86_avx512_maskz_getexp_nepbf16_256(<16 x bfloat> %x0, i16 %x2) {
; X64-LABEL: test_int_x86_avx512_maskz_getexp_nepbf16_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vgetexppbf16 %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x42,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx512_maskz_getexp_nepbf16_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vgetexppbf16 %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x42,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
  %res = call <16 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> zeroinitializer, i16 %x2)
  ret <16 x bfloat> %res
}
419 declare <8 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.128(<8 x bfloat>, i32, <8 x bfloat>, i8)
420 declare <16 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.256(<16 x bfloat>, i32, <16 x bfloat>, i16)
define <8 x bfloat>@test_int_x86_avx512_mask_getmant_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x2, i8 %x3) {
; X64-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vgetmantpbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x26,0xc8,0x08]
; X64-NEXT: vgetmantpbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x26,0xc0,0x04]
; X64-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vgetmantpbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x26,0xc8,0x08]
; X86-NEXT: vgetmantpbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x26,0xc0,0x04]
; X86-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
  %res = call <8 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.128(<8 x bfloat> %x0, i32 8, <8 x bfloat> %x2, i8 %x3)
  %res1 = call <8 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.128(<8 x bfloat> %x0, i32 4, <8 x bfloat> %x2, i8 -1)
  %res2 = fadd <8 x bfloat> %res, %res1
  ret <8 x bfloat> %res2
}
define <16 x bfloat>@test_int_x86_avx512_mask_getmant_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x2, i16 %x3) {
; X64-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vgetmantpbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x26,0xc8,0x08]
; X64-NEXT: vgetmantpbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x26,0xc0,0x04]
; X64-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vgetmantpbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x26,0xc8,0x08]
; X86-NEXT: vgetmantpbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x26,0xc0,0x04]
; X86-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
  %res = call <16 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.256(<16 x bfloat> %x0, i32 8, <16 x bfloat> %x2, i16 %x3)
  %res1 = call <16 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.256(<16 x bfloat> %x0, i32 4, <16 x bfloat> %x2, i16 -1)
  %res2 = fadd <16 x bfloat> %res, %res1
  ret <16 x bfloat> %res2
}
466 declare <8 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.128(<8 x bfloat>, i32, <8 x bfloat>, i8)
467 declare <16 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.256(<16 x bfloat>, i32, <16 x bfloat>, i16)
define <8 x bfloat>@test_int_x86_avx512_mask_rndscale_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x2, i8 %x3) {
; X64-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vrndscalenepbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x08,0xc8,0x08]
; X64-NEXT: vrndscalenepbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x08,0xc0,0x04]
; X64-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vrndscalenepbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x08,0xc8,0x08]
; X86-NEXT: vrndscalenepbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x08,0xc0,0x04]
; X86-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
  %res = call <8 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.128(<8 x bfloat> %x0, i32 8, <8 x bfloat> %x2, i8 %x3)
  %res1 = call <8 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.128(<8 x bfloat> %x0, i32 4, <8 x bfloat> %x2, i8 -1)
  %res2 = fadd <8 x bfloat> %res, %res1
  ret <8 x bfloat> %res2
}
define <16 x bfloat>@test_int_x86_avx512_mask_rndscale_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x2, i16 %x3) {
; X64-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vrndscalenepbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x08,0xc8,0x08]
; X64-NEXT: vrndscalenepbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x08,0xc0,0x04]
; X64-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vrndscalenepbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x08,0xc8,0x08]
; X86-NEXT: vrndscalenepbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x08,0xc0,0x04]
; X86-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
  %res = call <16 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.256(<16 x bfloat> %x0, i32 8, <16 x bfloat> %x2, i16 %x3)
  %res1 = call <16 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.256(<16 x bfloat> %x0, i32 4, <16 x bfloat> %x2, i16 -1)
  %res2 = fadd <16 x bfloat> %res, %res1
  ret <16 x bfloat> %res2
}
513 declare <8 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.128(<8 x bfloat>, <8 x bfloat>, <8 x bfloat>, i8)
514 declare <16 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.256(<16 x bfloat>, <16 x bfloat>, <16 x bfloat>, i16)
define <8 x bfloat>@test_int_x86_avx512_scalef_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x1) {
; CHECK-LABEL: test_int_x86_avx512_scalef_nepbf16_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vscalefpbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf6,0x7c,0x08,0x2c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> %x1, <8 x bfloat> zeroinitializer, i8 -1)
  ret <8 x bfloat> %res
}
define <8 x bfloat>@test_int_x86_avx512_mask_scalef_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %x3) {
; X64-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vscalefpbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0x2c,0xd1]
; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vscalefpbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0x2c,0xd1]
; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
  %mask = bitcast i8 %x3 to <8 x i1>
  %res = call <8 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %x3)
  ret <8 x bfloat> %res
}
define <8 x bfloat>@test_int_x86_avx512_maskz_scalef_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x1, i8 %x3) {
; X64-LABEL: test_int_x86_avx512_maskz_scalef_nepbf16_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vscalefpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x7c,0x89,0x2c,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx512_maskz_scalef_nepbf16_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vscalefpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x7c,0x89,0x2c,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
  %mask = bitcast i8 %x3 to <8 x i1>
  %res = call <8 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> %x1, <8 x bfloat> zeroinitializer, i8 %x3)
  ret <8 x bfloat> %res
}
define <16 x bfloat>@test_int_x86_avx512_scalef_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x1) {
; CHECK-LABEL: test_int_x86_avx512_scalef_nepbf16_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vscalefpbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf6,0x7c,0x28,0x2c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> %x1, <16 x bfloat> zeroinitializer, i16 -1)
  ret <16 x bfloat> %res
}
define <16 x bfloat>@test_int_x86_avx512_mask_scalef_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %x3) {
; X64-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vscalefpbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0x2c,0xd1]
; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vscalefpbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0x2c,0xd1]
; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
  %mask = bitcast i16 %x3 to <16 x i1>
  %res = call <16 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %x3)
  ret <16 x bfloat> %res
}
589 define <16 x bfloat>@test_int_x86_avx512_maskz_scalef_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x1, i16 %x3) {
590 ; X64-LABEL: test_int_x86_avx512_maskz_scalef_nepbf16_256:
592 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
593 ; X64-NEXT: vscalefpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x7c,0xa9,0x2c,0xc1]
594 ; X64-NEXT: retq # encoding: [0xc3]
596 ; X86-LABEL: test_int_x86_avx512_maskz_scalef_nepbf16_256:
598 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
599 ; X86-NEXT: vscalefpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x7c,0xa9,0x2c,0xc1]
600 ; X86-NEXT: retl # encoding: [0xc3]
601 %mask = bitcast i16 %x3 to <16 x i1>
602 %res = call <16 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> %x1, <16 x bfloat> zeroinitializer, i16 %x3)
603 ret <16 x bfloat> %res