; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64
; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86

define <16 x bfloat> @test_int_x86_avx10_add_bf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) {
; CHECK-LABEL: test_int_x86_avx10_add_bf16_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = fadd <16 x bfloat> %x1, %x2
  ret <16 x bfloat> %res
}

define <16 x bfloat> @test_int_x86_avx10_mask_add_bf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
; X64-LABEL: test_int_x86_avx10_mask_add_bf16_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vaddbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx10_mask_add_bf16_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vaddbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
  %mask = bitcast i16 %msk to <16 x i1>
  %res0 = fadd <16 x bfloat> %x1, %x2
  %res = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> %src
  ret <16 x bfloat> %res
}

define <16 x bfloat> @test_int_x86_avx10_maskz_add_bf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
; X64-LABEL: test_int_x86_avx10_maskz_add_bf16_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vaddbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x58,0xc2]
; X64-NEXT:    vaddbf16 (%rsi), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x58,0x0e]
; X64-NEXT:    vaddbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx10_maskz_add_bf16_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vaddbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x58,0xc2]
; X86-NEXT:    vaddbf16 (%eax), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x58,0x08]
; X86-NEXT:    vaddbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
  %mask = bitcast i16 %msk to <16 x i1>
  %val = load <16 x bfloat>, ptr %ptr
  %res0 = fadd <16 x bfloat> %x1, %x2
  %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer
  %t2 = fadd <16 x bfloat> %x1, %val
  %res2 = select <16 x i1> %mask, <16 x bfloat> %t2, <16 x bfloat> zeroinitializer
  %res3 = fadd <16 x bfloat> %res1, %res2
  ret <16 x bfloat> %res3
}

define <8 x bfloat> @test_int_x86_avx10_add_bf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) {
; CHECK-LABEL: test_int_x86_avx10_add_bf16_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x58,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = fadd <8 x bfloat> %x1, %x2
  ret <8 x bfloat> %res
}

define <8 x bfloat> @test_int_x86_avx10_mask_add_bf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) {
; X64-LABEL: test_int_x86_avx10_mask_add_bf16_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vaddbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx10_mask_add_bf16_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vaddbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
  %mask = bitcast i8 %msk to <8 x i1>
  %res0 = fadd <8 x bfloat> %x1, %x2
  %res = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> %src
  ret <8 x bfloat> %res
}

define <8 x bfloat> @test_int_x86_avx10_maskz_add_bf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) {
; X64-LABEL: test_int_x86_avx10_maskz_add_bf16_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vaddbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x58,0xc2]
; X64-NEXT:    vaddbf16 (%rsi), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x58,0x0e]
; X64-NEXT:    vaddbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx10_maskz_add_bf16_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vaddbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x58,0xc2]
; X86-NEXT:    vaddbf16 (%eax), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x58,0x08]
; X86-NEXT:    vaddbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
  %mask = bitcast i8 %msk to <8 x i1>
  %val = load <8 x bfloat>, ptr %ptr
  %res0 = fadd <8 x bfloat> %x1, %x2
  %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer
  %t2 = fadd <8 x bfloat> %x1, %val
  %res2 = select <8 x i1> %mask, <8 x bfloat> %t2, <8 x bfloat> zeroinitializer
  %res3 = fadd <8 x bfloat> %res1, %res2
  ret <8 x bfloat> %res3
}

define <16 x bfloat> @test_int_x86_avx10_sub_bf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) {
; CHECK-LABEL: test_int_x86_avx10_sub_bf16_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsubbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5c,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = fsub <16 x bfloat> %x1, %x2
  ret <16 x bfloat> %res
}

define <16 x bfloat> @test_int_x86_avx10_mask_sub_bf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
; X64-LABEL: test_int_x86_avx10_mask_sub_bf16_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vsubbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x5c,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx10_mask_sub_bf16_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vsubbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x5c,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
  %mask = bitcast i16 %msk to <16 x i1>
  %res0 = fsub <16 x bfloat> %x1, %x2
  %res = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> %src
  ret <16 x bfloat> %res
}

define <16 x bfloat> @test_int_x86_avx10_maskz_sub_bf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
; X64-LABEL: test_int_x86_avx10_maskz_sub_bf16_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vsubbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5c,0xc2]
; X64-NEXT:    vsubbf16 (%rsi), %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x5c,0x0e]
; X64-NEXT:    vsubbf16 %ymm1, %ymm0, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x5c,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx10_maskz_sub_bf16_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vsubbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5c,0xc2]
; X86-NEXT:    vsubbf16 (%eax), %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x5c,0x08]
; X86-NEXT:    vsubbf16 %ymm1, %ymm0, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x5c,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
  %mask = bitcast i16 %msk to <16 x i1>
  %val = load <16 x bfloat>, ptr %ptr
  %res0 = fsub <16 x bfloat> %x1, %x2
  %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer
  %t2 = fsub <16 x bfloat> %x1, %val
  %res2 = select <16 x i1> %mask, <16 x bfloat> %t2, <16 x bfloat> zeroinitializer
  %res3 = fsub <16 x bfloat> %res1, %res2
  ret <16 x bfloat> %res3
}

define <8 x bfloat> @test_int_x86_avx10_sub_bf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) {
; CHECK-LABEL: test_int_x86_avx10_sub_bf16_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsubbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5c,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = fsub <8 x bfloat> %x1, %x2
  ret <8 x bfloat> %res
}

define <8 x bfloat> @test_int_x86_avx10_mask_sub_bf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) {
; X64-LABEL: test_int_x86_avx10_mask_sub_bf16_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vsubbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x5c,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx10_mask_sub_bf16_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vsubbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x5c,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
  %mask = bitcast i8 %msk to <8 x i1>
  %res0 = fsub <8 x bfloat> %x1, %x2
  %res = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> %src
  ret <8 x bfloat> %res
}

define <8 x bfloat> @test_int_x86_avx10_maskz_sub_bf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) {
; X64-LABEL: test_int_x86_avx10_maskz_sub_bf16_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vsubbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5c,0xc2]
; X64-NEXT:    vsubbf16 (%rsi), %xmm1, %xmm1 # encoding: [0x62,0xf5,0x75,0x08,0x5c,0x0e]
; X64-NEXT:    vsubbf16 %xmm1, %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x5c,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx10_maskz_sub_bf16_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vsubbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5c,0xc2]
; X86-NEXT:    vsubbf16 (%eax), %xmm1, %xmm1 # encoding: [0x62,0xf5,0x75,0x08,0x5c,0x08]
; X86-NEXT:    vsubbf16 %xmm1, %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x5c,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
  %mask = bitcast i8 %msk to <8 x i1>
  %val = load <8 x bfloat>, ptr %ptr
  %res0 = fsub <8 x bfloat> %x1, %x2
  %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer
  %t2 = fsub <8 x bfloat> %x1, %val
  %res2 = select <8 x i1> %mask, <8 x bfloat> %t2, <8 x bfloat> zeroinitializer
  %res3 = fsub <8 x bfloat> %res1, %res2
  ret <8 x bfloat> %res3
}

define <16 x bfloat> @test_int_x86_avx10_mul_bf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) {
; CHECK-LABEL: test_int_x86_avx10_mul_bf16_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x59,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = fmul <16 x bfloat> %x1, %x2
  ret <16 x bfloat> %res
}

define <16 x bfloat> @test_int_x86_avx10_mask_mul_bf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
; X64-LABEL: test_int_x86_avx10_mask_mul_bf16_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vmulbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x59,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx10_mask_mul_bf16_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmulbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x59,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
  %mask = bitcast i16 %msk to <16 x i1>
  %res0 = fmul <16 x bfloat> %x1, %x2
  %res = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> %src
  ret <16 x bfloat> %res
}

define <16 x bfloat> @test_int_x86_avx10_maskz_mul_bf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
; X64-LABEL: test_int_x86_avx10_maskz_mul_bf16_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vmulbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x59,0xc2]
; X64-NEXT:    vmulbf16 (%rsi), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x59,0x0e]
; X64-NEXT:    vmulbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x59,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx10_maskz_mul_bf16_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmulbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x59,0xc2]
; X86-NEXT:    vmulbf16 (%eax), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x59,0x08]
; X86-NEXT:    vmulbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x59,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
  %mask = bitcast i16 %msk to <16 x i1>
  %val = load <16 x bfloat>, ptr %ptr
  %res0 = fmul <16 x bfloat> %x1, %x2
  %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer
  %t2 = fmul <16 x bfloat> %x1, %val
  %res2 = select <16 x i1> %mask, <16 x bfloat> %t2, <16 x bfloat> zeroinitializer
  %res3 = fmul <16 x bfloat> %res1, %res2
  ret <16 x bfloat> %res3
}

define <8 x bfloat> @test_int_x86_avx10_mul_bf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) {
; CHECK-LABEL: test_int_x86_avx10_mul_bf16_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x59,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = fmul <8 x bfloat> %x1, %x2
  ret <8 x bfloat> %res
}

define <8 x bfloat> @test_int_x86_avx10_mask_mul_bf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) {
; X64-LABEL: test_int_x86_avx10_mask_mul_bf16_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vmulbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x59,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx10_mask_mul_bf16_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmulbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x59,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
  %mask = bitcast i8 %msk to <8 x i1>
  %res0 = fmul <8 x bfloat> %x1, %x2
  %res = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> %src
  ret <8 x bfloat> %res
}

define <8 x bfloat> @test_int_x86_avx10_maskz_mul_bf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) {
; X64-LABEL: test_int_x86_avx10_maskz_mul_bf16_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vmulbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x59,0xc2]
; X64-NEXT:    vmulbf16 (%rsi), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x59,0x0e]
; X64-NEXT:    vmulbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x59,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx10_maskz_mul_bf16_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmulbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x59,0xc2]
; X86-NEXT:    vmulbf16 (%eax), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x59,0x08]
; X86-NEXT:    vmulbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x59,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
  %mask = bitcast i8 %msk to <8 x i1>
  %val = load <8 x bfloat>, ptr %ptr
  %res0 = fmul <8 x bfloat> %x1, %x2
  %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer
  %t2 = fmul <8 x bfloat> %x1, %val
  %res2 = select <8 x i1> %mask, <8 x bfloat> %t2, <8 x bfloat> zeroinitializer
  %res3 = fmul <8 x bfloat> %res1, %res2
  ret <8 x bfloat> %res3
}

define <16 x bfloat> @test_int_x86_avx10_div_bf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) {
; CHECK-LABEL: test_int_x86_avx10_div_bf16_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vdivbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5e,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = fdiv <16 x bfloat> %x1, %x2
  ret <16 x bfloat> %res
}

define <16 x bfloat> @test_int_x86_avx10_mask_div_bf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
; X64-LABEL: test_int_x86_avx10_mask_div_bf16_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vdivbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x5e,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx10_mask_div_bf16_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vdivbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x5e,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
  %mask = bitcast i16 %msk to <16 x i1>
  %res0 = fdiv <16 x bfloat> %x1, %x2
  %res = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> %src
  ret <16 x bfloat> %res
}

; FIXME: assembly order is different from fp16 ones
define <16 x bfloat> @test_int_x86_avx10_maskz_div_bf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
; X64-LABEL: test_int_x86_avx10_maskz_div_bf16_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vdivbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5e,0xc2]
; X64-NEXT:    vdivbf16 (%rsi), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5e,0x0e]
; X64-NEXT:    vdivbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5e,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx10_maskz_div_bf16_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vdivbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5e,0xc2]
; X86-NEXT:    vdivbf16 (%eax), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5e,0x08]
; X86-NEXT:    vdivbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5e,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
  %mask = bitcast i16 %msk to <16 x i1>
  %val = load <16 x bfloat>, ptr %ptr
  %res0 = fdiv <16 x bfloat> %x1, %x2
  %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer
  %t2 = fdiv <16 x bfloat> %x1, %val
  %res2 = select <16 x i1> %mask, <16 x bfloat> %t2, <16 x bfloat> zeroinitializer
  %res3 = fdiv <16 x bfloat> %res1, %res2
  ret <16 x bfloat> %res3
}

define <8 x bfloat> @test_int_x86_avx10_div_bf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) {
; CHECK-LABEL: test_int_x86_avx10_div_bf16_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vdivbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5e,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = fdiv <8 x bfloat> %x1, %x2
  ret <8 x bfloat> %res
}

define <8 x bfloat> @test_int_x86_avx10_mask_div_bf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) {
; X64-LABEL: test_int_x86_avx10_mask_div_bf16_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vdivbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x5e,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx10_mask_div_bf16_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vdivbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x5e,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
  %mask = bitcast i8 %msk to <8 x i1>
  %res0 = fdiv <8 x bfloat> %x1, %x2
  %res = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> %src
  ret <8 x bfloat> %res
}

; FIXME: assembly order is different from fp16 ones
define <8 x bfloat> @test_int_x86_avx10_maskz_div_bf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) {
; X64-LABEL: test_int_x86_avx10_maskz_div_bf16_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vdivbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5e,0xc2]
; X64-NEXT:    vdivbf16 (%rsi), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5e,0x0e]
; X64-NEXT:    vdivbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5e,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_int_x86_avx10_maskz_div_bf16_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vdivbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5e,0xc2]
; X86-NEXT:    vdivbf16 (%eax), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5e,0x08]
; X86-NEXT:    vdivbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5e,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
  %mask = bitcast i8 %msk to <8 x i1>
  %val = load <8 x bfloat>, ptr %ptr
  %res0 = fdiv <8 x bfloat> %x1, %x2
  %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer
  %t2 = fdiv <8 x bfloat> %x1, %val
  %res2 = select <8 x i1> %mask, <8 x bfloat> %t2, <8 x bfloat> zeroinitializer
  %res3 = fdiv <8 x bfloat> %res1, %res2
  ret <8 x bfloat> %res3
}

define i16 @test_int_x86_avx10_vcmpbf16256(<16 x bfloat> %x1, <16 x bfloat> %x2) {
; CHECK-LABEL: test_int_x86_avx10_vcmpbf16256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpunordbf16 %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7f,0x28,0xc2,0xc1,0x03]
; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = fcmp uno <16 x bfloat> %x1, %x2
  %res = bitcast <16 x i1> %1 to i16
  ret i16 %res
}

define i16 @test_int_x86_avx10_vcmpbf16256_mask2(<16 x bfloat> %x1, <16 x bfloat> %x2) {
; CHECK-LABEL: test_int_x86_avx10_vcmpbf16256_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpeqbf16 %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7f,0x28,0xc2,0xc1,0x00]
; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT:    andl $3, %eax # encoding: [0x83,0xe0,0x03]
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = fcmp oeq <16 x bfloat> %x1, %x2
  %2 = and <16 x i1> %1, <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
  %3 = bitcast <16 x i1> %2 to i16
  ret i16 %3
}

define i8 @test_int_x86_avx10_vcmpbf16128(<8 x bfloat> %x1, <8 x bfloat> %x2) {
; CHECK-LABEL: test_int_x86_avx10_vcmpbf16128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpunordbf16 %xmm1, %xmm0, %k0 # encoding: [0x62,0xf3,0x7f,0x08,0xc2,0xc1,0x03]
; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = fcmp uno <8 x bfloat> %x1, %x2
  %res = bitcast <8 x i1> %1 to i8
  ret i8 %res
}

define i8 @test_int_x86_avx10_vcmpbf16128_mask2(<8 x bfloat> %x1, <8 x bfloat> %x2) {
; CHECK-LABEL: test_int_x86_avx10_vcmpbf16128_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpeqbf16 %xmm1, %xmm0, %k0 # encoding: [0x62,0xf3,0x7f,0x08,0xc2,0xc1,0x00]
; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT:    andb $3, %al # encoding: [0x24,0x03]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = fcmp oeq <8 x bfloat> %x1, %x2
  %2 = and <8 x i1> %1, <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
  %3 = bitcast <8 x i1> %2 to i8
  ret i8 %3
}

define <16 x bfloat> @test_sqrt_bf16_256(<16 x bfloat> %a0) {
; CHECK-LABEL: test_sqrt_bf16_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtbf16 %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x51,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = tail call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> %a0)
  ret <16 x bfloat> %1
}

define <16 x bfloat> @test_mm256_mask_sqrt_pbh(<16 x bfloat> %__W, i16 %__U, <16 x bfloat> %__A) {
; X64-LABEL: test_mm256_mask_sqrt_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vsqrtbf16 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x51,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm256_mask_sqrt_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vsqrtbf16 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x51,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %0 = tail call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> %__A)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__W
  ret <16 x bfloat> %2
}

define <16 x bfloat> @test_mm256_maskz_sqrt_pbh(i16 %__U, <16 x bfloat>%__A) {
; X64-LABEL: test_mm256_maskz_sqrt_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vsqrtbf16 %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x51,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm256_maskz_sqrt_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vsqrtbf16 %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x51,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %0 = tail call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> %__A)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> zeroinitializer
  ret <16 x bfloat> %2
}

define <8 x bfloat> @test_sqrt_bf16_128(<8 x bfloat> %a0) {
; CHECK-LABEL: test_sqrt_bf16_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtbf16 %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x51,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = tail call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %a0)
  ret <8 x bfloat> %1
}

define <8 x bfloat> @test_mm_mask_sqrt_pbh(<8 x bfloat> %__W, i8 %__U, <8 x bfloat> %__A) {
; X64-LABEL: test_mm_mask_sqrt_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vsqrtbf16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x51,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm_mask_sqrt_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vsqrtbf16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x51,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %0 = tail call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %__A)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__W
  ret <8 x bfloat> %2
}

define <8 x bfloat> @test_mm_maskz_sqrt_pbh(i8 %__U, <8 x bfloat>%__A) {
; X64-LABEL: test_mm_maskz_sqrt_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vsqrtbf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x51,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm_maskz_sqrt_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vsqrtbf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x51,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %0 = tail call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %__A)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer
  ret <8 x bfloat> %2
}

define <16 x bfloat> @test_mm256_fmaddne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) {
; CHECK-LABEL: test_mm256_fmaddne_pbh:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmadd213bf16 %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf6,0x74,0x28,0xa8,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
entry:
  %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C)
  ret <16 x bfloat> %0
}

define <16 x bfloat> @test_mm256_mask_fmaddne_pbh(<16 x bfloat> %__A, i16 zeroext %__U, <16 x bfloat> %__B, <16 x bfloat> %__C) {
; X64-LABEL: test_mm256_mask_fmaddne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfmadd132bf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x98,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm256_mask_fmaddne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd132bf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x98,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__A
  ret <16 x bfloat> %2
}

define <16 x bfloat> @test_mm256_mask3_fmaddne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C, i16 zeroext %__U) {
; X64-LABEL: test_mm256_mask3_fmaddne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfmadd231bf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xb8,0xd1]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm256_mask3_fmaddne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd231bf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xb8,0xd1]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__C
  ret <16 x bfloat> %2
}

define <16 x bfloat> @test_mm256_maskz_fmaddne_pbh(i16 zeroext %__U, <16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) {
; X64-LABEL: test_mm256_maskz_fmaddne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfmadd213bf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xa8,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm256_maskz_fmaddne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd213bf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xa8,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> zeroinitializer
  ret <16 x bfloat> %2
}

define <16 x bfloat> @test_mm256_fmsubne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) {
; CHECK-LABEL: test_mm256_fmsubne_pbh:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmsub213bf16 %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf6,0x74,0x28,0xaa,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
entry:
  %fneg.i = fneg <16 x bfloat> %__C
  %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %fneg.i)
  ret <16 x bfloat> %0
}

define <16 x bfloat> @test_mm256_mask_fmsubne_pbh(<16 x bfloat> %__A, i16 zeroext %__U, <16 x bfloat> %__B, <16 x bfloat> %__C) {
; X64-LABEL: test_mm256_mask_fmsubne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfmsub132bf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9a,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm256_mask_fmsubne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmsub132bf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9a,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %fneg.i.i = fneg <16 x bfloat> %__C
  %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %fneg.i.i)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__A
  ret <16 x bfloat> %2
}

define <16 x bfloat> @test_mm256_mask3_fmsubne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C, i16 zeroext %__U) {
; X64-LABEL: test_mm256_mask3_fmsubne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfmsub231bf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xba,0xd1]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm256_mask3_fmsubne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmsub231bf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xba,0xd1]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %fneg.i.i = fneg <16 x bfloat> %__C
  %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %fneg.i.i)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__C
  ret <16 x bfloat> %2
}

define <16 x bfloat> @test_mm256_maskz_fmsubne_pbh(i16 zeroext %__U, <16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) {
; X64-LABEL: test_mm256_maskz_fmsubne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfmsub213bf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xaa,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm256_maskz_fmsubne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmsub213bf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xaa,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %fneg.i.i = fneg <16 x bfloat> %__C
  %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %fneg.i.i)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> zeroinitializer
  ret <16 x bfloat> %2
}

define <16 x bfloat> @test_mm256_fnmaddne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) {
; CHECK-LABEL: test_mm256_fnmaddne_pbh:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfnmadd213bf16 %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf6,0x74,0x28,0xac,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
entry:
  %fneg.i = fneg <16 x bfloat> %__B
  %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i, <16 x bfloat> %__C)
  ret <16 x bfloat> %0
}

define <16 x bfloat> @test_mm256_mask_fnmaddne_pbh(<16 x bfloat> %__A, i16 zeroext %__U, <16 x bfloat> %__B, <16 x bfloat> %__C) {
; X64-LABEL: test_mm256_mask_fnmaddne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfnmadd132bf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9c,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm256_mask_fnmaddne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmadd132bf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9c,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %fneg.i.i = fneg <16 x bfloat> %__B
  %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %__C)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__A
  ret <16 x bfloat> %2
}

define <16 x bfloat> @test_mm256_mask3_fnmaddne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C, i16 zeroext %__U) {
; X64-LABEL: test_mm256_mask3_fnmaddne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfnmadd231bf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xbc,0xd1]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm256_mask3_fnmaddne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmadd231bf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xbc,0xd1]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %fneg.i.i = fneg <16 x bfloat> %__B
  %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %__C)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__C
  ret <16 x bfloat> %2
}

define <16 x bfloat> @test_mm256_maskz_fnmaddne_pbh(i16 zeroext %__U, <16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) {
; X64-LABEL: test_mm256_maskz_fnmaddne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfnmadd213bf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xac,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm256_maskz_fnmaddne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmadd213bf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xac,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %fneg.i.i = fneg <16 x bfloat> %__B
  %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %__C)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> zeroinitializer
  ret <16 x bfloat> %2
}

define <16 x bfloat> @test_mm256_fnmsubne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) {
; CHECK-LABEL: test_mm256_fnmsubne_pbh:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfnmsub213bf16 %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf6,0x74,0x28,0xae,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
entry:
  %fneg.i = fneg <16 x bfloat> %__B
  %fneg1.i = fneg <16 x bfloat> %__C
  %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i, <16 x bfloat> %fneg1.i)
  ret <16 x bfloat> %0
}

define <16 x bfloat> @test_mm256_mask_fnmsubne_pbh(<16 x bfloat> %__A, i16 zeroext %__U, <16 x bfloat> %__B, <16 x bfloat> %__C) {
; X64-LABEL: test_mm256_mask_fnmsubne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfnmsub132bf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9e,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm256_mask_fnmsubne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmsub132bf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9e,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %fneg.i.i = fneg <16 x bfloat> %__B
  %fneg1.i.i = fneg <16 x bfloat> %__C
  %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %fneg1.i.i)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__A
  ret <16 x bfloat> %2
}

define <16 x bfloat> @test_mm256_mask3_fnmsubne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C, i16 zeroext %__U) {
; X64-LABEL: test_mm256_mask3_fnmsubne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfnmsub231bf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xbe,0xd1]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm256_mask3_fnmsubne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmsub231bf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xbe,0xd1]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %fneg.i.i = fneg <16 x bfloat> %__B
  %fneg1.i.i = fneg <16 x bfloat> %__C
  %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %fneg1.i.i)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__C
  ret <16 x bfloat> %2
}

define <16 x bfloat> @test_mm256_maskz_fnmsubne_pbh(i16 zeroext %__U, <16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) {
; X64-LABEL: test_mm256_maskz_fnmsubne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfnmsub213bf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xae,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm256_maskz_fnmsubne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmsub213bf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xae,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %fneg.i.i = fneg <16 x bfloat> %__B
  %fneg1.i.i = fneg <16 x bfloat> %__C
  %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %fneg1.i.i)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> zeroinitializer
  ret <16 x bfloat> %2
}

define <8 x bfloat> @test_mm_fmaddne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) {
; CHECK-LABEL: test_mm_fmaddne_pbh:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmadd213bf16 %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf6,0x74,0x08,0xa8,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
entry:
  %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C)
  ret <8 x bfloat> %0
}

define <8 x bfloat> @test_mm_mask_fmaddne_pbh(<8 x bfloat> %__A, i8 zeroext %__U, <8 x bfloat> %__B, <8 x bfloat> %__C) {
; X64-LABEL: test_mm_mask_fmaddne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfmadd132bf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x98,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm_mask_fmaddne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd132bf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x98,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__A
  ret <8 x bfloat> %2
}

define <8 x bfloat> @test_mm_mask3_fmaddne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C, i8 zeroext %__U) {
; X64-LABEL: test_mm_mask3_fmaddne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfmadd231bf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xb8,0xd1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm_mask3_fmaddne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd231bf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xb8,0xd1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__C
  ret <8 x bfloat> %2
}

define <8 x bfloat> @test_mm_maskz_fmaddne_pbh(i8 zeroext %__U, <8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) {
; X64-LABEL: test_mm_maskz_fmaddne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfmadd213bf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xa8,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm_maskz_fmaddne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd213bf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xa8,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer
  ret <8 x bfloat> %2
}

define <8 x bfloat> @test_mm_fmsubne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) {
; CHECK-LABEL: test_mm_fmsubne_pbh:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmsub213bf16 %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf6,0x74,0x08,0xaa,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
entry:
  %fneg.i = fneg <8 x bfloat> %__C
  %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %fneg.i)
  ret <8 x bfloat> %0
}

define <8 x bfloat> @test_mm_mask_fmsubne_pbh(<8 x bfloat> %__A, i8 zeroext %__U, <8 x bfloat> %__B, <8 x bfloat> %__C) {
; X64-LABEL: test_mm_mask_fmsubne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfmsub132bf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9a,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm_mask_fmsubne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmsub132bf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9a,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %fneg.i.i = fneg <8 x bfloat> %__C
  %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %fneg.i.i)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__A
  ret <8 x bfloat> %2
}

define <8 x bfloat> @test_mm_mask3_fmsubne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C, i8 zeroext %__U) {
; X64-LABEL: test_mm_mask3_fmsubne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfmsub231bf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xba,0xd1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm_mask3_fmsubne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmsub231bf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xba,0xd1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %fneg.i.i = fneg <8 x bfloat> %__C
  %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %fneg.i.i)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__C
  ret <8 x bfloat> %2
}

define <8 x bfloat> @test_mm_maskz_fmsubne_pbh(i8 zeroext %__U, <8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) {
; X64-LABEL: test_mm_maskz_fmsubne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfmsub213bf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xaa,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm_maskz_fmsubne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmsub213bf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xaa,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %fneg.i.i = fneg <8 x bfloat> %__C
  %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %fneg.i.i)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer
  ret <8 x bfloat> %2
}

define <8 x bfloat> @test_mm_fnmaddne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) {
; CHECK-LABEL: test_mm_fnmaddne_pbh:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfnmadd213bf16 %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf6,0x74,0x08,0xac,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
entry:
  %fneg.i = fneg <8 x bfloat> %__B
  %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i, <8 x bfloat> %__C)
  ret <8 x bfloat> %0
}

define <8 x bfloat> @test_mm_mask_fnmaddne_pbh(<8 x bfloat> %__A, i8 zeroext %__U, <8 x bfloat> %__B, <8 x bfloat> %__C) {
; X64-LABEL: test_mm_mask_fnmaddne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfnmadd132bf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9c,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm_mask_fnmaddne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmadd132bf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9c,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %fneg.i.i = fneg <8 x bfloat> %__B
  %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %__C)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__A
  ret <8 x bfloat> %2
}

define <8 x bfloat> @test_mm_mask3_fnmaddne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C, i8 zeroext %__U) {
; X64-LABEL: test_mm_mask3_fnmaddne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfnmadd231bf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xbc,0xd1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm_mask3_fnmaddne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmadd231bf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xbc,0xd1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %fneg.i.i = fneg <8 x bfloat> %__B
  %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %__C)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__C
  ret <8 x bfloat> %2
}

define <8 x bfloat> @test_mm_maskz_fnmaddne_pbh(i8 zeroext %__U, <8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) {
; X64-LABEL: test_mm_maskz_fnmaddne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfnmadd213bf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xac,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm_maskz_fnmaddne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmadd213bf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xac,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %fneg.i.i = fneg <8 x bfloat> %__B
  %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %__C)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer
  ret <8 x bfloat> %2
}

define <8 x bfloat> @test_mm_fnmsubne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) {
; CHECK-LABEL: test_mm_fnmsubne_pbh:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfnmsub213bf16 %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf6,0x74,0x08,0xae,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
entry:
  %fneg.i = fneg <8 x bfloat> %__B
  %fneg1.i = fneg <8 x bfloat> %__C
  %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i, <8 x bfloat> %fneg1.i)
  ret <8 x bfloat> %0
}

define <8 x bfloat> @test_mm_mask_fnmsubne_pbh(<8 x bfloat> %__A, i8 zeroext %__U, <8 x bfloat> %__B, <8 x bfloat> %__C) {
; X64-LABEL: test_mm_mask_fnmsubne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfnmsub132bf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9e,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm_mask_fnmsubne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmsub132bf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9e,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %fneg.i.i = fneg <8 x bfloat> %__B
  %fneg1.i.i = fneg <8 x bfloat> %__C
  %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %fneg1.i.i)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__A
  ret <8 x bfloat> %2
}

define <8 x bfloat> @test_mm_mask3_fnmsubne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C, i8 zeroext %__U) {
; X64-LABEL: test_mm_mask3_fnmsubne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfnmsub231bf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xbe,0xd1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm_mask3_fnmsubne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmsub231bf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xbe,0xd1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %fneg.i.i = fneg <8 x bfloat> %__B
  %fneg1.i.i = fneg <8 x bfloat> %__C
  %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %fneg1.i.i)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__C
  ret <8 x bfloat> %2
}

define <8 x bfloat> @test_mm_maskz_fnmsubne_pbh(i8 zeroext %__U, <8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) {
; X64-LABEL: test_mm_maskz_fnmsubne_pbh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vfnmsub213bf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xae,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: test_mm_maskz_fnmsubne_pbh:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmsub213bf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xae,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
entry:
  %fneg.i.i = fneg <8 x bfloat> %__B
  %fneg1.i.i = fneg <8 x bfloat> %__C
  %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %fneg1.i.i)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer
  ret <8 x bfloat> %2
}

define <32 x bfloat> @addv(<32 x bfloat> %a, <32 x bfloat> %b) nounwind {
; X64-LABEL: addv:
; X64:       # %bb.0:
; X64-NEXT:    vaddbf16 %ymm2, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc2]
; X64-NEXT:    vaddbf16 %ymm3, %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
;
; X86-LABEL: addv:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp # encoding: [0x55]
; X86-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-NEXT:    andl $-32, %esp # encoding: [0x83,0xe4,0xe0]
; X86-NEXT:    subl $32, %esp # encoding: [0x83,0xec,0x20]
; X86-NEXT:    vaddbf16 %ymm2, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc2]
; X86-NEXT:    vaddbf16 8(%ebp), %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x58,0x8d,0x08,0x00,0x00,0x00]
; X86-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
; X86-NEXT:    popl %ebp # encoding: [0x5d]
; X86-NEXT:    retl # encoding: [0xc3]
  %add = fadd <32 x bfloat> %a, %b
  ret <32 x bfloat> %add
}