1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64
3 ; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86
5 declare <32 x bfloat> @llvm.x86.avx10.vminpbf16512(<32 x bfloat>, <32 x bfloat>)
7 define <32 x bfloat> @test_int_x86_avx10_min_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2) {
8 ; CHECK-LABEL: test_int_x86_avx10_min_nepbf16_512:
10 ; CHECK-NEXT: vminpbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x5d,0xc1]
11 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
12 %res0 = call <32 x bfloat> @llvm.x86.avx10.vminpbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2)
13 ret <32 x bfloat> %res0
16 define <32 x bfloat> @test_int_x86_avx10_maskz_min_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk) {
17 ; X64-LABEL: test_int_x86_avx10_maskz_min_nepbf16_512:
19 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
20 ; X64-NEXT: vminpbf16 %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x5d,0xc1]
21 ; X64-NEXT: retq # encoding: [0xc3]
23 ; X86-LABEL: test_int_x86_avx10_maskz_min_nepbf16_512:
25 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
26 ; X86-NEXT: vminpbf16 %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x5d,0xc1]
27 ; X86-NEXT: retl # encoding: [0xc3]
28 %mask = bitcast i32 %msk to <32 x i1>
29 %res0 = call <32 x bfloat> @llvm.x86.avx10.vminpbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2)
30 %res1 = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> zeroinitializer
31 ret <32 x bfloat> %res1
34 declare <32 x bfloat> @llvm.x86.avx10.vmaxpbf16512(<32 x bfloat>, <32 x bfloat>)
36 define <32 x bfloat> @test_int_x86_avx10_max_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2) {
37 ; CHECK-LABEL: test_int_x86_avx10_max_nepbf16_512:
39 ; CHECK-NEXT: vmaxpbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x5f,0xc1]
40 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
41 %res0 = call <32 x bfloat> @llvm.x86.avx10.vmaxpbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2)
42 ret <32 x bfloat> %res0
45 define <32 x bfloat> @test_int_x86_avx10_maskz_max_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk) {
46 ; X64-LABEL: test_int_x86_avx10_maskz_max_nepbf16_512:
48 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
49 ; X64-NEXT: vmaxpbf16 %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x5f,0xc1]
50 ; X64-NEXT: retq # encoding: [0xc3]
52 ; X86-LABEL: test_int_x86_avx10_maskz_max_nepbf16_512:
54 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
55 ; X86-NEXT: vmaxpbf16 %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x5f,0xc1]
56 ; X86-NEXT: retl # encoding: [0xc3]
57 %mask = bitcast i32 %msk to <32 x i1>
58 %res0 = call <32 x bfloat> @llvm.x86.avx10.vmaxpbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2)
59 %res1 = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> zeroinitializer
60 ret <32 x bfloat> %res1
63 declare <32 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.512(<32 x bfloat>, <32 x bfloat>, i32)
65 define <32 x bfloat> @test_rsqrt_nepbf16_512(<32 x bfloat> %a0) {
66 ; CHECK-LABEL: test_rsqrt_nepbf16_512:
68 ; CHECK-NEXT: vrsqrtpbf16 %zmm0, %zmm0 # encoding: [0x62,0xf6,0x7c,0x48,0x4e,0xc0]
69 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
70 %res = call <32 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.512(<32 x bfloat> %a0, <32 x bfloat> zeroinitializer, i32 -1)
71 ret <32 x bfloat> %res
74 declare <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat>, i32)
76 define i32 @test_int_x86_avx512_fpclass_nepbf16_512(<32 x bfloat> %x0) {
77 ; CHECK-LABEL: test_int_x86_avx512_fpclass_nepbf16_512:
79 ; CHECK-NEXT: vfpclasspbf16 $6, %zmm0, %k1 # encoding: [0x62,0xf3,0x7f,0x48,0x66,0xc8,0x06]
80 ; CHECK-NEXT: # k1 = isPositiveZero(zmm0) | isNegativeZero(zmm0)
81 ; CHECK-NEXT: vfpclasspbf16 $0, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x66,0xc0,0x00]
82 ; CHECK-NEXT: # k0 {%k1} = false
83 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
84 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
85 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
86 %res = call <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat> %x0, i32 0)
87 %res1 = call <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat> %x0, i32 6)
88 %1 = and <32 x i1> %res1, %res
89 %2 = bitcast <32 x i1> %1 to i32
93 declare <32 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.512(<32 x bfloat>, <32 x bfloat>, i32)
95 define <32 x bfloat> @test_rcp_nepbf16_512(<32 x bfloat> %a0, <32 x bfloat> %a1, i32 %mask) {
96 ; X64-LABEL: test_rcp_nepbf16_512:
98 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
99 ; X64-NEXT: vrcppbf16 %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0x4c,0xc8]
100 ; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
101 ; X64-NEXT: retq # encoding: [0xc3]
103 ; X86-LABEL: test_rcp_nepbf16_512:
105 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
106 ; X86-NEXT: vrcppbf16 %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0x4c,0xc8]
107 ; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
108 ; X86-NEXT: retl # encoding: [0xc3]
109 %res = call <32 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.512(<32 x bfloat> %a0, <32 x bfloat> %a1, i32 %mask)
110 ret <32 x bfloat> %res
113 declare <32 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.512(<32 x bfloat>, i32, <32 x bfloat>, i32)
115 define <32 x bfloat>@test_int_x86_avx512_mask_reduce_nepbf16_512(<32 x bfloat> %x0, <32 x bfloat> %x2, i32 %x3) {
116 ; X64-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_512:
118 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
119 ; X64-NEXT: vreducenepbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x56,0xc8,0x08]
120 ; X64-NEXT: vreducenepbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x56,0xc0,0x04]
121 ; X64-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
122 ; X64-NEXT: retq # encoding: [0xc3]
124 ; X86-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_512:
126 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
127 ; X86-NEXT: vreducenepbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x56,0xc8,0x08]
128 ; X86-NEXT: vreducenepbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x56,0xc0,0x04]
129 ; X86-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
130 ; X86-NEXT: retl # encoding: [0xc3]
131 %res = call <32 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.512(<32 x bfloat> %x0, i32 8, <32 x bfloat> %x2, i32 %x3)
132 %res1 = call <32 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.512(<32 x bfloat> %x0, i32 4, <32 x bfloat> %x2, i32 -1)
133 %res2 = fadd <32 x bfloat> %res, %res1
134 ret <32 x bfloat> %res2
137 declare <32 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.512(<32 x bfloat>, i32, <32 x bfloat>, i32)
139 define <32 x bfloat>@test_int_x86_avx512_mask_rndscale_nepbf16_512(<32 x bfloat> %x0, <32 x bfloat> %x2, i32 %x3) {
140 ; X64-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_512:
142 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
143 ; X64-NEXT: vrndscalenepbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x08,0xc8,0x08]
144 ; X64-NEXT: vrndscalenepbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x08,0xc0,0x04]
145 ; X64-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
146 ; X64-NEXT: retq # encoding: [0xc3]
148 ; X86-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_512:
150 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
151 ; X86-NEXT: vrndscalenepbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x08,0xc8,0x08]
152 ; X86-NEXT: vrndscalenepbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x08,0xc0,0x04]
153 ; X86-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
154 ; X86-NEXT: retl # encoding: [0xc3]
155 %res = call <32 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.512(<32 x bfloat> %x0, i32 8, <32 x bfloat> %x2, i32 %x3)
156 %res1 = call <32 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.512(<32 x bfloat> %x0, i32 4, <32 x bfloat> %x2, i32 -1)
157 %res2 = fadd <32 x bfloat> %res, %res1
158 ret <32 x bfloat> %res2
161 declare <32 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.512(<32 x bfloat>, <32 x bfloat>, i32)
163 define <32 x bfloat>@test_int_x86_avx512_mask_getexp_nepbf16_512(<32 x bfloat> %x0, <32 x bfloat> %x1, i32 %x2) {
164 ; X64-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_512:
166 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
167 ; X64-NEXT: vgetexppbf16 %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x42,0xc0]
168 ; X64-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xc8]
169 ; X64-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
170 ; X64-NEXT: retq # encoding: [0xc3]
172 ; X86-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_512:
174 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
175 ; X86-NEXT: vgetexppbf16 %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x42,0xc0]
176 ; X86-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xc8]
177 ; X86-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
178 ; X86-NEXT: retl # encoding: [0xc3]
179 %res1 = call <32 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.512(<32 x bfloat> %x0, <32 x bfloat> %x1, i32 %x2)
180 %res2 = call <32 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.512(<32 x bfloat> %x0, <32 x bfloat> zeroinitializer, i32 -1)
181 %res3 = fadd <32 x bfloat> %res1, %res2
182 ret <32 x bfloat> %res3
185 declare <32 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.512(<32 x bfloat>, i32, <32 x bfloat>, i32)
187 define <32 x bfloat>@test_int_x86_avx512_mask_getmant_nepbf16_512(<32 x bfloat> %x0, <32 x bfloat> %x2, i32 %x3) {
188 ; X64-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_512:
190 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
191 ; X64-NEXT: vgetmantpbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x26,0xc8,0x08]
192 ; X64-NEXT: vgetmantpbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x26,0xc0,0x04]
193 ; X64-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
194 ; X64-NEXT: retq # encoding: [0xc3]
196 ; X86-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_512:
198 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
199 ; X86-NEXT: vgetmantpbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x26,0xc8,0x08]
200 ; X86-NEXT: vgetmantpbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x26,0xc0,0x04]
201 ; X86-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
202 ; X86-NEXT: retl # encoding: [0xc3]
203 %res = call <32 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.512(<32 x bfloat> %x0, i32 8, <32 x bfloat> %x2, i32 %x3)
204 %res1 = call <32 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.512(<32 x bfloat> %x0, i32 4, <32 x bfloat> %x2, i32 -1)
205 %res2 = fadd <32 x bfloat> %res, %res1
206 ret <32 x bfloat> %res2
209 declare <32 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.512(<32 x bfloat>, <32 x bfloat>, <32 x bfloat>, i32)
211 define <32 x bfloat>@test_int_x86_avx512_mask_scalef_nepbf16_512(<32 x bfloat> %x0, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %x3) {
212 ; X64-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_512:
214 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
215 ; X64-NEXT: vscalefpbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf6,0x7c,0x48,0x2c,0xc1]
216 ; X64-NEXT: vmovdqu16 %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xd0]
217 ; X64-NEXT: vaddnepbf16 %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf5,0x6d,0x48,0x58,0xc0]
218 ; X64-NEXT: retq # encoding: [0xc3]
220 ; X86-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_512:
222 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
223 ; X86-NEXT: vscalefpbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf6,0x7c,0x48,0x2c,0xc1]
224 ; X86-NEXT: vmovdqu16 %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xd0]
225 ; X86-NEXT: vaddnepbf16 %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf5,0x6d,0x48,0x58,0xc0]
226 ; X86-NEXT: retl # encoding: [0xc3]
227 %mask = bitcast i32 %x3 to <32 x i1>
228 %res1 = call <32 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.512(<32 x bfloat> %x0, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %x3)
229 %res2 = call <32 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.512(<32 x bfloat> %x0, <32 x bfloat> %x1, <32 x bfloat> zeroinitializer, i32 -1)
230 %res3 = fadd <32 x bfloat> %res1, %res2
231 ret <32 x bfloat> %res3