1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; FIXME: GlobalISel crashes on v3; the -global-isel=1 run lines below are disabled (spelled "xUN") until that is fixed.
3 ; RUN: llc -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7SELDAG %s
4 ; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s
5 ; RUN: llc -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8SELDAG %s
6 ; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s
7 ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9SELDAG %s
8 ; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s
9 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10SELDAG %s
10 ; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s
11 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG %s
12 ; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL %s
; sgpr_isnan_bf16: amdgpu_kernel entry point taking an output pointer and a bfloat
; argument. It queries llvm.is.fpclass.bf16 with class mask 3 (0x001 | 0x002, the
; two masks used by snan_bf16 and qnan_bf16 in this file), sign-extends the i1
; result to i32 and stores it to %out. The commented lines inside the body are the
; autogenerated expected llc output, one group per tested -mcpu.
14 define amdgpu_kernel void @sgpr_isnan_bf16(ptr addrspace(1) %out, bfloat %x) {
15 ; GFX7CHECK-LABEL: sgpr_isnan_bf16:
17 ; GFX7CHECK-NEXT: s_load_dword s4, s[2:3], 0xb
18 ; GFX7CHECK-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
19 ; GFX7CHECK-NEXT: s_mov_b32 s3, 0xf000
20 ; GFX7CHECK-NEXT: s_mov_b32 s2, -1
21 ; GFX7CHECK-NEXT: s_waitcnt lgkmcnt(0)
22 ; GFX7CHECK-NEXT: s_and_b32 s4, s4, 0x7fff
23 ; GFX7CHECK-NEXT: s_cmpk_gt_i32 s4, 0x7f80
24 ; GFX7CHECK-NEXT: s_cselect_b64 s[4:5], -1, 0
25 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
26 ; GFX7CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
27 ; GFX7CHECK-NEXT: s_endpgm
29 ; GFX8CHECK-LABEL: sgpr_isnan_bf16:
31 ; GFX8CHECK-NEXT: s_load_dword s4, s[2:3], 0x2c
32 ; GFX8CHECK-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
33 ; GFX8CHECK-NEXT: v_mov_b32_e32 v0, 0x7fff
34 ; GFX8CHECK-NEXT: s_movk_i32 s2, 0x7f80
35 ; GFX8CHECK-NEXT: s_waitcnt lgkmcnt(0)
36 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, s4, v0
37 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s2, v0
38 ; GFX8CHECK-NEXT: v_mov_b32_e32 v0, s0
39 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
40 ; GFX8CHECK-NEXT: v_mov_b32_e32 v1, s1
41 ; GFX8CHECK-NEXT: flat_store_dword v[0:1], v2
42 ; GFX8CHECK-NEXT: s_endpgm
44 ; GFX9CHECK-LABEL: sgpr_isnan_bf16:
46 ; GFX9CHECK-NEXT: s_load_dword s4, s[2:3], 0x2c
47 ; GFX9CHECK-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
48 ; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x7fff
49 ; GFX9CHECK-NEXT: s_movk_i32 s2, 0x7f80
50 ; GFX9CHECK-NEXT: v_mov_b32_e32 v0, 0
51 ; GFX9CHECK-NEXT: s_waitcnt lgkmcnt(0)
52 ; GFX9CHECK-NEXT: v_and_b32_e32 v1, s4, v1
53 ; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s2, v1
54 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
55 ; GFX9CHECK-NEXT: global_store_dword v0, v1, s[0:1]
56 ; GFX9CHECK-NEXT: s_endpgm
58 ; GFX10CHECK-LABEL: sgpr_isnan_bf16:
59 ; GFX10CHECK: ; %bb.0:
60 ; GFX10CHECK-NEXT: s_clause 0x1
61 ; GFX10CHECK-NEXT: s_load_dword s4, s[2:3], 0x2c
62 ; GFX10CHECK-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
63 ; GFX10CHECK-NEXT: v_mov_b32_e32 v1, 0
64 ; GFX10CHECK-NEXT: s_waitcnt lgkmcnt(0)
65 ; GFX10CHECK-NEXT: v_and_b32_e64 v0, 0x7fff, s4
66 ; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
67 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
68 ; GFX10CHECK-NEXT: global_store_dword v1, v0, s[0:1]
69 ; GFX10CHECK-NEXT: s_endpgm
71 ; GFX11CHECK-LABEL: sgpr_isnan_bf16:
72 ; GFX11CHECK: ; %bb.0:
73 ; GFX11CHECK-NEXT: s_clause 0x1
74 ; GFX11CHECK-NEXT: s_load_b32 s4, s[2:3], 0x2c
75 ; GFX11CHECK-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
76 ; GFX11CHECK-NEXT: v_mov_b32_e32 v1, 0
77 ; GFX11CHECK-NEXT: s_waitcnt lgkmcnt(0)
78 ; GFX11CHECK-NEXT: v_and_b32_e64 v0, 0x7fff, s4
79 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
80 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
81 ; GFX11CHECK-NEXT: global_store_b32 v1, v0, s[0:1]
82 ; GFX11CHECK-NEXT: s_nop 0
83 ; GFX11CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
84 ; GFX11CHECK-NEXT: s_endpgm
85 %result = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 3)
86 %sext = sext i1 %result to i32
87 store i32 %sext, ptr addrspace(1) %out, align 4
; zeromask_bf16: bfloat -> i1 function that queries llvm.is.fpclass.bf16 with an
; empty class mask (0). Every expected-output group below materializes the
; constant 0 in v0 and emits no compare.
91 define i1 @zeromask_bf16(bfloat %x) nounwind {
92 ; GFX7CHECK-LABEL: zeromask_bf16:
94 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95 ; GFX7CHECK-NEXT: v_mov_b32_e32 v0, 0
96 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
98 ; GFX8CHECK-LABEL: zeromask_bf16:
100 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
101 ; GFX8CHECK-NEXT: v_mov_b32_e32 v0, 0
102 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
104 ; GFX9CHECK-LABEL: zeromask_bf16:
105 ; GFX9CHECK: ; %bb.0:
106 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
107 ; GFX9CHECK-NEXT: v_mov_b32_e32 v0, 0
108 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
110 ; GFX10CHECK-LABEL: zeromask_bf16:
111 ; GFX10CHECK: ; %bb.0:
112 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113 ; GFX10CHECK-NEXT: v_mov_b32_e32 v0, 0
114 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
116 ; GFX11CHECK-LABEL: zeromask_bf16:
117 ; GFX11CHECK: ; %bb.0:
118 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
119 ; GFX11CHECK-NEXT: v_mov_b32_e32 v0, 0
120 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
121 %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 0)
125 ; FIXME: DAG and GlobalISel return different values for i1 true
; allflags_bf16: bfloat -> i1 function that queries llvm.is.fpclass.bf16 with
; class mask 1023 (0x3ff, the low ten bits all set). Every expected-output group
; below materializes the constant 1 in v0 and emits no compare.
126 define i1 @allflags_bf16(bfloat %x) nounwind {
127 ; GFX7CHECK-LABEL: allflags_bf16:
128 ; GFX7CHECK: ; %bb.0:
129 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130 ; GFX7CHECK-NEXT: v_mov_b32_e32 v0, 1
131 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
133 ; GFX8CHECK-LABEL: allflags_bf16:
134 ; GFX8CHECK: ; %bb.0:
135 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136 ; GFX8CHECK-NEXT: v_mov_b32_e32 v0, 1
137 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
139 ; GFX9CHECK-LABEL: allflags_bf16:
140 ; GFX9CHECK: ; %bb.0:
141 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142 ; GFX9CHECK-NEXT: v_mov_b32_e32 v0, 1
143 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
145 ; GFX10CHECK-LABEL: allflags_bf16:
146 ; GFX10CHECK: ; %bb.0:
147 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
148 ; GFX10CHECK-NEXT: v_mov_b32_e32 v0, 1
149 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
151 ; GFX11CHECK-LABEL: allflags_bf16:
152 ; GFX11CHECK: ; %bb.0:
153 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
154 ; GFX11CHECK-NEXT: v_mov_b32_e32 v0, 1
155 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
156 %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 1023) ; 0x3ff
; snan_bf16: bfloat -> i1 function that queries llvm.is.fpclass.bf16 with class
; mask 1 (0x001); per the function name this is the signaling-NaN class. The
; expected output masks off the sign bit and tests 0x7f80 < bits < 0x7fc0 with two
; signed compares combined by a scalar AND.
160 define i1 @snan_bf16(bfloat %x) nounwind {
161 ; GFX7CHECK-LABEL: snan_bf16:
162 ; GFX7CHECK: ; %bb.0:
163 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
165 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
166 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7fc0
167 ; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0
168 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
169 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e64 s[4:5], s4, v0
170 ; GFX7CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
171 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
172 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
174 ; GFX8CHECK-LABEL: snan_bf16:
175 ; GFX8CHECK: ; %bb.0:
176 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
177 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
178 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7fc0
179 ; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0
180 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
181 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e64 s[4:5], s4, v0
182 ; GFX8CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
183 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
184 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
186 ; GFX9CHECK-LABEL: snan_bf16:
187 ; GFX9CHECK: ; %bb.0:
188 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
189 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
190 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7fc0
191 ; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0
192 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
193 ; GFX9CHECK-NEXT: v_cmp_lt_i16_e64 s[4:5], s4, v0
194 ; GFX9CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
195 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
196 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
198 ; GFX10CHECK-LABEL: snan_bf16:
199 ; GFX10CHECK: ; %bb.0:
200 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
201 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
202 ; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0
203 ; GFX10CHECK-NEXT: v_cmp_lt_i16_e64 s4, 0x7f80, v0
204 ; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo
205 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
206 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
208 ; GFX11CHECK-LABEL: snan_bf16:
209 ; GFX11CHECK: ; %bb.0:
210 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
211 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
212 ; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0
213 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e64 s0, 0x7f80, v0
214 ; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo
215 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
216 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
217 %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 1) ; 0x001
; qnan_bf16: bfloat -> i1 function that queries llvm.is.fpclass.bf16 with class
; mask 2 (0x002); per the function name this is the quiet-NaN class. The expected
; output masks off the sign bit and does a single signed compare, bits > 0x7fbf.
221 define i1 @qnan_bf16(bfloat %x) nounwind {
222 ; GFX7CHECK-LABEL: qnan_bf16:
223 ; GFX7CHECK: ; %bb.0:
224 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
225 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
226 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
227 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7fbf
228 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
229 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
230 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
232 ; GFX8CHECK-LABEL: qnan_bf16:
233 ; GFX8CHECK: ; %bb.0:
234 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
235 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
236 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7fbf
237 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
238 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
239 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
241 ; GFX9CHECK-LABEL: qnan_bf16:
242 ; GFX9CHECK: ; %bb.0:
243 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
244 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
245 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7fbf
246 ; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
247 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
248 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
250 ; GFX10CHECK-LABEL: qnan_bf16:
251 ; GFX10CHECK: ; %bb.0:
252 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
253 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
254 ; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7fbf, v0
255 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
256 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
258 ; GFX11CHECK-LABEL: qnan_bf16:
259 ; GFX11CHECK: ; %bb.0:
260 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
261 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
262 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7fbf, v0
263 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
264 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
265 %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 2) ; 0x002
; posinf_bf16: bfloat -> i1 function that queries llvm.is.fpclass.bf16 with class
; mask 512 (0x200); per the function name this is the positive-infinity class.
; The expected output is one equality compare of the raw bits against 0x7f80.
269 define i1 @posinf_bf16(bfloat %x) nounwind {
270 ; GFX7CHECK-LABEL: posinf_bf16:
271 ; GFX7CHECK: ; %bb.0:
272 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
274 ; GFX7CHECK-NEXT: v_lshrrev_b32_e32 v0, 16, v0
275 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
276 ; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
277 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
278 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
280 ; GFX8CHECK-LABEL: posinf_bf16:
281 ; GFX8CHECK: ; %bb.0:
282 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
283 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
284 ; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
285 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
286 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
288 ; GFX9CHECK-LABEL: posinf_bf16:
289 ; GFX9CHECK: ; %bb.0:
290 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
291 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
292 ; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
293 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
294 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
296 ; GFX10CHECK-LABEL: posinf_bf16:
297 ; GFX10CHECK: ; %bb.0:
298 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
299 ; GFX10CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
300 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
301 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
303 ; GFX11CHECK-LABEL: posinf_bf16:
304 ; GFX11CHECK: ; %bb.0:
305 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
306 ; GFX11CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
307 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
308 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
309 %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 512) ; 0x200
; neginf_bf16: bfloat -> i1 function that queries llvm.is.fpclass.bf16 with class
; mask 4 (0x004); per the function name this is the negative-infinity class.
; The expected output is one equality compare of the raw bits against 0xff80.
313 define i1 @neginf_bf16(bfloat %x) nounwind {
314 ; GFX7CHECK-LABEL: neginf_bf16:
315 ; GFX7CHECK: ; %bb.0:
316 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
317 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
318 ; GFX7CHECK-NEXT: v_lshrrev_b32_e32 v0, 16, v0
319 ; GFX7CHECK-NEXT: s_mov_b32 s4, 0xff80
320 ; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
321 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
322 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
324 ; GFX8CHECK-LABEL: neginf_bf16:
325 ; GFX8CHECK: ; %bb.0:
326 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
327 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0xff80
328 ; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
329 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
330 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
332 ; GFX9CHECK-LABEL: neginf_bf16:
333 ; GFX9CHECK: ; %bb.0:
334 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0xff80
336 ; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
337 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
338 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
340 ; GFX10CHECK-LABEL: neginf_bf16:
341 ; GFX10CHECK: ; %bb.0:
342 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
343 ; GFX10CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0xff80, v0
344 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
345 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
347 ; GFX11CHECK-LABEL: neginf_bf16:
348 ; GFX11CHECK: ; %bb.0:
349 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
350 ; GFX11CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0xff80, v0
351 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
352 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
353 %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 4) ; 0x004
; posnormal_bf16: bfloat -> i1 function that queries llvm.is.fpclass.bf16 with
; class mask 256 (0x100); per the function name this is the positive-normal class.
; The expected output combines a sign test (bits > -1 as signed) with an unsigned
; range test on the sign-cleared bits: (abs_bits - 0x80) < 0x7f00.
357 define i1 @posnormal_bf16(bfloat %x) nounwind {
358 ; GFX7CHECK-LABEL: posnormal_bf16:
359 ; GFX7CHECK: ; %bb.0:
360 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
361 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
362 ; GFX7CHECK-NEXT: v_ashrrev_i32_e32 v1, 16, v0
363 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
364 ; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, 0xffffff80, v0
365 ; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
366 ; GFX7CHECK-NEXT: s_movk_i32 s6, 0x7f00
367 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e64 s[4:5], -1, v1
368 ; GFX7CHECK-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
369 ; GFX7CHECK-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
370 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
371 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
373 ; GFX8CHECK-LABEL: posnormal_bf16:
374 ; GFX8CHECK: ; %bb.0:
375 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
376 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, -1, v0
377 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
378 ; GFX8CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0
379 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f00
380 ; GFX8CHECK-NEXT: v_cmp_gt_u16_e64 s[4:5], s4, v0
381 ; GFX8CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
382 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
383 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
385 ; GFX9CHECK-LABEL: posnormal_bf16:
386 ; GFX9CHECK: ; %bb.0:
387 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
388 ; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, -1, v0
389 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
390 ; GFX9CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0
391 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f00
392 ; GFX9CHECK-NEXT: v_cmp_gt_u16_e64 s[4:5], s4, v0
393 ; GFX9CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
394 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
395 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
397 ; GFX10CHECK-LABEL: posnormal_bf16:
398 ; GFX10CHECK: ; %bb.0:
399 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
400 ; GFX10CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
401 ; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, -1, v0
402 ; GFX10CHECK-NEXT: v_add_nc_u16 v1, v1, 0xff80
403 ; GFX10CHECK-NEXT: v_cmp_gt_u16_e64 s4, 0x7f00, v1
404 ; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo
405 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
406 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
408 ; GFX11CHECK-LABEL: posnormal_bf16:
409 ; GFX11CHECK: ; %bb.0:
410 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
411 ; GFX11CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
412 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, -1, v0
413 ; GFX11CHECK-NEXT: v_add_nc_u16 v1, v1, 0xff80
414 ; GFX11CHECK-NEXT: v_cmp_gt_u16_e64 s0, 0x7f00, v1
415 ; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo
416 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
417 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
418 %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 256) ; 0x100
; negnormal_bf16: bfloat -> i1 function that queries llvm.is.fpclass.bf16 with
; class mask 8 (0x008); per the function name this is the negative-normal class.
; The expected output combines a sign test (bits < 0 as signed) with an unsigned
; range test on the sign-cleared bits: (abs_bits - 0x80) < 0x7f00.
422 define i1 @negnormal_bf16(bfloat %x) nounwind {
423 ; GFX7CHECK-LABEL: negnormal_bf16:
424 ; GFX7CHECK: ; %bb.0:
425 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
426 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
427 ; GFX7CHECK-NEXT: v_ashrrev_i32_e32 v1, 16, v0
428 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
429 ; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, 0xffffff80, v0
430 ; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
431 ; GFX7CHECK-NEXT: s_movk_i32 s6, 0x7f00
432 ; GFX7CHECK-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v1
433 ; GFX7CHECK-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
434 ; GFX7CHECK-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
435 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
436 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
438 ; GFX8CHECK-LABEL: negnormal_bf16:
439 ; GFX8CHECK: ; %bb.0:
440 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
441 ; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0
442 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
443 ; GFX8CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0
444 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f00
445 ; GFX8CHECK-NEXT: v_cmp_gt_u16_e64 s[4:5], s4, v0
446 ; GFX8CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
447 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
448 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
450 ; GFX9CHECK-LABEL: negnormal_bf16:
451 ; GFX9CHECK: ; %bb.0:
452 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
453 ; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0
454 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
455 ; GFX9CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0
456 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f00
457 ; GFX9CHECK-NEXT: v_cmp_gt_u16_e64 s[4:5], s4, v0
458 ; GFX9CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
459 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
460 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
462 ; GFX10CHECK-LABEL: negnormal_bf16:
463 ; GFX10CHECK: ; %bb.0:
464 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
465 ; GFX10CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
466 ; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0
467 ; GFX10CHECK-NEXT: v_add_nc_u16 v1, v1, 0xff80
468 ; GFX10CHECK-NEXT: v_cmp_gt_u16_e64 s4, 0x7f00, v1
469 ; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo
470 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
471 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
473 ; GFX11CHECK-LABEL: negnormal_bf16:
474 ; GFX11CHECK: ; %bb.0:
475 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
476 ; GFX11CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
477 ; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0
478 ; GFX11CHECK-NEXT: v_add_nc_u16 v1, v1, 0xff80
479 ; GFX11CHECK-NEXT: v_cmp_gt_u16_e64 s0, 0x7f00, v1
480 ; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo
481 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
482 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
483 %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 8) ; 0x008
; possubnormal_bf16: bfloat -> i1 function that queries llvm.is.fpclass.bf16 with
; class mask 128 (0x080); per the function name this is the positive-subnormal
; class. The expected output is a single unsigned range test on the raw bits:
; (bits - 1) < 0x7f.
487 define i1 @possubnormal_bf16(bfloat %x) nounwind {
488 ; GFX7CHECK-LABEL: possubnormal_bf16:
489 ; GFX7CHECK: ; %bb.0:
490 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
491 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
492 ; GFX7CHECK-NEXT: v_lshrrev_b32_e32 v0, 16, v0
493 ; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, -1, v0
494 ; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
495 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f
496 ; GFX7CHECK-NEXT: v_cmp_gt_u32_e32 vcc, s4, v0
497 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
498 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
500 ; GFX8CHECK-LABEL: possubnormal_bf16:
501 ; GFX8CHECK: ; %bb.0:
502 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
503 ; GFX8CHECK-NEXT: v_add_u16_e32 v0, -1, v0
504 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f
505 ; GFX8CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s4, v0
506 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
507 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
509 ; GFX9CHECK-LABEL: possubnormal_bf16:
510 ; GFX9CHECK: ; %bb.0:
511 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
512 ; GFX9CHECK-NEXT: v_add_u16_e32 v0, -1, v0
513 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f
514 ; GFX9CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s4, v0
515 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
516 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
518 ; GFX10CHECK-LABEL: possubnormal_bf16:
519 ; GFX10CHECK: ; %bb.0:
520 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521 ; GFX10CHECK-NEXT: v_add_nc_u16 v0, v0, -1
522 ; GFX10CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f, v0
523 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
524 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
526 ; GFX11CHECK-LABEL: possubnormal_bf16:
527 ; GFX11CHECK: ; %bb.0:
528 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
529 ; GFX11CHECK-NEXT: v_add_nc_u16 v0, v0, -1
530 ; GFX11CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f, v0
531 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
532 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
533 %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 128) ; 0x080
; negsubnormal_bf16: bfloat -> i1 function that queries llvm.is.fpclass.bf16 with
; class mask 16 (0x010); per the function name this is the negative-subnormal
; class. The expected output combines a sign test (bits < 0 as signed) with an
; unsigned range test on the sign-cleared bits: (abs_bits - 1) < 0x7f.
537 define i1 @negsubnormal_bf16(bfloat %x) nounwind {
538 ; GFX7CHECK-LABEL: negsubnormal_bf16:
539 ; GFX7CHECK: ; %bb.0:
540 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
541 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
542 ; GFX7CHECK-NEXT: v_ashrrev_i32_e32 v1, 16, v0
543 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
544 ; GFX7CHECK-NEXT: v_add_i32_e64 v0, s[4:5], -1, v0
545 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f
546 ; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1
547 ; GFX7CHECK-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v0
548 ; GFX7CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
549 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
550 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
552 ; GFX8CHECK-LABEL: negsubnormal_bf16:
553 ; GFX8CHECK: ; %bb.0:
554 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
555 ; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0
556 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
557 ; GFX8CHECK-NEXT: v_add_u16_e32 v0, -1, v0
558 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f
559 ; GFX8CHECK-NEXT: v_cmp_gt_u16_e64 s[4:5], s4, v0
560 ; GFX8CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
561 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
562 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
564 ; GFX9CHECK-LABEL: negsubnormal_bf16:
565 ; GFX9CHECK: ; %bb.0:
566 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
567 ; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0
568 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
569 ; GFX9CHECK-NEXT: v_add_u16_e32 v0, -1, v0
570 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f
571 ; GFX9CHECK-NEXT: v_cmp_gt_u16_e64 s[4:5], s4, v0
572 ; GFX9CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
573 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
574 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
576 ; GFX10CHECK-LABEL: negsubnormal_bf16:
577 ; GFX10CHECK: ; %bb.0:
578 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
579 ; GFX10CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
580 ; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0
581 ; GFX10CHECK-NEXT: v_add_nc_u16 v1, v1, -1
582 ; GFX10CHECK-NEXT: v_cmp_gt_u16_e64 s4, 0x7f, v1
583 ; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo
584 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
585 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
587 ; GFX11CHECK-LABEL: negsubnormal_bf16:
588 ; GFX11CHECK: ; %bb.0:
589 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
590 ; GFX11CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
591 ; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0
592 ; GFX11CHECK-NEXT: v_add_nc_u16 v1, v1, -1
593 ; GFX11CHECK-NEXT: v_cmp_gt_u16_e64 s0, 0x7f, v1
594 ; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo
595 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
596 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
597 %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 16) ; 0x010
; poszero_bf16: bfloat -> i1 function that queries llvm.is.fpclass.bf16 with class
; mask 64 (0x040); per the function name this is the positive-zero class. The
; expected output is one equality compare of the raw bits against 0.
601 define i1 @poszero_bf16(bfloat %x) nounwind {
602 ; GFX7CHECK-LABEL: poszero_bf16:
603 ; GFX7CHECK: ; %bb.0:
604 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
605 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
606 ; GFX7CHECK-NEXT: v_lshrrev_b32_e32 v0, 16, v0
607 ; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
608 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
609 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
611 ; GFX8CHECK-LABEL: poszero_bf16:
612 ; GFX8CHECK: ; %bb.0:
613 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
614 ; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
615 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
616 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
618 ; GFX9CHECK-LABEL: poszero_bf16:
619 ; GFX9CHECK: ; %bb.0:
620 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
621 ; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
622 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
623 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
625 ; GFX10CHECK-LABEL: poszero_bf16:
626 ; GFX10CHECK: ; %bb.0:
627 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
628 ; GFX10CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v0
629 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
630 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
632 ; GFX11CHECK-LABEL: poszero_bf16:
633 ; GFX11CHECK: ; %bb.0:
634 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
635 ; GFX11CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v0
636 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
637 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
638 %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 64) ; 0x040
; negzero_bf16: bfloat -> i1 function that queries llvm.is.fpclass.bf16 with class
; mask 32 (0x020); per the function name this is the negative-zero class. The
; expected output is one equality compare of the raw bits against 0x8000.
642 define i1 @negzero_bf16(bfloat %x) nounwind {
643 ; GFX7CHECK-LABEL: negzero_bf16:
644 ; GFX7CHECK: ; %bb.0:
645 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
646 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
647 ; GFX7CHECK-NEXT: v_lshrrev_b32_e32 v0, 16, v0
648 ; GFX7CHECK-NEXT: s_mov_b32 s4, 0x8000
649 ; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
650 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
651 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
653 ; GFX8CHECK-LABEL: negzero_bf16:
654 ; GFX8CHECK: ; %bb.0:
655 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
656 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x8000
657 ; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
658 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
659 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
661 ; GFX9CHECK-LABEL: negzero_bf16:
662 ; GFX9CHECK: ; %bb.0:
663 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
664 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x8000
665 ; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
666 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
667 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
669 ; GFX10CHECK-LABEL: negzero_bf16:
670 ; GFX10CHECK: ; %bb.0:
671 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
672 ; GFX10CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0
673 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
674 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
676 ; GFX11CHECK-LABEL: negzero_bf16:
677 ; GFX11CHECK: ; %bb.0:
678 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
679 ; GFX11CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0
680 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
681 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
682 %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 32) ; 0x020
; posfinite_bf16: bfloat -> i1 function that queries llvm.is.fpclass.bf16 with
; class mask 448 (0x1c0 = 0x040 | 0x080 | 0x100, the masks used by poszero_bf16,
; possubnormal_bf16 and posnormal_bf16 in this file). The expected output is a
; single unsigned compare of the raw bits: bits < 0x7f80.
686 define i1 @posfinite_bf16(bfloat %x) nounwind {
687 ; GFX7CHECK-LABEL: posfinite_bf16:
688 ; GFX7CHECK: ; %bb.0:
689 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
690 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
691 ; GFX7CHECK-NEXT: v_lshrrev_b32_e32 v0, 16, v0
692 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
693 ; GFX7CHECK-NEXT: v_cmp_gt_u32_e32 vcc, s4, v0
694 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
695 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
697 ; GFX8CHECK-LABEL: posfinite_bf16:
698 ; GFX8CHECK: ; %bb.0:
699 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
700 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
701 ; GFX8CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s4, v0
702 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
703 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
705 ; GFX9CHECK-LABEL: posfinite_bf16:
706 ; GFX9CHECK: ; %bb.0:
707 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
708 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
709 ; GFX9CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s4, v0
710 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
711 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
713 ; GFX10CHECK-LABEL: posfinite_bf16:
714 ; GFX10CHECK: ; %bb.0:
715 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
716 ; GFX10CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f80, v0
717 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
718 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
720 ; GFX11CHECK-LABEL: posfinite_bf16:
721 ; GFX11CHECK: ; %bb.0:
722 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
723 ; GFX11CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f80, v0
724 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
725 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
726 %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 448) ; 0x1c0
; negfinite_bf16: bfloat -> i1 function that queries llvm.is.fpclass.bf16 with
; class mask 56 (0x038 = 0x008 | 0x010 | 0x020, the masks used by negnormal_bf16,
; negsubnormal_bf16 and negzero_bf16 in this file). The expected output combines
; a sign test (bits < 0 as signed) with a signed compare of the sign-cleared bits:
; abs_bits < 0x7f80.
730 define i1 @negfinite_bf16(bfloat %x) nounwind {
731 ; GFX7CHECK-LABEL: negfinite_bf16:
732 ; GFX7CHECK: ; %bb.0:
733 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
734 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
735 ; GFX7CHECK-NEXT: v_ashrrev_i32_e32 v1, 16, v0
736 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
737 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
738 ; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1
739 ; GFX7CHECK-NEXT: v_cmp_gt_i32_e64 s[4:5], s4, v0
740 ; GFX7CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
741 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
742 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
744 ; GFX8CHECK-LABEL: negfinite_bf16:
745 ; GFX8CHECK: ; %bb.0:
746 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
747 ; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0
748 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
749 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
750 ; GFX8CHECK-NEXT: v_cmp_gt_i16_e64 s[4:5], s4, v0
751 ; GFX8CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
752 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
753 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
755 ; GFX9CHECK-LABEL: negfinite_bf16:
756 ; GFX9CHECK: ; %bb.0:
757 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
758 ; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0
759 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
760 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
761 ; GFX9CHECK-NEXT: v_cmp_gt_i16_e64 s[4:5], s4, v0
762 ; GFX9CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
763 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
764 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
766 ; GFX10CHECK-LABEL: negfinite_bf16:
767 ; GFX10CHECK: ; %bb.0:
768 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
769 ; GFX10CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
770 ; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0
771 ; GFX10CHECK-NEXT: v_cmp_gt_i16_e64 s4, 0x7f80, v1
772 ; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo
773 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
774 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
776 ; GFX11CHECK-LABEL: negfinite_bf16:
777 ; GFX11CHECK: ; %bb.0:
778 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
779 ; GFX11CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
780 ; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0
781 ; GFX11CHECK-NEXT: v_cmp_gt_i16_e64 s0, 0x7f80, v1
782 ; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo
783 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
784 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
785 %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 56) ; 0x038
; isnan_bf16: llvm.is.fpclass on a scalar bfloat with mask 3 (nan).
; The expected code clears the sign bit and checks that the remaining bits are
; greater than 0x7f80 (written as the signed compare "0x7f80 < value").
789 define i1 @isnan_bf16(bfloat %x) nounwind {
790 ; GFX7CHECK-LABEL: isnan_bf16:
791 ; GFX7CHECK: ; %bb.0:
792 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
793 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
794 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
795 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
796 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
797 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
798 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
800 ; GFX8CHECK-LABEL: isnan_bf16:
801 ; GFX8CHECK: ; %bb.0:
802 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
803 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
804 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
805 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
806 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
807 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
809 ; GFX9CHECK-LABEL: isnan_bf16:
810 ; GFX9CHECK: ; %bb.0:
811 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
812 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
813 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
814 ; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
815 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
816 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
818 ; GFX10CHECK-LABEL: isnan_bf16:
819 ; GFX10CHECK: ; %bb.0:
820 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
821 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
822 ; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
823 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
824 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
826 ; GFX11CHECK-LABEL: isnan_bf16:
827 ; GFX11CHECK: ; %bb.0:
828 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
829 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
830 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
831 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
832 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
833 %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 3) ; nan
; not_isnan_bf16: llvm.is.fpclass on a scalar bfloat with mask 1020 (every class
; except the two nan classes). The expected code is the complement of the nan
; test: the value with the sign bit cleared must be below 0x7f81.
837 define i1 @not_isnan_bf16(bfloat %x) {
838 ; GFX7CHECK-LABEL: not_isnan_bf16:
839 ; GFX7CHECK: ; %bb.0:
840 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
841 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
842 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
843 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f81
844 ; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0
845 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
846 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
848 ; GFX8CHECK-LABEL: not_isnan_bf16:
849 ; GFX8CHECK: ; %bb.0:
850 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
851 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
852 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f81
853 ; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0
854 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
855 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
857 ; GFX9CHECK-LABEL: not_isnan_bf16:
858 ; GFX9CHECK: ; %bb.0:
859 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
860 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
861 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f81
862 ; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0
863 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
864 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
866 ; GFX10CHECK-LABEL: not_isnan_bf16:
867 ; GFX10CHECK: ; %bb.0:
868 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
869 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
870 ; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0
871 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
872 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
874 ; GFX11CHECK-LABEL: not_isnan_bf16:
875 ; GFX11CHECK: ; %bb.0:
876 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
877 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
878 ; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0
879 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
880 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
881 %class = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 1020) ; ~nan
; isnan_v2bf16: llvm.is.fpclass on <2 x bfloat> with mask 3 (nan), producing one
; i1 per element. In the expected gfx7 code each element is handled in its own
; register (v0, v1); for gfx8 and newer both elements are taken from the two
; 16-bit halves of v0. Each element is compared against 0x7f80 after its sign
; bit is cleared.
885 define <2 x i1> @isnan_v2bf16(<2 x bfloat> %x) nounwind {
886 ; GFX7CHECK-LABEL: isnan_v2bf16:
887 ; GFX7CHECK: ; %bb.0:
888 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
889 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
890 ; GFX7CHECK-NEXT: v_mul_f32_e32 v1, 1.0, v1
891 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
892 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
893 ; GFX7CHECK-NEXT: v_bfe_u32 v1, v1, 16, 15
894 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
895 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
896 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v1
897 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
898 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
900 ; GFX8CHECK-LABEL: isnan_v2bf16:
901 ; GFX8CHECK: ; %bb.0:
902 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
903 ; GFX8CHECK-NEXT: v_bfe_u32 v1, v0, 16, 15
904 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
905 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
906 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
907 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
908 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v1
909 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
910 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
912 ; GFX9CHECK-LABEL: isnan_v2bf16:
913 ; GFX9CHECK: ; %bb.0:
914 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
915 ; GFX9CHECK-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
916 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
917 ; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v1
918 ; GFX9CHECK-NEXT: v_cmp_gt_i16_sdwa s[4:5], v1, s4 src0_sel:WORD_1 src1_sel:DWORD
919 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
920 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
921 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
923 ; GFX10CHECK-LABEL: isnan_v2bf16:
924 ; GFX10CHECK: ; %bb.0:
925 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
926 ; GFX10CHECK-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
927 ; GFX10CHECK-NEXT: v_mov_b32_e32 v2, 0x7f80
928 ; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v1
929 ; GFX10CHECK-NEXT: v_cmp_gt_i16_sdwa s4, v1, v2 src0_sel:WORD_1 src1_sel:DWORD
930 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
931 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4
932 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
934 ; GFX11CHECK-LABEL: isnan_v2bf16:
935 ; GFX11CHECK: ; %bb.0:
936 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
937 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
938 ; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0
939 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
940 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
941 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v1
942 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
943 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
944 %1 = call <2 x i1> @llvm.is.fpclass.v2bf16(<2 x bfloat> %x, i32 3) ; nan
; isnan_v3bf16: llvm.is.fpclass on <3 x bfloat> with mask 3 (nan), producing one
; i1 per element. In the expected gfx7 code the elements are in v0, v1 and v2;
; for gfx8 and newer the first two elements come from the halves of v0 and the
; third from the low 16 bits of v1. The file header notes that GlobalISel
; crashes on v3, which is why only the SelectionDAG RUN lines are enabled.
948 define <3 x i1> @isnan_v3bf16(<3 x bfloat> %x) nounwind {
949 ; GFX7CHECK-LABEL: isnan_v3bf16:
950 ; GFX7CHECK: ; %bb.0:
951 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
952 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
953 ; GFX7CHECK-NEXT: v_mul_f32_e32 v1, 1.0, v1
954 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
955 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
956 ; GFX7CHECK-NEXT: v_mul_f32_e32 v2, 1.0, v2
957 ; GFX7CHECK-NEXT: v_bfe_u32 v1, v1, 16, 15
958 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
959 ; GFX7CHECK-NEXT: v_bfe_u32 v2, v2, 16, 15
960 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
961 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v1
962 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
963 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v2
964 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
965 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
967 ; GFX8CHECK-LABEL: isnan_v3bf16:
968 ; GFX8CHECK: ; %bb.0:
969 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
970 ; GFX8CHECK-NEXT: v_and_b32_e32 v2, 0x7fff, v1
971 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
972 ; GFX8CHECK-NEXT: v_bfe_u32 v1, v0, 16, 15
973 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
974 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
975 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
976 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v1
977 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
978 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v2
979 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
980 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
982 ; GFX9CHECK-LABEL: isnan_v3bf16:
983 ; GFX9CHECK: ; %bb.0:
984 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
985 ; GFX9CHECK-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v0
986 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
987 ; GFX9CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v1
988 ; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v3
989 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
990 ; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v1
991 ; GFX9CHECK-NEXT: v_cmp_gt_i16_sdwa s[4:5], v3, s4 src0_sel:WORD_1 src1_sel:DWORD
992 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
993 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
994 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
996 ; GFX10CHECK-LABEL: isnan_v3bf16:
997 ; GFX10CHECK: ; %bb.0:
998 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
999 ; GFX10CHECK-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v0
1000 ; GFX10CHECK-NEXT: v_mov_b32_e32 v3, 0x7f80
1001 ; GFX10CHECK-NEXT: v_and_b32_e32 v4, 0x7fff, v1
1002 ; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v2
1003 ; GFX10CHECK-NEXT: v_cmp_gt_i16_sdwa s4, v2, v3 src0_sel:WORD_1 src1_sel:DWORD
1004 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1005 ; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v4
1006 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4
1007 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
1008 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
1010 ; GFX11CHECK-LABEL: isnan_v3bf16:
1011 ; GFX11CHECK: ; %bb.0:
1012 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1013 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
1014 ; GFX11CHECK-NEXT: v_and_b32_e32 v3, 0x7fff, v1
1015 ; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v2, 16, v0
1016 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
1017 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1018 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v2
1019 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
1020 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v3
1021 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
1022 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
1023 %1 = call <3 x i1> @llvm.is.fpclass.v3bf16(<3 x bfloat> %x, i32 3) ; nan
; isnan_v4bf16: llvm.is.fpclass on <4 x bfloat> with mask 3 (nan), producing one
; i1 per element. In the expected gfx7 code the elements are in v0..v3; for gfx8
; and newer they are taken from the 16-bit halves of v0 and v1. Each element is
; compared against 0x7f80 after its sign bit is cleared.
1027 define <4 x i1> @isnan_v4bf16(<4 x bfloat> %x) nounwind {
1028 ; GFX7CHECK-LABEL: isnan_v4bf16:
1029 ; GFX7CHECK: ; %bb.0:
1030 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1031 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
1032 ; GFX7CHECK-NEXT: v_mul_f32_e32 v1, 1.0, v1
1033 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
1034 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
1035 ; GFX7CHECK-NEXT: v_mul_f32_e32 v2, 1.0, v2
1036 ; GFX7CHECK-NEXT: v_bfe_u32 v1, v1, 16, 15
1037 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
1038 ; GFX7CHECK-NEXT: v_mul_f32_e32 v3, 1.0, v3
1039 ; GFX7CHECK-NEXT: v_bfe_u32 v2, v2, 16, 15
1040 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1041 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v1
1042 ; GFX7CHECK-NEXT: v_bfe_u32 v3, v3, 16, 15
1043 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
1044 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v2
1045 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1046 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v3
1047 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
1048 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
1050 ; GFX8CHECK-LABEL: isnan_v4bf16:
1051 ; GFX8CHECK: ; %bb.0:
1052 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1053 ; GFX8CHECK-NEXT: v_bfe_u32 v3, v1, 16, 15
1054 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
1055 ; GFX8CHECK-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v1
1056 ; GFX8CHECK-NEXT: v_bfe_u32 v1, v0, 16, 15
1057 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
1058 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
1059 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1060 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v1
1061 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
1062 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v2
1063 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1064 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v3
1065 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
1066 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
1068 ; GFX9CHECK-LABEL: isnan_v4bf16:
1069 ; GFX9CHECK: ; %bb.0:
1070 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1071 ; GFX9CHECK-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v1
1072 ; GFX9CHECK-NEXT: s_movk_i32 s6, 0x7f80
1073 ; GFX9CHECK-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
1074 ; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s6, v1
1075 ; GFX9CHECK-NEXT: v_cmp_gt_i16_sdwa s[4:5], v1, s6 src0_sel:WORD_1 src1_sel:DWORD
1076 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1077 ; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s6, v3
1078 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
1079 ; GFX9CHECK-NEXT: v_cmp_gt_i16_sdwa s[4:5], v3, s6 src0_sel:WORD_1 src1_sel:DWORD
1080 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1081 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5]
1082 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
1084 ; GFX10CHECK-LABEL: isnan_v4bf16:
1085 ; GFX10CHECK: ; %bb.0:
1086 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1087 ; GFX10CHECK-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v0
1088 ; GFX10CHECK-NEXT: v_mov_b32_e32 v5, 0x7f80
1089 ; GFX10CHECK-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v1
1090 ; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v3
1091 ; GFX10CHECK-NEXT: v_cmp_gt_i16_sdwa s4, v3, v5 src0_sel:WORD_1 src1_sel:DWORD
1092 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1093 ; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v4
1094 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4
1095 ; GFX10CHECK-NEXT: v_cmp_gt_i16_sdwa s4, v4, v5 src0_sel:WORD_1 src1_sel:DWORD
1096 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
1097 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
1098 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
1100 ; GFX11CHECK-LABEL: isnan_v4bf16:
1101 ; GFX11CHECK: ; %bb.0:
1102 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1103 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
1104 ; GFX11CHECK-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1
1105 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
1106 ; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v4, 16, v0
1107 ; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v3, 16, v1
1108 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1109 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v1
1110 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
1111 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v4
1112 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
1113 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v3
1114 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
1115 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
1116 %1 = call <4 x i1> @llvm.is.fpclass.v4bf16(<4 x bfloat> %x, i32 3) ; nan
1120 ; FIXME: The strictfp variant below is disabled (commented out) because it is broken for gfx6/7.
1121 ; define i1 @isnan_bf16_strictfp(bfloat %x) strictfp nounwind {
1122 ; %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 3) strictfp ; nan
; isinf_bf16: llvm.is.fpclass on a scalar bfloat with mask 516 (0x204, both
; infinities). The expected code clears the sign bit and compares the remaining
; bits for equality with 0x7f80.
1126 define i1 @isinf_bf16(bfloat %x) nounwind {
1127 ; GFX7CHECK-LABEL: isinf_bf16:
1128 ; GFX7CHECK: ; %bb.0:
1129 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1130 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
1131 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
1132 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
1133 ; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
1134 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1135 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
1137 ; GFX8CHECK-LABEL: isinf_bf16:
1138 ; GFX8CHECK: ; %bb.0:
1139 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1140 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1141 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
1142 ; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
1143 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1144 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
1146 ; GFX9CHECK-LABEL: isinf_bf16:
1147 ; GFX9CHECK: ; %bb.0:
1148 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1149 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1150 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
1151 ; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
1152 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1153 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
1155 ; GFX10CHECK-LABEL: isinf_bf16:
1156 ; GFX10CHECK: ; %bb.0:
1157 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1158 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1159 ; GFX10CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
1160 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1161 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
1163 ; GFX11CHECK-LABEL: isinf_bf16:
1164 ; GFX11CHECK: ; %bb.0:
1165 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1166 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1167 ; GFX11CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
1168 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1169 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
1170 %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 516) ; 0x204 = "inf"
; isfinite_bf16: llvm.is.fpclass on a scalar bfloat with mask 504 (0x1f8, every
; zero, subnormal and normal class). The expected code clears the sign bit and
; checks that the remaining bits are below 0x7f80.
1174 define i1 @isfinite_bf16(bfloat %x) nounwind {
1175 ; GFX7CHECK-LABEL: isfinite_bf16:
1176 ; GFX7CHECK: ; %bb.0:
1177 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1178 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
1179 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
1180 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
1181 ; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0
1182 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1183 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
1185 ; GFX8CHECK-LABEL: isfinite_bf16:
1186 ; GFX8CHECK: ; %bb.0:
1187 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1188 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1189 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
1190 ; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0
1191 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1192 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
1194 ; GFX9CHECK-LABEL: isfinite_bf16:
1195 ; GFX9CHECK: ; %bb.0:
1196 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1197 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1198 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
1199 ; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0
1200 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1201 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
1203 ; GFX10CHECK-LABEL: isfinite_bf16:
1204 ; GFX10CHECK: ; %bb.0:
1205 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1206 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1207 ; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7f80, v0
1208 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1209 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
1211 ; GFX11CHECK-LABEL: isfinite_bf16:
1212 ; GFX11CHECK: ; %bb.0:
1213 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1214 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1215 ; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7f80, v0
1216 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1217 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
1218 %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 504) ; 0x1f8 = "finite"
; issubnormal_or_zero_bf16: llvm.is.fpclass on a scalar bfloat with mask 240
; (0xf0, zero and subnormal of either sign). The expected code masks the value
; with 0x7f80 and checks that the result equals zero.
1222 define i1 @issubnormal_or_zero_bf16(bfloat %x) {
1223 ; GFX7CHECK-LABEL: issubnormal_or_zero_bf16:
1224 ; GFX7CHECK: ; %bb.0: ; %entry
1225 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1226 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
1227 ; GFX7CHECK-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1228 ; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0x7f80, v0
1229 ; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1230 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1231 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
1233 ; GFX8CHECK-LABEL: issubnormal_or_zero_bf16:
1234 ; GFX8CHECK: ; %bb.0: ; %entry
1235 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1236 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7f80, v0
1237 ; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
1238 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1239 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
1241 ; GFX9CHECK-LABEL: issubnormal_or_zero_bf16:
1242 ; GFX9CHECK: ; %bb.0: ; %entry
1243 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1244 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7f80, v0
1245 ; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
1246 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1247 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
1249 ; GFX10CHECK-LABEL: issubnormal_or_zero_bf16:
1250 ; GFX10CHECK: ; %bb.0: ; %entry
1251 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1252 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7f80, v0
1253 ; GFX10CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v0
1254 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1255 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
1257 ; GFX11CHECK-LABEL: issubnormal_or_zero_bf16:
1258 ; GFX11CHECK: ; %bb.0: ; %entry
1259 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1260 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7f80, v0
1261 ; GFX11CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v0
1262 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1263 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
1265 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 240) ; 0xf0 = "subnormal|zero"
; not_issubnormal_or_zero_bf16: llvm.is.fpclass on a scalar bfloat with mask 783
; (the 10-bit complement of 0xf0: nan, inf and normal classes). The expected
; code masks the value with 0x7f80 and checks that the result is non-zero.
1269 define i1 @not_issubnormal_or_zero_bf16(bfloat %x) {
1270 ; GFX7CHECK-LABEL: not_issubnormal_or_zero_bf16:
1271 ; GFX7CHECK: ; %bb.0: ; %entry
1272 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1273 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
1274 ; GFX7CHECK-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1275 ; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0x7f80, v0
1276 ; GFX7CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
1277 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1278 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
1280 ; GFX8CHECK-LABEL: not_issubnormal_or_zero_bf16:
1281 ; GFX8CHECK: ; %bb.0: ; %entry
1282 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1283 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7f80, v0
1284 ; GFX8CHECK-NEXT: v_cmp_ne_u16_e32 vcc, 0, v0
1285 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1286 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
1288 ; GFX9CHECK-LABEL: not_issubnormal_or_zero_bf16:
1289 ; GFX9CHECK: ; %bb.0: ; %entry
1290 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1291 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7f80, v0
1292 ; GFX9CHECK-NEXT: v_cmp_ne_u16_e32 vcc, 0, v0
1293 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1294 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
1296 ; GFX10CHECK-LABEL: not_issubnormal_or_zero_bf16:
1297 ; GFX10CHECK: ; %bb.0: ; %entry
1298 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1299 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7f80, v0
1300 ; GFX10CHECK-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0, v0
1301 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1302 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
1304 ; GFX11CHECK-LABEL: not_issubnormal_or_zero_bf16:
1305 ; GFX11CHECK: ; %bb.0: ; %entry
1306 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1307 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7f80, v0
1308 ; GFX11CHECK-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0, v0
1309 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1310 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
1312 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 783) ; ~0xf0 = "~(subnormal|zero)"
; isnormal_bf16: llvm.is.fpclass on a scalar bfloat with mask 264 (0x108,
; negnormal|posnormal). The expected code clears the sign bit, adds 0xff80
; (i.e. subtracts 0x80 in 16-bit arithmetic) and does a single unsigned
; compare "result < 0x7f00", which covers the range 0x0080..0x7f7f in one test.
1316 define i1 @isnormal_bf16(bfloat %x) {
1317 ; GFX7CHECK-LABEL: isnormal_bf16:
1318 ; GFX7CHECK: ; %bb.0:
1319 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1320 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
1321 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
1322 ; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, 0xffffff80, v0
1323 ; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
1324 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f00
1325 ; GFX7CHECK-NEXT: v_cmp_gt_u32_e32 vcc, s4, v0
1326 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1327 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
1329 ; GFX8CHECK-LABEL: isnormal_bf16:
1330 ; GFX8CHECK: ; %bb.0:
1331 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1332 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1333 ; GFX8CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0
1334 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f00
1335 ; GFX8CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s4, v0
1336 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1337 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
1339 ; GFX9CHECK-LABEL: isnormal_bf16:
1340 ; GFX9CHECK: ; %bb.0:
1341 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1342 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1343 ; GFX9CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0
1344 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f00
1345 ; GFX9CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s4, v0
1346 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1347 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
1349 ; GFX10CHECK-LABEL: isnormal_bf16:
1350 ; GFX10CHECK: ; %bb.0:
1351 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1352 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1353 ; GFX10CHECK-NEXT: v_add_nc_u16 v0, v0, 0xff80
1354 ; GFX10CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0
1355 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1356 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
1358 ; GFX11CHECK-LABEL: isnormal_bf16:
1359 ; GFX11CHECK: ; %bb.0:
1360 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1361 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1362 ; GFX11CHECK-NEXT: v_add_nc_u16 v0, v0, 0xff80
1363 ; GFX11CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0
1364 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1365 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
1366 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 264) ; 0x108 = "normal"
; not_isnormal_bf16: llvm.is.fpclass on a scalar bfloat with mask 759 (the
; 10-bit complement of 0x108, i.e. everything except the two normal classes).
; The expected code is the inverse of the normal range test: clear the sign
; bit, add 0xff80, then unsigned-compare "result > 0x7eff".
1370 define i1 @not_isnormal_bf16(bfloat %x) {
1371 ; GFX7CHECK-LABEL: not_isnormal_bf16:
1372 ; GFX7CHECK: ; %bb.0:
1373 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1374 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
1375 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
1376 ; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, 0xffffff80, v0
1377 ; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
1378 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7eff
1379 ; GFX7CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s4, v0
1380 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1381 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
1383 ; GFX8CHECK-LABEL: not_isnormal_bf16:
1384 ; GFX8CHECK: ; %bb.0:
1385 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1386 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1387 ; GFX8CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0
1388 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7eff
1389 ; GFX8CHECK-NEXT: v_cmp_lt_u16_e32 vcc, s4, v0
1390 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1391 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
1393 ; GFX9CHECK-LABEL: not_isnormal_bf16:
1394 ; GFX9CHECK: ; %bb.0:
1395 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1396 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1397 ; GFX9CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0
1398 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7eff
1399 ; GFX9CHECK-NEXT: v_cmp_lt_u16_e32 vcc, s4, v0
1400 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1401 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
1403 ; GFX10CHECK-LABEL: not_isnormal_bf16:
1404 ; GFX10CHECK: ; %bb.0:
1405 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1406 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1407 ; GFX10CHECK-NEXT: v_add_nc_u16 v0, v0, 0xff80
1408 ; GFX10CHECK-NEXT: v_cmp_lt_u16_e32 vcc_lo, 0x7eff, v0
1409 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1410 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
1412 ; GFX11CHECK-LABEL: not_isnormal_bf16:
1413 ; GFX11CHECK: ; %bb.0:
1414 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1415 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1416 ; GFX11CHECK-NEXT: v_add_nc_u16 v0, v0, 0xff80
1417 ; GFX11CHECK-NEXT: v_cmp_lt_u16_e32 vcc_lo, 0x7eff, v0
1418 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1419 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
1420 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 759) ; ~0x108 = "~normal"
; not_is_plus_normal_bf16: llvm.is.fpclass on a scalar bfloat with mask 767 (the
; 10-bit complement of 0x100, i.e. everything except posnormal). The expected
; code ORs two conditions: the value is negative (0 > x as a signed integer),
; or the sign-cleared value fails the normal range test (after adding 0xff80
; it is unsigned-greater than 0x7eff).
1424 define i1 @not_is_plus_normal_bf16(bfloat %x) {
1425 ; GFX7CHECK-LABEL: not_is_plus_normal_bf16:
1426 ; GFX7CHECK: ; %bb.0:
1427 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1428 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
1429 ; GFX7CHECK-NEXT: v_ashrrev_i32_e32 v1, 16, v0
1430 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
1431 ; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, 0xffffff80, v0
1432 ; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
1433 ; GFX7CHECK-NEXT: s_movk_i32 s6, 0x7eff
1434 ; GFX7CHECK-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v1
1435 ; GFX7CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s6, v0
1436 ; GFX7CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1437 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1438 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
1440 ; GFX8CHECK-LABEL: not_is_plus_normal_bf16:
1441 ; GFX8CHECK: ; %bb.0:
1442 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1443 ; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0
1444 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1445 ; GFX8CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0
1446 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7eff
1447 ; GFX8CHECK-NEXT: v_cmp_lt_u16_e64 s[4:5], s4, v0
1448 ; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
1449 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1450 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
1452 ; GFX9CHECK-LABEL: not_is_plus_normal_bf16:
1453 ; GFX9CHECK: ; %bb.0:
1454 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1455 ; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0
1456 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1457 ; GFX9CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0
1458 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7eff
1459 ; GFX9CHECK-NEXT: v_cmp_lt_u16_e64 s[4:5], s4, v0
1460 ; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
1461 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1462 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
1464 ; GFX10CHECK-LABEL: not_is_plus_normal_bf16:
1465 ; GFX10CHECK: ; %bb.0:
1466 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1467 ; GFX10CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
1468 ; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0
1469 ; GFX10CHECK-NEXT: v_add_nc_u16 v1, v1, 0xff80
1470 ; GFX10CHECK-NEXT: v_cmp_lt_u16_e64 s4, 0x7eff, v1
1471 ; GFX10CHECK-NEXT: s_or_b32 s4, s4, vcc_lo
1472 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
1473 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
1475 ; GFX11CHECK-LABEL: not_is_plus_normal_bf16:
1476 ; GFX11CHECK: ; %bb.0:
1477 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1478 ; GFX11CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
1479 ; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0
1480 ; GFX11CHECK-NEXT: v_add_nc_u16 v1, v1, 0xff80
1481 ; GFX11CHECK-NEXT: v_cmp_lt_u16_e64 s0, 0x7eff, v1
1482 ; GFX11CHECK-NEXT: s_or_b32 s0, s0, vcc_lo
1483 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
1484 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
1485 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 767) ; ~0x100 = ~"+normal"
; Lowering test for llvm.is.fpclass on a bfloat with mask 1015, i.e. every
; class bit except 0x008 (negative normal). The per-target expectations below
; are autogenerated. In the GFX8-GFX11 expectations the result is the OR of a
; sign test (-1 < x as i16) and an unsigned range check 0x7eff < ((x & 0x7fff)
; + 0xff80); GFX7 performs the same checks with 32-bit compares on the upper
; half of the register.
1489 define i1 @not_is_neg_normal_bf16(bfloat %x) {
1490 ; GFX7CHECK-LABEL: not_is_neg_normal_bf16:
1491 ; GFX7CHECK: ; %bb.0:
1492 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1493 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
1494 ; GFX7CHECK-NEXT: v_ashrrev_i32_e32 v1, 16, v0
1495 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
1496 ; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, 0xffffff80, v0
1497 ; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
1498 ; GFX7CHECK-NEXT: s_movk_i32 s6, 0x7eff
1499 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e64 s[4:5], -1, v1
1500 ; GFX7CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s6, v0
1501 ; GFX7CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1502 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1503 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
1505 ; GFX8CHECK-LABEL: not_is_neg_normal_bf16:
1506 ; GFX8CHECK: ; %bb.0:
1507 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1508 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, -1, v0
1509 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1510 ; GFX8CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0
1511 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7eff
1512 ; GFX8CHECK-NEXT: v_cmp_lt_u16_e64 s[4:5], s4, v0
1513 ; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
1514 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1515 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
1517 ; GFX9CHECK-LABEL: not_is_neg_normal_bf16:
1518 ; GFX9CHECK: ; %bb.0:
1519 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1520 ; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, -1, v0
1521 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1522 ; GFX9CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0
1523 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7eff
1524 ; GFX9CHECK-NEXT: v_cmp_lt_u16_e64 s[4:5], s4, v0
1525 ; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
1526 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1527 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
1529 ; GFX10CHECK-LABEL: not_is_neg_normal_bf16:
1530 ; GFX10CHECK: ; %bb.0:
1531 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1532 ; GFX10CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
1533 ; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, -1, v0
1534 ; GFX10CHECK-NEXT: v_add_nc_u16 v1, v1, 0xff80
1535 ; GFX10CHECK-NEXT: v_cmp_lt_u16_e64 s4, 0x7eff, v1
1536 ; GFX10CHECK-NEXT: s_or_b32 s4, s4, vcc_lo
1537 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
1538 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
1540 ; GFX11CHECK-LABEL: not_is_neg_normal_bf16:
1541 ; GFX11CHECK: ; %bb.0:
1542 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1543 ; GFX11CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
1544 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, -1, v0
1545 ; GFX11CHECK-NEXT: v_add_nc_u16 v1, v1, 0xff80
1546 ; GFX11CHECK-NEXT: v_cmp_lt_u16_e64 s0, 0x7eff, v1
1547 ; GFX11CHECK-NEXT: s_or_b32 s0, s0, vcc_lo
1548 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
1549 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
1550 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 1015) ; ~0x008 = ~"-normal"
; Lowering test for llvm.is.fpclass on a bfloat with mask 144 (0x90, the two
; subnormal class bits). The autogenerated expectations clear the sign bit,
; subtract 1, and test that the result is unsigned-less-than 0x7f; GFX7 does
; this with 32-bit operations after extracting bits 16..30 via v_bfe_u32.
1554 define i1 @issubnormal_bf16(bfloat %x) {
1555 ; GFX7CHECK-LABEL: issubnormal_bf16:
1556 ; GFX7CHECK: ; %bb.0:
1557 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1558 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
1559 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
1560 ; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, -1, v0
1561 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f
1562 ; GFX7CHECK-NEXT: v_cmp_gt_u32_e32 vcc, s4, v0
1563 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1564 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
1566 ; GFX8CHECK-LABEL: issubnormal_bf16:
1567 ; GFX8CHECK: ; %bb.0:
1568 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1569 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1570 ; GFX8CHECK-NEXT: v_add_u16_e32 v0, -1, v0
1571 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f
1572 ; GFX8CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s4, v0
1573 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1574 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
1576 ; GFX9CHECK-LABEL: issubnormal_bf16:
1577 ; GFX9CHECK: ; %bb.0:
1578 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1579 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1580 ; GFX9CHECK-NEXT: v_add_u16_e32 v0, -1, v0
1581 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f
1582 ; GFX9CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s4, v0
1583 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1584 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
1586 ; GFX10CHECK-LABEL: issubnormal_bf16:
1587 ; GFX10CHECK: ; %bb.0:
1588 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1589 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1590 ; GFX10CHECK-NEXT: v_add_nc_u16 v0, v0, -1
1591 ; GFX10CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f, v0
1592 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1593 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
1595 ; GFX11CHECK-LABEL: issubnormal_bf16:
1596 ; GFX11CHECK: ; %bb.0:
1597 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1598 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1599 ; GFX11CHECK-NEXT: v_add_nc_u16 v0, v0, -1
1600 ; GFX11CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f, v0
1601 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1602 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
1603 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 144) ; 0x90 = "subnormal"
; Lowering test for llvm.is.fpclass on a bfloat with mask 879, the complement
; of the subnormal mask 0x90 within the 10 class bits. The autogenerated
; expectations clear the sign bit, subtract 1, and test that the result is
; unsigned-greater-than 0x7e (the inverse of the issubnormal_bf16 comparison).
1607 define i1 @not_issubnormal_bf16(bfloat %x) {
1608 ; GFX7CHECK-LABEL: not_issubnormal_bf16:
1609 ; GFX7CHECK: ; %bb.0:
1610 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1611 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
1612 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
1613 ; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, -1, v0
1614 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7e
1615 ; GFX7CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s4, v0
1616 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1617 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
1619 ; GFX8CHECK-LABEL: not_issubnormal_bf16:
1620 ; GFX8CHECK: ; %bb.0:
1621 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1622 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1623 ; GFX8CHECK-NEXT: v_add_u16_e32 v0, -1, v0
1624 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7e
1625 ; GFX8CHECK-NEXT: v_cmp_lt_u16_e32 vcc, s4, v0
1626 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1627 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
1629 ; GFX9CHECK-LABEL: not_issubnormal_bf16:
1630 ; GFX9CHECK: ; %bb.0:
1631 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1632 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1633 ; GFX9CHECK-NEXT: v_add_u16_e32 v0, -1, v0
1634 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7e
1635 ; GFX9CHECK-NEXT: v_cmp_lt_u16_e32 vcc, s4, v0
1636 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1637 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
1639 ; GFX10CHECK-LABEL: not_issubnormal_bf16:
1640 ; GFX10CHECK: ; %bb.0:
1641 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1642 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1643 ; GFX10CHECK-NEXT: v_add_nc_u16 v0, v0, -1
1644 ; GFX10CHECK-NEXT: v_cmp_lt_u16_e32 vcc_lo, 0x7e, v0
1645 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1646 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
1648 ; GFX11CHECK-LABEL: not_issubnormal_bf16:
1649 ; GFX11CHECK: ; %bb.0:
1650 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1651 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1652 ; GFX11CHECK-NEXT: v_add_nc_u16 v0, v0, -1
1653 ; GFX11CHECK-NEXT: v_cmp_lt_u16_e32 vcc_lo, 0x7e, v0
1654 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1655 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
1656 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 879) ; ~0x90 = ~"subnormal"
; Lowering test for llvm.is.fpclass on a bfloat with mask 96 (0x60, the two
; zero class bits). The autogenerated expectations clear the sign bit and
; compare the remaining 15 bits for equality with 0.
1660 define i1 @iszero_bf16(bfloat %x) {
1661 ; GFX7CHECK-LABEL: iszero_bf16:
1662 ; GFX7CHECK: ; %bb.0:
1663 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1664 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
1665 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
1666 ; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1667 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1668 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
1670 ; GFX8CHECK-LABEL: iszero_bf16:
1671 ; GFX8CHECK: ; %bb.0:
1672 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1673 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1674 ; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
1675 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1676 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
1678 ; GFX9CHECK-LABEL: iszero_bf16:
1679 ; GFX9CHECK: ; %bb.0:
1680 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1681 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1682 ; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
1683 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1684 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
1686 ; GFX10CHECK-LABEL: iszero_bf16:
1687 ; GFX10CHECK: ; %bb.0:
1688 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1689 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1690 ; GFX10CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v0
1691 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1692 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
1694 ; GFX11CHECK-LABEL: iszero_bf16:
1695 ; GFX11CHECK: ; %bb.0:
1696 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1697 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1698 ; GFX11CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v0
1699 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1700 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
1701 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 96) ; 0x60 = "zero"
; Lowering test for llvm.is.fpclass on a bfloat with mask 927, the complement
; of the zero mask 0x60 within the 10 class bits. The autogenerated
; expectations clear the sign bit and compare the remaining 15 bits for
; inequality with 0.
1705 define i1 @not_iszero_bf16(bfloat %x) {
1706 ; GFX7CHECK-LABEL: not_iszero_bf16:
1707 ; GFX7CHECK: ; %bb.0:
1708 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1709 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
1710 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
1711 ; GFX7CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
1712 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1713 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
1715 ; GFX8CHECK-LABEL: not_iszero_bf16:
1716 ; GFX8CHECK: ; %bb.0:
1717 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1718 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1719 ; GFX8CHECK-NEXT: v_cmp_ne_u16_e32 vcc, 0, v0
1720 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1721 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
1723 ; GFX9CHECK-LABEL: not_iszero_bf16:
1724 ; GFX9CHECK: ; %bb.0:
1725 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1726 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1727 ; GFX9CHECK-NEXT: v_cmp_ne_u16_e32 vcc, 0, v0
1728 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1729 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
1731 ; GFX10CHECK-LABEL: not_iszero_bf16:
1732 ; GFX10CHECK: ; %bb.0:
1733 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1734 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1735 ; GFX10CHECK-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0, v0
1736 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1737 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
1739 ; GFX11CHECK-LABEL: not_iszero_bf16:
1740 ; GFX11CHECK: ; %bb.0:
1741 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1742 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1743 ; GFX11CHECK-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0, v0
1744 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1745 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
1746 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 927) ; ~0x60 = ~"zero"
; Lowering test for llvm.is.fpclass on a bfloat with mask 960 (0x3c0), labelled
; fcPositive on the call below. The autogenerated expectations reduce the test
; to one unsigned compare: the raw 16-bit pattern must be below 0x7f81 (GFX7
; first shifts the upper half of the register down by 16).
1750 define i1 @ispositive_bf16(bfloat %x) {
1751 ; GFX7CHECK-LABEL: ispositive_bf16:
1752 ; GFX7CHECK: ; %bb.0:
1753 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1754 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
1755 ; GFX7CHECK-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1756 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f81
1757 ; GFX7CHECK-NEXT: v_cmp_gt_u32_e32 vcc, s4, v0
1758 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1759 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
1761 ; GFX8CHECK-LABEL: ispositive_bf16:
1762 ; GFX8CHECK: ; %bb.0:
1763 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1764 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f81
1765 ; GFX8CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s4, v0
1766 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1767 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
1769 ; GFX9CHECK-LABEL: ispositive_bf16:
1770 ; GFX9CHECK: ; %bb.0:
1771 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1772 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f81
1773 ; GFX9CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s4, v0
1774 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1775 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
1777 ; GFX10CHECK-LABEL: ispositive_bf16:
1778 ; GFX10CHECK: ; %bb.0:
1779 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1780 ; GFX10CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f81, v0
1781 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1782 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
1784 ; GFX11CHECK-LABEL: ispositive_bf16:
1785 ; GFX11CHECK: ; %bb.0:
1786 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1787 ; GFX11CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f81, v0
1788 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1789 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
1790 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 960) ; fcPositive
; Lowering test for llvm.is.fpclass on a bfloat with mask 63 (0x3f), the
; complement of mask 960 within the 10 class bits. With m = x & 0x7fff, the
; autogenerated expectations OR together three conditions:
;   (x < 0 as a signed 16-bit value AND m < 0x7f80), x == 0xff80, m > 0x7f80.
; GFX7 evaluates the same conditions with 32-bit compares on the upper half of
; the register.
1794 define i1 @not_ispositive_bf16(bfloat %x) {
1795 ; GFX7CHECK-LABEL: not_ispositive_bf16:
1796 ; GFX7CHECK: ; %bb.0:
1797 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1798 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
1799 ; GFX7CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1800 ; GFX7CHECK-NEXT: v_ashrrev_i32_e32 v2, 16, v0
1801 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
1802 ; GFX7CHECK-NEXT: s_movk_i32 s6, 0x7f80
1803 ; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2
1804 ; GFX7CHECK-NEXT: v_cmp_gt_i32_e64 s[4:5], s6, v0
1805 ; GFX7CHECK-NEXT: s_mov_b32 s7, 0xff80
1806 ; GFX7CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
1807 ; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s7, v1
1808 ; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
1809 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s6, v0
1810 ; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
1811 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1812 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
1814 ; GFX8CHECK-LABEL: not_ispositive_bf16:
1815 ; GFX8CHECK: ; %bb.0:
1816 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1817 ; GFX8CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
1818 ; GFX8CHECK-NEXT: s_movk_i32 s6, 0x7f80
1819 ; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0
1820 ; GFX8CHECK-NEXT: v_cmp_gt_i16_e64 s[4:5], s6, v1
1821 ; GFX8CHECK-NEXT: s_movk_i32 s7, 0xff80
1822 ; GFX8CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
1823 ; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s7, v0
1824 ; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
1825 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s6, v1
1826 ; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
1827 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1828 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
1830 ; GFX9CHECK-LABEL: not_ispositive_bf16:
1831 ; GFX9CHECK: ; %bb.0:
1832 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1833 ; GFX9CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
1834 ; GFX9CHECK-NEXT: s_movk_i32 s6, 0x7f80
1835 ; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0
1836 ; GFX9CHECK-NEXT: v_cmp_gt_i16_e64 s[4:5], s6, v1
1837 ; GFX9CHECK-NEXT: s_movk_i32 s7, 0xff80
1838 ; GFX9CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
1839 ; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s7, v0
1840 ; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
1841 ; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s6, v1
1842 ; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
1843 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1844 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
1846 ; GFX10CHECK-LABEL: not_ispositive_bf16:
1847 ; GFX10CHECK: ; %bb.0:
1848 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1849 ; GFX10CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
1850 ; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0
1851 ; GFX10CHECK-NEXT: v_cmp_eq_u16_e64 s5, 0xff80, v0
1852 ; GFX10CHECK-NEXT: v_cmp_gt_i16_e64 s4, 0x7f80, v1
1853 ; GFX10CHECK-NEXT: v_cmp_lt_i16_e64 s6, 0x7f80, v1
1854 ; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo
1855 ; GFX10CHECK-NEXT: s_or_b32 s4, s4, s5
1856 ; GFX10CHECK-NEXT: s_or_b32 s4, s4, s6
1857 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
1858 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
1860 ; GFX11CHECK-LABEL: not_ispositive_bf16:
1861 ; GFX11CHECK: ; %bb.0:
1862 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1863 ; GFX11CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
1864 ; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0
1865 ; GFX11CHECK-NEXT: v_cmp_eq_u16_e64 s1, 0xff80, v0
1866 ; GFX11CHECK-NEXT: v_cmp_gt_i16_e64 s0, 0x7f80, v1
1867 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e64 s2, 0x7f80, v1
1868 ; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo
1869 ; GFX11CHECK-NEXT: s_or_b32 s0, s0, s1
1870 ; GFX11CHECK-NEXT: s_or_b32 s0, s0, s2
1871 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
1872 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
1873 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 63) ; ~fcPositive
; Lowering test for llvm.is.fpclass on a bfloat with mask 60 (0x3c), labelled
; fcNegative on the call below. With m = x & 0x7fff, the autogenerated
; expectations compute
;   (x < 0 as a signed 16-bit value AND m < 0x7f80) OR x == 0xff80.
; GFX7 evaluates the same conditions with 32-bit compares on the upper half of
; the register.
1877 define i1 @isnegative_bf16(bfloat %x) {
1878 ; GFX7CHECK-LABEL: isnegative_bf16:
1879 ; GFX7CHECK: ; %bb.0:
1880 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1881 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
1882 ; GFX7CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1883 ; GFX7CHECK-NEXT: v_ashrrev_i32_e32 v2, 16, v0
1884 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
1885 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
1886 ; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2
1887 ; GFX7CHECK-NEXT: v_cmp_gt_i32_e64 s[4:5], s4, v0
1888 ; GFX7CHECK-NEXT: s_mov_b32 s6, 0xff80
1889 ; GFX7CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
1890 ; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s6, v1
1891 ; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
1892 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1893 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
1895 ; GFX8CHECK-LABEL: isnegative_bf16:
1896 ; GFX8CHECK: ; %bb.0:
1897 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1898 ; GFX8CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
1899 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
1900 ; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0
1901 ; GFX8CHECK-NEXT: v_cmp_gt_i16_e64 s[4:5], s4, v1
1902 ; GFX8CHECK-NEXT: s_movk_i32 s6, 0xff80
1903 ; GFX8CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
1904 ; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s6, v0
1905 ; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
1906 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1907 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
1909 ; GFX9CHECK-LABEL: isnegative_bf16:
1910 ; GFX9CHECK: ; %bb.0:
1911 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1912 ; GFX9CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
1913 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
1914 ; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0
1915 ; GFX9CHECK-NEXT: v_cmp_gt_i16_e64 s[4:5], s4, v1
1916 ; GFX9CHECK-NEXT: s_movk_i32 s6, 0xff80
1917 ; GFX9CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
1918 ; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s6, v0
1919 ; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
1920 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1921 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
1923 ; GFX10CHECK-LABEL: isnegative_bf16:
1924 ; GFX10CHECK: ; %bb.0:
1925 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1926 ; GFX10CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
1927 ; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0
1928 ; GFX10CHECK-NEXT: v_cmp_eq_u16_e64 s5, 0xff80, v0
1929 ; GFX10CHECK-NEXT: v_cmp_gt_i16_e64 s4, 0x7f80, v1
1930 ; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo
1931 ; GFX10CHECK-NEXT: s_or_b32 s4, s4, s5
1932 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
1933 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
1935 ; GFX11CHECK-LABEL: isnegative_bf16:
1936 ; GFX11CHECK: ; %bb.0:
1937 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1938 ; GFX11CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
1939 ; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0
1940 ; GFX11CHECK-NEXT: v_cmp_eq_u16_e64 s1, 0xff80, v0
1941 ; GFX11CHECK-NEXT: v_cmp_gt_i16_e64 s0, 0x7f80, v1
1942 ; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo
1943 ; GFX11CHECK-NEXT: s_or_b32 s0, s0, s1
1944 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
1945 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
1946 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 60) ; fcNegative
; Lowering test for llvm.is.fpclass on a bfloat with mask 963 (0x3c3), the
; complement of mask 60 within the 10 class bits. With m = x & 0x7fff, the
; autogenerated expectations compute
;   (x < 0x7f81 as an unsigned value) OR (m > 0x7f80).
; GFX7 evaluates the same conditions with 32-bit compares on the upper half of
; the register.
1950 define i1 @not_isnegative_bf16(bfloat %x) {
1951 ; GFX7CHECK-LABEL: not_isnegative_bf16:
1952 ; GFX7CHECK: ; %bb.0:
1953 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1954 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
1955 ; GFX7CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1956 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
1957 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
1958 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
1959 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f81
1960 ; GFX7CHECK-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1
1961 ; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
1962 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1963 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
1965 ; GFX8CHECK-LABEL: not_isnegative_bf16:
1966 ; GFX8CHECK: ; %bb.0:
1967 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1968 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f81
1969 ; GFX8CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s4, v0
1970 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1971 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
1972 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e64 s[4:5], s4, v0
1973 ; GFX8CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1974 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1975 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
1977 ; GFX9CHECK-LABEL: not_isnegative_bf16:
1978 ; GFX9CHECK: ; %bb.0:
1979 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1980 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f81
1981 ; GFX9CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s4, v0
1982 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1983 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
1984 ; GFX9CHECK-NEXT: v_cmp_lt_i16_e64 s[4:5], s4, v0
1985 ; GFX9CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1986 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1987 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
1989 ; GFX10CHECK-LABEL: not_isnegative_bf16:
1990 ; GFX10CHECK: ; %bb.0:
1991 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1992 ; GFX10CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
1993 ; GFX10CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f81, v0
1994 ; GFX10CHECK-NEXT: v_cmp_lt_i16_e64 s4, 0x7f80, v1
1995 ; GFX10CHECK-NEXT: s_or_b32 s4, vcc_lo, s4
1996 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
1997 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
1999 ; GFX11CHECK-LABEL: not_isnegative_bf16:
2000 ; GFX11CHECK: ; %bb.0:
2001 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2002 ; GFX11CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
2003 ; GFX11CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f81, v0
2004 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e64 s0, 0x7f80, v1
2005 ; GFX11CHECK-NEXT: s_or_b32 s0, vcc_lo, s0
2006 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
2007 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
2008 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 963) ; ~fcNegative
; Lowering test for llvm.is.fpclass on a bfloat with mask 99 (0x60|0x3, the
; zero and nan class bits). With m = x & 0x7fff, the autogenerated expectations
; compute (m == 0) OR (m > 0x7f80). GFX7 does the same with 32-bit compares
; after extracting bits 16..30 via v_bfe_u32.
2012 define i1 @iszero_or_nan_bf16(bfloat %x) {
2013 ; GFX7CHECK-LABEL: iszero_or_nan_bf16:
2014 ; GFX7CHECK: ; %bb.0: ; %entry
2015 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2016 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
2017 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
2018 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
2019 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
2020 ; GFX7CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
2021 ; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2022 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2023 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
2025 ; GFX8CHECK-LABEL: iszero_or_nan_bf16:
2026 ; GFX8CHECK: ; %bb.0: ; %entry
2027 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2028 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2029 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
2030 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
2031 ; GFX8CHECK-NEXT: v_cmp_eq_u16_e64 s[4:5], 0, v0
2032 ; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2033 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2034 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
2036 ; GFX9CHECK-LABEL: iszero_or_nan_bf16:
2037 ; GFX9CHECK: ; %bb.0: ; %entry
2038 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2039 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2040 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
2041 ; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
2042 ; GFX9CHECK-NEXT: v_cmp_eq_u16_e64 s[4:5], 0, v0
2043 ; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2044 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2045 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
2047 ; GFX10CHECK-LABEL: iszero_or_nan_bf16:
2048 ; GFX10CHECK: ; %bb.0: ; %entry
2049 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2050 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2051 ; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
2052 ; GFX10CHECK-NEXT: v_cmp_eq_u16_e64 s4, 0, v0
2053 ; GFX10CHECK-NEXT: s_or_b32 s4, s4, vcc_lo
2054 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
2055 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
2057 ; GFX11CHECK-LABEL: iszero_or_nan_bf16:
2058 ; GFX11CHECK: ; %bb.0: ; %entry
2059 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2060 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2061 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
2062 ; GFX11CHECK-NEXT: v_cmp_eq_u16_e64 s0, 0, v0
2063 ; GFX11CHECK-NEXT: s_or_b32 s0, s0, vcc_lo
2064 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
2065 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
2067 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 99) ; 0x60|0x3 = "zero|nan"
; Same zero-or-nan query (mask 99) as iszero_or_nan_bf16, but the function
; carries attribute group #0. That group is defined outside this part of the
; file; the "daz" in the name suggests a denormals-are-zero floating-point
; mode -- TODO confirm against the attribute definition. The autogenerated
; expectations are instruction-for-instruction the same as those of
; iszero_or_nan_bf16 on every tested target.
2071 define i1 @iszero_or_nan_f_daz(bfloat %x) #0 {
2072 ; GFX7CHECK-LABEL: iszero_or_nan_f_daz:
2073 ; GFX7CHECK: ; %bb.0: ; %entry
2074 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2075 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
2076 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
2077 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
2078 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
2079 ; GFX7CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
2080 ; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2081 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2082 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
2084 ; GFX8CHECK-LABEL: iszero_or_nan_f_daz:
2085 ; GFX8CHECK: ; %bb.0: ; %entry
2086 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2087 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2088 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
2089 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
2090 ; GFX8CHECK-NEXT: v_cmp_eq_u16_e64 s[4:5], 0, v0
2091 ; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2092 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2093 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
2095 ; GFX9CHECK-LABEL: iszero_or_nan_f_daz:
2096 ; GFX9CHECK: ; %bb.0: ; %entry
2097 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2098 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2099 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
2100 ; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
2101 ; GFX9CHECK-NEXT: v_cmp_eq_u16_e64 s[4:5], 0, v0
2102 ; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2103 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2104 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
2106 ; GFX10CHECK-LABEL: iszero_or_nan_f_daz:
2107 ; GFX10CHECK: ; %bb.0: ; %entry
2108 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2109 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2110 ; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
2111 ; GFX10CHECK-NEXT: v_cmp_eq_u16_e64 s4, 0, v0
2112 ; GFX10CHECK-NEXT: s_or_b32 s4, s4, vcc_lo
2113 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
2114 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
2116 ; GFX11CHECK-LABEL: iszero_or_nan_f_daz:
2117 ; GFX11CHECK: ; %bb.0: ; %entry
2118 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2119 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2120 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
2121 ; GFX11CHECK-NEXT: v_cmp_eq_u16_e64 s0, 0, v0
2122 ; GFX11CHECK-NEXT: s_or_b32 s0, s0, vcc_lo
2123 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
2124 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
2126 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 99) ; 0x60|0x3 = "zero|nan"
; Same zero-or-nan query (mask 99) as iszero_or_nan_bf16, but the function
; carries attribute group #1. That group is defined outside this part of the
; file; the "maybe_daz" in the name suggests a denormal mode that is not
; statically known -- TODO confirm against the attribute definition. The
; autogenerated expectations are instruction-for-instruction the same as those
; of iszero_or_nan_bf16 on every tested target.
2130 define i1 @iszero_or_nan_f_maybe_daz(bfloat %x) #1 {
2131 ; GFX7CHECK-LABEL: iszero_or_nan_f_maybe_daz:
2132 ; GFX7CHECK: ; %bb.0: ; %entry
2133 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2134 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
2135 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
2136 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
2137 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
2138 ; GFX7CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
2139 ; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2140 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2141 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
2143 ; GFX8CHECK-LABEL: iszero_or_nan_f_maybe_daz:
2144 ; GFX8CHECK: ; %bb.0: ; %entry
2145 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2146 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2147 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
2148 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
2149 ; GFX8CHECK-NEXT: v_cmp_eq_u16_e64 s[4:5], 0, v0
2150 ; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2151 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2152 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
2154 ; GFX9CHECK-LABEL: iszero_or_nan_f_maybe_daz:
2155 ; GFX9CHECK: ; %bb.0: ; %entry
2156 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2157 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2158 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
2159 ; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
2160 ; GFX9CHECK-NEXT: v_cmp_eq_u16_e64 s[4:5], 0, v0
2161 ; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2162 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2163 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
2165 ; GFX10CHECK-LABEL: iszero_or_nan_f_maybe_daz:
2166 ; GFX10CHECK: ; %bb.0: ; %entry
2167 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2168 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2169 ; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
2170 ; GFX10CHECK-NEXT: v_cmp_eq_u16_e64 s4, 0, v0
2171 ; GFX10CHECK-NEXT: s_or_b32 s4, s4, vcc_lo
2172 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
2173 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
2175 ; GFX11CHECK-LABEL: iszero_or_nan_f_maybe_daz:
2176 ; GFX11CHECK: ; %bb.0: ; %entry
2177 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2178 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2179 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
2180 ; GFX11CHECK-NEXT: v_cmp_eq_u16_e64 s0, 0, v0
2181 ; GFX11CHECK-NEXT: s_or_b32 s0, s0, vcc_lo
2182 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
2183 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
2185 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 99) ; 0x60|0x3 = "zero|nan"
2189 define i1 @not_iszero_or_nan_bf16(bfloat %x) {
2190 ; GFX7CHECK-LABEL: not_iszero_or_nan_bf16:
2191 ; GFX7CHECK: ; %bb.0: ; %entry
2192 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2193 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
2194 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
2195 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f81
2196 ; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0
2197 ; GFX7CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0
2198 ; GFX7CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
2199 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2200 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
2202 ; GFX8CHECK-LABEL: not_iszero_or_nan_bf16:
2203 ; GFX8CHECK: ; %bb.0: ; %entry
2204 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2205 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2206 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f81
2207 ; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0
2208 ; GFX8CHECK-NEXT: v_cmp_ne_u16_e64 s[4:5], 0, v0
2209 ; GFX8CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
2210 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2211 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
2213 ; GFX9CHECK-LABEL: not_iszero_or_nan_bf16:
2214 ; GFX9CHECK: ; %bb.0: ; %entry
2215 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2216 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2217 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f81
2218 ; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0
2219 ; GFX9CHECK-NEXT: v_cmp_ne_u16_e64 s[4:5], 0, v0
2220 ; GFX9CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
2221 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2222 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
2224 ; GFX10CHECK-LABEL: not_iszero_or_nan_bf16:
2225 ; GFX10CHECK: ; %bb.0: ; %entry
2226 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2227 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2228 ; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0
2229 ; GFX10CHECK-NEXT: v_cmp_ne_u16_e64 s4, 0, v0
2230 ; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo
2231 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
2232 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
2234 ; GFX11CHECK-LABEL: not_iszero_or_nan_bf16:
2235 ; GFX11CHECK: ; %bb.0: ; %entry
2236 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2237 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2238 ; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0
2239 ; GFX11CHECK-NEXT: v_cmp_ne_u16_e64 s0, 0, v0
2240 ; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo
2241 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
2242 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
2244 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 924) ; ~(0x60|0x3) = "~(zero|nan)"
2248 define i1 @not_iszero_or_nan_f_daz(bfloat %x) #0 {
2249 ; GFX7CHECK-LABEL: not_iszero_or_nan_f_daz:
2250 ; GFX7CHECK: ; %bb.0: ; %entry
2251 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2252 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
2253 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
2254 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f81
2255 ; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0
2256 ; GFX7CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0
2257 ; GFX7CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
2258 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2259 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
2261 ; GFX8CHECK-LABEL: not_iszero_or_nan_f_daz:
2262 ; GFX8CHECK: ; %bb.0: ; %entry
2263 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2264 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2265 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f81
2266 ; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0
2267 ; GFX8CHECK-NEXT: v_cmp_ne_u16_e64 s[4:5], 0, v0
2268 ; GFX8CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
2269 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2270 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
2272 ; GFX9CHECK-LABEL: not_iszero_or_nan_f_daz:
2273 ; GFX9CHECK: ; %bb.0: ; %entry
2274 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2275 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2276 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f81
2277 ; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0
2278 ; GFX9CHECK-NEXT: v_cmp_ne_u16_e64 s[4:5], 0, v0
2279 ; GFX9CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
2280 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2281 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
2283 ; GFX10CHECK-LABEL: not_iszero_or_nan_f_daz:
2284 ; GFX10CHECK: ; %bb.0: ; %entry
2285 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2286 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2287 ; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0
2288 ; GFX10CHECK-NEXT: v_cmp_ne_u16_e64 s4, 0, v0
2289 ; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo
2290 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
2291 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
2293 ; GFX11CHECK-LABEL: not_iszero_or_nan_f_daz:
2294 ; GFX11CHECK: ; %bb.0: ; %entry
2295 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2296 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2297 ; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0
2298 ; GFX11CHECK-NEXT: v_cmp_ne_u16_e64 s0, 0, v0
2299 ; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo
2300 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
2301 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
2303 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 924) ; ~(0x60|0x3) = "~(zero|nan)"
2307 define i1 @not_iszero_or_nan_f_maybe_daz(bfloat %x) #1 {
2308 ; GFX7CHECK-LABEL: not_iszero_or_nan_f_maybe_daz:
2309 ; GFX7CHECK: ; %bb.0: ; %entry
2310 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2311 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
2312 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
2313 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f81
2314 ; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0
2315 ; GFX7CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0
2316 ; GFX7CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
2317 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2318 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
2320 ; GFX8CHECK-LABEL: not_iszero_or_nan_f_maybe_daz:
2321 ; GFX8CHECK: ; %bb.0: ; %entry
2322 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2323 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2324 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f81
2325 ; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0
2326 ; GFX8CHECK-NEXT: v_cmp_ne_u16_e64 s[4:5], 0, v0
2327 ; GFX8CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
2328 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2329 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
2331 ; GFX9CHECK-LABEL: not_iszero_or_nan_f_maybe_daz:
2332 ; GFX9CHECK: ; %bb.0: ; %entry
2333 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2334 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2335 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f81
2336 ; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0
2337 ; GFX9CHECK-NEXT: v_cmp_ne_u16_e64 s[4:5], 0, v0
2338 ; GFX9CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
2339 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2340 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
2342 ; GFX10CHECK-LABEL: not_iszero_or_nan_f_maybe_daz:
2343 ; GFX10CHECK: ; %bb.0: ; %entry
2344 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2345 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2346 ; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0
2347 ; GFX10CHECK-NEXT: v_cmp_ne_u16_e64 s4, 0, v0
2348 ; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo
2349 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
2350 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
2352 ; GFX11CHECK-LABEL: not_iszero_or_nan_f_maybe_daz:
2353 ; GFX11CHECK: ; %bb.0: ; %entry
2354 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2355 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2356 ; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0
2357 ; GFX11CHECK-NEXT: v_cmp_ne_u16_e64 s0, 0, v0
2358 ; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo
2359 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
2360 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
2362 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 924) ; ~(0x60|0x3) = "~(zero|nan)"
2366 define i1 @iszero_or_qnan_bf16(bfloat %x) {
2367 ; GFX7CHECK-LABEL: iszero_or_qnan_bf16:
2368 ; GFX7CHECK: ; %bb.0: ; %entry
2369 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2370 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
2371 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
2372 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7fbf
2373 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
2374 ; GFX7CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
2375 ; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2376 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2377 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
2379 ; GFX8CHECK-LABEL: iszero_or_qnan_bf16:
2380 ; GFX8CHECK: ; %bb.0: ; %entry
2381 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2382 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2383 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7fbf
2384 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
2385 ; GFX8CHECK-NEXT: v_cmp_eq_u16_e64 s[4:5], 0, v0
2386 ; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2387 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2388 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
2390 ; GFX9CHECK-LABEL: iszero_or_qnan_bf16:
2391 ; GFX9CHECK: ; %bb.0: ; %entry
2392 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2393 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2394 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7fbf
2395 ; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
2396 ; GFX9CHECK-NEXT: v_cmp_eq_u16_e64 s[4:5], 0, v0
2397 ; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2398 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2399 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
2401 ; GFX10CHECK-LABEL: iszero_or_qnan_bf16:
2402 ; GFX10CHECK: ; %bb.0: ; %entry
2403 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2404 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2405 ; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7fbf, v0
2406 ; GFX10CHECK-NEXT: v_cmp_eq_u16_e64 s4, 0, v0
2407 ; GFX10CHECK-NEXT: s_or_b32 s4, s4, vcc_lo
2408 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
2409 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
2411 ; GFX11CHECK-LABEL: iszero_or_qnan_bf16:
2412 ; GFX11CHECK: ; %bb.0: ; %entry
2413 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2414 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2415 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7fbf, v0
2416 ; GFX11CHECK-NEXT: v_cmp_eq_u16_e64 s0, 0, v0
2417 ; GFX11CHECK-NEXT: s_or_b32 s0, s0, vcc_lo
2418 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
2419 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
2421 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 98) ; 0x60|0x2 = "zero|qnan"
2425 define i1 @iszero_or_snan_bf16(bfloat %x) {
2426 ; GFX7CHECK-LABEL: iszero_or_snan_bf16:
2427 ; GFX7CHECK: ; %bb.0: ; %entry
2428 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2429 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
2430 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
2431 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7fc0
2432 ; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0
2433 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
2434 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e64 s[4:5], s4, v0
2435 ; GFX7CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
2436 ; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2437 ; GFX7CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
2438 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2439 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
2441 ; GFX8CHECK-LABEL: iszero_or_snan_bf16:
2442 ; GFX8CHECK: ; %bb.0: ; %entry
2443 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2444 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2445 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7fc0
2446 ; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0
2447 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
2448 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e64 s[4:5], s4, v0
2449 ; GFX8CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
2450 ; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
2451 ; GFX8CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
2452 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2453 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
2455 ; GFX9CHECK-LABEL: iszero_or_snan_bf16:
2456 ; GFX9CHECK: ; %bb.0: ; %entry
2457 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2458 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2459 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7fc0
2460 ; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0
2461 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
2462 ; GFX9CHECK-NEXT: v_cmp_lt_i16_e64 s[4:5], s4, v0
2463 ; GFX9CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
2464 ; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
2465 ; GFX9CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
2466 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2467 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
2469 ; GFX10CHECK-LABEL: iszero_or_snan_bf16:
2470 ; GFX10CHECK: ; %bb.0: ; %entry
2471 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2472 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2473 ; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0
2474 ; GFX10CHECK-NEXT: v_cmp_lt_i16_e64 s4, 0x7f80, v0
2475 ; GFX10CHECK-NEXT: v_cmp_eq_u16_e64 s5, 0, v0
2476 ; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo
2477 ; GFX10CHECK-NEXT: s_or_b32 s4, s5, s4
2478 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
2479 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
2481 ; GFX11CHECK-LABEL: iszero_or_snan_bf16:
2482 ; GFX11CHECK: ; %bb.0: ; %entry
2483 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2484 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2485 ; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0
2486 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e64 s0, 0x7f80, v0
2487 ; GFX11CHECK-NEXT: v_cmp_eq_u16_e64 s1, 0, v0
2488 ; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo
2489 ; GFX11CHECK-NEXT: s_or_b32 s0, s1, s0
2490 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
2491 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
2493 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 97) ; 0x60|0x1 = "zero|snan"
2497 define i1 @not_iszero_or_qnan_bf16(bfloat %x) {
2498 ; GFX7CHECK-LABEL: not_iszero_or_qnan_bf16:
2499 ; GFX7CHECK: ; %bb.0: ; %entry
2500 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2501 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
2502 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
2503 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7fc0
2504 ; GFX7CHECK-NEXT: s_movk_i32 s8, 0x7f80
2505 ; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0
2506 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e64 s[4:5], s8, v0
2507 ; GFX7CHECK-NEXT: s_and_b64 s[6:7], s[4:5], vcc
2508 ; GFX7CHECK-NEXT: v_add_i32_e64 v1, s[4:5], -1, v0
2509 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f
2510 ; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s8, v0
2511 ; GFX7CHECK-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1
2512 ; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2513 ; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, 0xffffff80, v0
2514 ; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
2515 ; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
2516 ; GFX7CHECK-NEXT: s_movk_i32 s6, 0x7f00
2517 ; GFX7CHECK-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
2518 ; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2519 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2520 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
2522 ; GFX8CHECK-LABEL: not_iszero_or_qnan_bf16:
2523 ; GFX8CHECK: ; %bb.0: ; %entry
2524 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2525 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2526 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7fc0
2527 ; GFX8CHECK-NEXT: s_movk_i32 s8, 0x7f80
2528 ; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0
2529 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e64 s[4:5], s8, v0
2530 ; GFX8CHECK-NEXT: s_and_b64 s[6:7], s[4:5], vcc
2531 ; GFX8CHECK-NEXT: v_add_u16_e32 v1, -1, v0
2532 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f
2533 ; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s8, v0
2534 ; GFX8CHECK-NEXT: v_cmp_gt_u16_e64 s[4:5], s4, v1
2535 ; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2536 ; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
2537 ; GFX8CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0
2538 ; GFX8CHECK-NEXT: s_movk_i32 s6, 0x7f00
2539 ; GFX8CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s6, v0
2540 ; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2541 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2542 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
2544 ; GFX9CHECK-LABEL: not_iszero_or_qnan_bf16:
2545 ; GFX9CHECK: ; %bb.0: ; %entry
2546 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2547 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2548 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7fc0
2549 ; GFX9CHECK-NEXT: s_movk_i32 s8, 0x7f80
2550 ; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0
2551 ; GFX9CHECK-NEXT: v_cmp_lt_i16_e64 s[4:5], s8, v0
2552 ; GFX9CHECK-NEXT: s_and_b64 s[6:7], s[4:5], vcc
2553 ; GFX9CHECK-NEXT: v_add_u16_e32 v1, -1, v0
2554 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f
2555 ; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s8, v0
2556 ; GFX9CHECK-NEXT: v_cmp_gt_u16_e64 s[4:5], s4, v1
2557 ; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2558 ; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
2559 ; GFX9CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0
2560 ; GFX9CHECK-NEXT: s_movk_i32 s6, 0x7f00
2561 ; GFX9CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s6, v0
2562 ; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2563 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2564 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
2566 ; GFX10CHECK-LABEL: not_iszero_or_qnan_bf16:
2567 ; GFX10CHECK: ; %bb.0: ; %entry
2568 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2569 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2570 ; GFX10CHECK-NEXT: v_add_nc_u16 v1, v0, -1
2571 ; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0
2572 ; GFX10CHECK-NEXT: v_cmp_lt_i16_e64 s4, 0x7f80, v0
2573 ; GFX10CHECK-NEXT: v_cmp_eq_u16_e64 s5, 0x7f80, v0
2574 ; GFX10CHECK-NEXT: v_add_nc_u16 v0, v0, 0xff80
2575 ; GFX10CHECK-NEXT: v_cmp_gt_u16_e64 s6, 0x7f, v1
2576 ; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo
2577 ; GFX10CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0
2578 ; GFX10CHECK-NEXT: s_or_b32 s5, s6, s5
2579 ; GFX10CHECK-NEXT: s_or_b32 s4, s5, s4
2580 ; GFX10CHECK-NEXT: s_or_b32 s4, s4, vcc_lo
2581 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
2582 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
2584 ; GFX11CHECK-LABEL: not_iszero_or_qnan_bf16:
2585 ; GFX11CHECK: ; %bb.0: ; %entry
2586 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2587 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2588 ; GFX11CHECK-NEXT: v_add_nc_u16 v1, v0, -1
2589 ; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0
2590 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e64 s0, 0x7f80, v0
2591 ; GFX11CHECK-NEXT: v_cmp_eq_u16_e64 s1, 0x7f80, v0
2592 ; GFX11CHECK-NEXT: v_add_nc_u16 v0, v0, 0xff80
2593 ; GFX11CHECK-NEXT: v_cmp_gt_u16_e64 s2, 0x7f, v1
2594 ; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo
2595 ; GFX11CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0
2596 ; GFX11CHECK-NEXT: s_or_b32 s1, s2, s1
2597 ; GFX11CHECK-NEXT: s_or_b32 s0, s1, s0
2598 ; GFX11CHECK-NEXT: s_or_b32 s0, s0, vcc_lo
2599 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
2600 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
2602 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 925) ; ~(0x60|0x2) = "~(zero|qnan)"
2606 define i1 @not_iszero_or_snan_bf16(bfloat %x) {
2607 ; GFX7CHECK-LABEL: not_iszero_or_snan_bf16:
2608 ; GFX7CHECK: ; %bb.0: ; %entry
2609 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2610 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
2611 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
2612 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
2613 ; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
2614 ; GFX7CHECK-NEXT: v_add_i32_e64 v1, s[4:5], -1, v0
2615 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f
2616 ; GFX7CHECK-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1
2617 ; GFX7CHECK-NEXT: s_movk_i32 s6, 0x7fbf
2618 ; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2619 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s6, v0
2620 ; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2621 ; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, 0xffffff80, v0
2622 ; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
2623 ; GFX7CHECK-NEXT: s_movk_i32 s6, 0x7f00
2624 ; GFX7CHECK-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
2625 ; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2626 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2627 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
2629 ; GFX8CHECK-LABEL: not_iszero_or_snan_bf16:
2630 ; GFX8CHECK: ; %bb.0: ; %entry
2631 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2632 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2633 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
2634 ; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
2635 ; GFX8CHECK-NEXT: v_add_u16_e32 v1, -1, v0
2636 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f
2637 ; GFX8CHECK-NEXT: v_cmp_gt_u16_e64 s[4:5], s4, v1
2638 ; GFX8CHECK-NEXT: s_movk_i32 s6, 0x7fbf
2639 ; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2640 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s6, v0
2641 ; GFX8CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0
2642 ; GFX8CHECK-NEXT: s_movk_i32 s6, 0x7f00
2643 ; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2644 ; GFX8CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s6, v0
2645 ; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2646 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2647 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
2649 ; GFX9CHECK-LABEL: not_iszero_or_snan_bf16:
2650 ; GFX9CHECK: ; %bb.0: ; %entry
2651 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2652 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2653 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
2654 ; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
2655 ; GFX9CHECK-NEXT: v_add_u16_e32 v1, -1, v0
2656 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f
2657 ; GFX9CHECK-NEXT: v_cmp_gt_u16_e64 s[4:5], s4, v1
2658 ; GFX9CHECK-NEXT: s_movk_i32 s6, 0x7fbf
2659 ; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2660 ; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s6, v0
2661 ; GFX9CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0
2662 ; GFX9CHECK-NEXT: s_movk_i32 s6, 0x7f00
2663 ; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2664 ; GFX9CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s6, v0
2665 ; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2666 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2667 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
2669 ; GFX10CHECK-LABEL: not_iszero_or_snan_bf16:
2670 ; GFX10CHECK: ; %bb.0: ; %entry
2671 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2672 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2673 ; GFX10CHECK-NEXT: v_add_nc_u16 v1, v0, -1
2674 ; GFX10CHECK-NEXT: v_add_nc_u16 v2, v0, 0xff80
2675 ; GFX10CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
2676 ; GFX10CHECK-NEXT: v_cmp_lt_i16_e64 s5, 0x7fbf, v0
2677 ; GFX10CHECK-NEXT: v_cmp_gt_u16_e64 s4, 0x7f, v1
2678 ; GFX10CHECK-NEXT: v_cmp_gt_u16_e64 s6, 0x7f00, v2
2679 ; GFX10CHECK-NEXT: s_or_b32 s4, s4, vcc_lo
2680 ; GFX10CHECK-NEXT: s_or_b32 s4, s4, s5
2681 ; GFX10CHECK-NEXT: s_or_b32 s4, s4, s6
2682 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
2683 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
2685 ; GFX11CHECK-LABEL: not_iszero_or_snan_bf16:
2686 ; GFX11CHECK: ; %bb.0: ; %entry
2687 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2688 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2689 ; GFX11CHECK-NEXT: v_add_nc_u16 v1, v0, -1
2690 ; GFX11CHECK-NEXT: v_add_nc_u16 v2, v0, 0xff80
2691 ; GFX11CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
2692 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e64 s1, 0x7fbf, v0
2693 ; GFX11CHECK-NEXT: v_cmp_gt_u16_e64 s0, 0x7f, v1
2694 ; GFX11CHECK-NEXT: v_cmp_gt_u16_e64 s2, 0x7f00, v2
2695 ; GFX11CHECK-NEXT: s_or_b32 s0, s0, vcc_lo
2696 ; GFX11CHECK-NEXT: s_or_b32 s0, s0, s1
2697 ; GFX11CHECK-NEXT: s_or_b32 s0, s0, s2
2698 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
2699 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
2701 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 926) ; ~(0x60|0x1) = "~(zero|snan)"
2705 define i1 @isinf_or_nan_bf16(bfloat %x) {
2706 ; GFX7CHECK-LABEL: isinf_or_nan_bf16:
2707 ; GFX7CHECK: ; %bb.0: ; %entry
2708 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2709 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
2710 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
2711 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f7f
2712 ; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
2713 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
2714 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
2716 ; GFX8CHECK-LABEL: isinf_or_nan_bf16:
2717 ; GFX8CHECK: ; %bb.0: ; %entry
2718 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2719 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2720 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f7f
2721 ; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
2722 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
2723 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
2725 ; GFX9CHECK-LABEL: isinf_or_nan_bf16:
2726 ; GFX9CHECK: ; %bb.0: ; %entry
2727 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2728 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2729 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f7f
2730 ; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
2731 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
2732 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
2734 ; GFX10CHECK-LABEL: isinf_or_nan_bf16:
2735 ; GFX10CHECK: ; %bb.0: ; %entry
2736 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2737 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2738 ; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f7f, v0
2739 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
2740 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
2742 ; GFX11CHECK-LABEL: isinf_or_nan_bf16:
2743 ; GFX11CHECK: ; %bb.0: ; %entry
2744 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2745 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2746 ; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f7f, v0
2747 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
2748 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
2750 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 519) ; 0x204|0x3 = "inf|nan"
2754 define i1 @not_isinf_or_nan_bf16(bfloat %x) {
2755 ; GFX7CHECK-LABEL: not_isinf_or_nan_bf16:
2756 ; GFX7CHECK: ; %bb.0: ; %entry
2757 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2758 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
2759 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
2760 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
2761 ; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0
2762 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
2763 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
2765 ; GFX8CHECK-LABEL: not_isinf_or_nan_bf16:
2766 ; GFX8CHECK: ; %bb.0: ; %entry
2767 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2768 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2769 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
2770 ; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0
2771 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
2772 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
2774 ; GFX9CHECK-LABEL: not_isinf_or_nan_bf16:
2775 ; GFX9CHECK: ; %bb.0: ; %entry
2776 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2777 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2778 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
2779 ; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0
2780 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
2781 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
2783 ; GFX10CHECK-LABEL: not_isinf_or_nan_bf16:
2784 ; GFX10CHECK: ; %bb.0: ; %entry
2785 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2786 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2787 ; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7f80, v0
2788 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
2789 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
2791 ; GFX11CHECK-LABEL: not_isinf_or_nan_bf16:
2792 ; GFX11CHECK: ; %bb.0: ; %entry
2793 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2794 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2795 ; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7f80, v0
2796 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
2797 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
2799 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 504) ; ~(0x204|0x3) = "~(inf|nan)"
2803 define i1 @isfinite_or_nan_f(bfloat %x) {
2804 ; GFX7CHECK-LABEL: isfinite_or_nan_f:
2805 ; GFX7CHECK: ; %bb.0: ; %entry
2806 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2807 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
2808 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
2809 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
2810 ; GFX7CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s4, v0
2811 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
2812 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
2814 ; GFX8CHECK-LABEL: isfinite_or_nan_f:
2815 ; GFX8CHECK: ; %bb.0: ; %entry
2816 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2817 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2818 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
2819 ; GFX8CHECK-NEXT: v_cmp_ne_u16_e32 vcc, s4, v0
2820 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
2821 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
2823 ; GFX9CHECK-LABEL: isfinite_or_nan_f:
2824 ; GFX9CHECK: ; %bb.0: ; %entry
2825 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2826 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2827 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
2828 ; GFX9CHECK-NEXT: v_cmp_ne_u16_e32 vcc, s4, v0
2829 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
2830 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
2832 ; GFX10CHECK-LABEL: isfinite_or_nan_f:
2833 ; GFX10CHECK: ; %bb.0: ; %entry
2834 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2835 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2836 ; GFX10CHECK-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0x7f80, v0
2837 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
2838 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
2840 ; GFX11CHECK-LABEL: isfinite_or_nan_f:
2841 ; GFX11CHECK: ; %bb.0: ; %entry
2842 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2843 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2844 ; GFX11CHECK-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0x7f80, v0
2845 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
2846 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
2848 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 507) ; 0x1f8|0x3 = "finite|nan"
2852 define i1 @not_isfinite_or_nan_f(bfloat %x) {
2853 ; GFX7CHECK-LABEL: not_isfinite_or_nan_f:
2854 ; GFX7CHECK: ; %bb.0: ; %entry
2855 ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2856 ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
2857 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
2858 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
2859 ; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
2860 ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
2861 ; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
2863 ; GFX8CHECK-LABEL: not_isfinite_or_nan_f:
2864 ; GFX8CHECK: ; %bb.0: ; %entry
2865 ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2866 ; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2867 ; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
2868 ; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
2869 ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
2870 ; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
2872 ; GFX9CHECK-LABEL: not_isfinite_or_nan_f:
2873 ; GFX9CHECK: ; %bb.0: ; %entry
2874 ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2875 ; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2876 ; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
2877 ; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
2878 ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
2879 ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
2881 ; GFX10CHECK-LABEL: not_isfinite_or_nan_f:
2882 ; GFX10CHECK: ; %bb.0: ; %entry
2883 ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2884 ; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2885 ; GFX10CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
2886 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
2887 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
2889 ; GFX11CHECK-LABEL: not_isfinite_or_nan_f:
2890 ; GFX11CHECK: ; %bb.0: ; %entry
2891 ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2892 ; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2893 ; GFX11CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
2894 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
2895 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
2897 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 516) ; ~(0x1f8|0x3) = "~(finite|nan)"
2901 declare i1 @llvm.is.fpclass.bf16(bfloat, i32)
2902 declare <2 x i1> @llvm.is.fpclass.v2bf16(<2 x bfloat>, i32)
2903 declare <3 x i1> @llvm.is.fpclass.v3bf16(<3 x bfloat>, i32)
2904 declare <4 x i1> @llvm.is.fpclass.v4bf16(<4 x bfloat>, i32)
2907 attributes #0 = { "denormal-fp-math"="ieee,preserve-sign" }
2910 attributes #1 = { "denormal-fp-math"="ieee,dynamic" }
2911 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
2912 ; GFX10SELDAG: {{.*}}
2913 ; GFX11SELDAG: {{.*}}
2914 ; GFX7SELDAG: {{.*}}
2915 ; GFX8SELDAG: {{.*}}
2916 ; GFX9SELDAG: {{.*}}