; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

declare half @llvm.fabs.f16(half %a)
declare i1 @llvm.amdgcn.class.f16(half %a, i32 %b)
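
; The i32 operand of llvm.amdgcn.class.f16 is a class mask; v_cmp_class_f16
; tests the value against the IEEE classes whose bits are set:
;   bit 0: signaling NaN    bit 1: quiet NaN
;   bit 2: -infinity        bit 3: -normal
;   bit 4: -subnormal       bit 5: -0.0
;   bit 6: +0.0             bit 7: +subnormal
;   bit 8: +normal          bit 9: +infinity
; The constant masks used below (1, 64, 0x3ff, 0x1ff) refer to these bits.
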
; GCN-LABEL: {{^}}class_f16
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_I32:[0-9]+]]
; VI: v_cmp_class_f16_e32 vcc, v[[A_F16]], v[[B_I32]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
define amdgpu_kernel void @class_f16(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    i32 addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load i32, i32 addrspace(1)* %b
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 %b.val)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
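
; The fabs on the value being classified should fold into the |...| source
; modifier of v_cmp_class_f16 rather than being emitted as a separate
; instruction; with a scalar mask operand the VOP3 (_e64) form writing an
; SGPR pair is used.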
; GCN-LABEL: {{^}}class_f16_fabs
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; GCN: s_load_dword s[[SB_I32:[0-9]+]]
; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]]
; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |v[[VA_F16]]|, s[[SB_I32]]
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
; GCN: buffer_store_dword v[[VR_I32]]
define amdgpu_kernel void @class_f16_fabs(
    i32 addrspace(1)* %r,
    half %a.val,
    i32 %b.val) {
entry:
  %a.val.fabs = call half @llvm.fabs.f16(half %a.val)
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val.fabs, i32 %b.val)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
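
; The fneg (fsub from -0.0) should likewise be folded away as a negation
; source modifier (matched here as -s[[SA_F16]] on the instruction feeding
; the compare) rather than materialized with a separate instruction.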
; GCN-LABEL: {{^}}class_f16_fneg
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; GCN: s_load_dword s[[SB_I32:[0-9]+]]
; VI: v_trunc_f16_e64 v[[VA_F16:[0-9]+]], -s[[SA_F16]]
; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], v[[VA_F16]], s[[SB_I32]]
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
; GCN: buffer_store_dword v[[VR_I32]]
define amdgpu_kernel void @class_f16_fneg(
    i32 addrspace(1)* %r,
    half %a.val,
    i32 %b.val) {
entry:
  %a.val.fneg = fsub half -0.0, %a.val
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val.fneg, i32 %b.val)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
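
; fneg(fabs(x)) should combine into the single -|...| source modifier on
; v_cmp_class_f16.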
; GCN-LABEL: {{^}}class_f16_fabs_fneg
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; GCN: s_load_dword s[[SB_I32:[0-9]+]]
; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]]
; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -|v[[VA_F16]]|, s[[SB_I32]]
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
; GCN: buffer_store_dword v[[VR_I32]]
define amdgpu_kernel void @class_f16_fabs_fneg(
    i32 addrspace(1)* %r,
    half %a.val,
    i32 %b.val) {
entry:
  %a.val.fabs = call half @llvm.fabs.f16(half %a.val)
  %a.val.fabs.fneg = fsub half -0.0, %a.val.fabs
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val.fabs.fneg, i32 %b.val)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
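
; Mask 1 selects only the signaling-NaN class; small masks fit in an inline
; constant operand, so no v_mov of the mask is needed.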
; GCN-LABEL: {{^}}class_f16_1
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]]
; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], v[[VA_F16]], 1{{$}}
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
; GCN: buffer_store_dword v[[VR_I32]]
define amdgpu_kernel void @class_f16_1(
    i32 addrspace(1)* %r,
    half %a.val) {
entry:
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 1)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
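
; Mask 64 (bit 6) selects only the +0.0 class; 64 is still representable as
; an inline constant.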
; GCN-LABEL: {{^}}class_f16_64
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]]
; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], v[[VA_F16]], 64{{$}}
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
; GCN: buffer_store_dword v[[VR_I32]]
define amdgpu_kernel void @class_f16_64(
    i32 addrspace(1)* %r,
    half %a.val) {
entry:
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 64)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
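
; Mask 0x3ff sets all ten class bits. 1023 is not an inline constant, so the
; mask is first moved into a VGPR and the VOP2 (_e32) form writing vcc is
; used.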
; GCN-LABEL: {{^}}class_f16_full_mask
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; VI: v_mov_b32_e32 v[[MASK:[0-9]+]], 0x3ff{{$}}
; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]]
; VI: v_cmp_class_f16_e32 vcc, v[[VA_F16]], v[[MASK]]
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, vcc
; GCN: buffer_store_dword v[[VR_I32]]
define amdgpu_kernel void @class_f16_full_mask(
    i32 addrspace(1)* %r,
    half %a.val) {
entry:
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 1023)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
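
; Mask 0x1ff covers bits 0-8 (everything except +infinity); like 0x3ff it is
; not an inline constant and must be moved into a VGPR first.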
; GCN-LABEL: {{^}}class_f16_nine_bit_mask
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; VI: v_mov_b32_e32 v[[MASK:[0-9]+]], 0x1ff{{$}}
; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]]
; VI: v_cmp_class_f16_e32 vcc, v[[VA_F16]], v[[MASK]]
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, vcc
; GCN: buffer_store_dword v[[VR_I32]]
define amdgpu_kernel void @class_f16_nine_bit_mask(
    i32 addrspace(1)* %r,
    half %a.val) {
entry:
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 511)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}