1 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
2 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
; Test: conditional branch on an ordered less-than compare (fcmp olt) of two
; half values, both loaded from addrspace(1) pointers.
; What the check lines below expect:
;   - under the common GCN prefix (used by both llc invocations): two
;     buffer_load_ushort loads and an s_cbranch_vccnz;
;   - under the SI prefix (the tahiti invocation): each loaded value is widened
;     with v_cvt_f32_f16 and the compare is v_cmp_nlt_f32;
;   - under the VI prefix (the fiji invocation): v_cmp_nlt_f16 is applied
;     directly to the two loaded registers.
; NOTE(review): the IR predicate is olt while the checked compare is nlt;
; presumably the branch is emitted on the inverted condition - confirm.
4 ; GCN-LABEL: {{^}}br_cc_f16:
5 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
6 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
8 ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
9 ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
10 ; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
11 ; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
12 ; GCN: s_cbranch_vccnz
; For the SI prefix, both f32 values are converted back to f16 into the same
; captured register (CVT), and one buffer_store_short of that register is matched.
15 ; SI: v_cvt_f16_f32_e32 v[[CVT:[0-9]+]], v[[A_F32]]
18 ; SI: v_cvt_f16_f32_e32 v[[CVT]], v[[B_F32]]
21 ; SI: buffer_store_short v[[CVT]]
; For the VI prefix, the originally loaded registers are stored without any
; conversion: first A_F16, then B_F16.
27 ; VI: buffer_store_short v[[A_F16]]
31 ; VI: buffer_store_short v[[B_F16]]
; Kernel under test.
;   %r - destination pointer for the stored half
;   %a - pointer to the left-hand compare operand
;   %b - pointer to the right-hand compare operand
33 define amdgpu_kernel void @br_cc_f16(
34 half addrspace(1)* %r,
35 half addrspace(1)* %a,
36 half addrspace(1)* %b) {
; Both loads are volatile - presumably so they stay as two separate loads in
; this order, matching the two ordered load checks above; TODO confirm.
38 %a.val = load volatile half, half addrspace(1)* %a
39 %b.val = load volatile half, half addrspace(1)* %b
; Branch to %one when %a.val < %b.val (ordered), otherwise to %two.
40 %fcmp = fcmp olt half %a.val, %b.val
41 br i1 %fcmp, label %one, label %two
; Stores %a.val to %r (presumably the body of %one - confirm).
44 store half %a.val, half addrspace(1)* %r
; Stores %b.val to %r (presumably the body of %two - confirm).
48 store half %b.val, half addrspace(1)* %r
; Test: same compare-and-branch shape, but the left-hand operand of the
; fcmp olt is the half constant 0xH3800; the check lines match it as the
; inline operand 0.5 in first position of the compare.
; What the check lines below expect:
;   - a single buffer_load_ushort for the one loaded operand (common GCN prefix);
;   - under the SI prefix: the loaded value is widened with v_cvt_f32_f16 and
;     compared with v_cmp_nlt_f32 against 0.5;
;   - under the VI prefix: v_cmp_nlt_f16 against 0.5 on the loaded register.
52 ; GCN-LABEL: {{^}}br_cc_f16_imm_a:
53 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
55 ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
56 ; SI: v_cmp_nlt_f32_e32 vcc, 0.5, v[[B_F32]]
59 ; VI: v_cmp_nlt_f16_e32 vcc, 0.5, v[[B_F16]]
; The constant is expected to be materialised with v_mov_b32. The pattern
; accepts either 0x3800 or 0x3801 as the immediate.
; NOTE(review): the IR constant is 0xH3800 and the sibling test
; br_cc_f16_imm_b only accepts 0x3800; the reason 0x3801 is tolerated here is
; not evident from this file - confirm whether the alternation is still needed.
63 ; GCN: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x380{{0|1}}{{$}}
65 ; SI: buffer_store_short v[[A_F16]]
; For the SI prefix, the widened loaded value is converted back to f16; this
; line re-captures B_F16 with the destination register of that conversion.
70 ; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[B_F32]]
; Kernel under test.
;   %r - destination pointer for the stored half
;   %b - pointer to the right-hand compare operand
72 define amdgpu_kernel void @br_cc_f16_imm_a(
73 half addrspace(1)* %r,
74 half addrspace(1)* %b) {
; Unlike br_cc_f16, this load is not volatile.
76 %b.val = load half, half addrspace(1)* %b
; Branch to %one when 0xH3800 < %b.val (ordered), otherwise to %two.
77 %fcmp = fcmp olt half 0xH3800, %b.val
78 br i1 %fcmp, label %one, label %two
; Stores the constant to %r (presumably the body of %one - confirm).
81 store half 0xH3800, half addrspace(1)* %r
; Stores the loaded value to %r (presumably the body of %two - confirm).
85 store half %b.val, half addrspace(1)* %r
; Test: same compare-and-branch shape, but the right-hand operand of the
; fcmp olt is the half constant 0xH3800.
; What the check lines below expect:
;   - a single buffer_load_ushort for the one loaded operand (common GCN prefix);
;   - the compare is matched with 0.5 as its first operand and the loaded
;     value second, using the ngt form (v_cmp_ngt_f32 under the SI prefix after
;     a v_cvt_f32_f16 widening, v_cmp_ngt_f16 under the VI prefix);
;   - an s_cbranch_vccnz follows the compare.
; NOTE(review): in the IR the constant is the second operand of an olt compare;
; the checks presumably reflect the operands being swapped with the predicate
; adjusted and inverted to match - confirm.
89 ; GCN-LABEL: {{^}}br_cc_f16_imm_b:
90 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
92 ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
93 ; SI: v_cmp_ngt_f32_e32 vcc, 0.5, v[[A_F32]]
95 ; VI: v_cmp_ngt_f16_e32 vcc, 0.5, v[[A_F16]]
96 ; GCN: s_cbranch_vccnz
; For the SI prefix, the widened loaded value is converted back to f16; this
; line re-captures A_F16 with the destination register of that conversion.
99 ; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
; The constant is expected to be materialised with v_mov_b32 as exactly 0x3800
; and then stored; both checks use the common GCN prefix.
102 ; GCN: v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x3800{{$}}
103 ; GCN: buffer_store_short v[[B_F16]]
; Kernel under test.
;   %r - destination pointer for the stored half
;   %a - pointer to the left-hand compare operand
105 define amdgpu_kernel void @br_cc_f16_imm_b(
106 half addrspace(1)* %r,
107 half addrspace(1)* %a) {
; Unlike br_cc_f16, this load is not volatile.
109 %a.val = load half, half addrspace(1)* %a
; Branch to %one when %a.val < 0xH3800 (ordered), otherwise to %two.
110 %fcmp = fcmp olt half %a.val, 0xH3800
111 br i1 %fcmp, label %one, label %two
; Stores the loaded value to %r (presumably the body of %one - confirm).
114 store half %a.val, half addrspace(1)* %r
; Stores the constant to %r (presumably the body of %two - confirm).
118 store half 0xH3800, half addrspace(1)* %r