1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
4 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
; Test br_cc_f16 - conditional branch on an ordered less-than compare of two
; half values that are both loaded from memory.
;
; The check lines in this function are autogenerated (see the note on the first
; line of the file); regenerate them with the update script rather than editing
; them by hand.
;
; What the expected output shows per run line:
;   * tahiti (prefix SI)     - both operands are extended with v_cvt_f32_f16 and
;                              compared with v_cmp_nlt_f32; the selected value is
;                              converted back with v_cvt_f16_f32 before one
;                              shared buffer_store_short.
;   * fiji (prefix VI)       - the compare is done directly in half precision
;                              with v_cmp_nlt_f16, with one store per successor.
;   * gfx1100 (prefix GFX11) - same shape as fiji, using the b16/u16 buffer
;                              mnemonics and vcc_lo.
;
; NOTE(review): the parameter list, the block labels and the terminators look
; partly missing in this view of the file - confirm against the full test.
6 define amdgpu_kernel void @br_cc_f16(
8 ; SI: ; %bb.0: ; %entry
9 ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
10 ; SI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
11 ; SI-NEXT: s_mov_b32 s7, 0xf000
12 ; SI-NEXT: s_mov_b32 s6, -1
13 ; SI-NEXT: s_mov_b32 s10, s6
14 ; SI-NEXT: s_waitcnt lgkmcnt(0)
15 ; SI-NEXT: s_mov_b32 s4, s2
16 ; SI-NEXT: s_mov_b32 s5, s3
17 ; SI-NEXT: s_mov_b32 s11, s7
18 ; SI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 glc
19 ; SI-NEXT: s_waitcnt vmcnt(0)
20 ; SI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 glc
21 ; SI-NEXT: s_waitcnt vmcnt(0)
22 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
23 ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
24 ; SI-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1
25 ; SI-NEXT: s_cbranch_vccnz .LBB0_2
26 ; SI-NEXT: ; %bb.1: ; %one
27 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
28 ; SI-NEXT: s_branch .LBB0_3
29 ; SI-NEXT: .LBB0_2: ; %two
30 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v1
31 ; SI-NEXT: .LBB0_3: ; %one
32 ; SI-NEXT: s_mov_b32 s2, s6
33 ; SI-NEXT: s_mov_b32 s3, s7
34 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
37 ; VI-LABEL: br_cc_f16:
38 ; VI: ; %bb.0: ; %entry
39 ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
40 ; VI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x34
41 ; VI-NEXT: s_mov_b32 s7, 0xf000
42 ; VI-NEXT: s_mov_b32 s6, -1
43 ; VI-NEXT: s_mov_b32 s10, s6
44 ; VI-NEXT: s_waitcnt lgkmcnt(0)
45 ; VI-NEXT: s_mov_b32 s4, s2
46 ; VI-NEXT: s_mov_b32 s5, s3
47 ; VI-NEXT: s_mov_b32 s11, s7
48 ; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 glc
49 ; VI-NEXT: s_waitcnt vmcnt(0)
50 ; VI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 glc
51 ; VI-NEXT: s_waitcnt vmcnt(0)
52 ; VI-NEXT: s_mov_b32 s2, s6
53 ; VI-NEXT: s_mov_b32 s3, s7
54 ; VI-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1
55 ; VI-NEXT: s_cbranch_vccnz .LBB0_2
56 ; VI-NEXT: ; %bb.1: ; %one
57 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
59 ; VI-NEXT: .LBB0_2: ; %two
60 ; VI-NEXT: buffer_store_short v1, off, s[0:3], 0
63 ; GFX11-LABEL: br_cc_f16:
64 ; GFX11: ; %bb.0: ; %entry
65 ; GFX11-NEXT: s_clause 0x1
66 ; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
67 ; GFX11-NEXT: s_load_b64 s[8:9], s[4:5], 0x34
68 ; GFX11-NEXT: s_mov_b32 s6, -1
69 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
70 ; GFX11-NEXT: s_mov_b32 s10, s6
71 ; GFX11-NEXT: s_mov_b32 s11, s7
72 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
73 ; GFX11-NEXT: s_mov_b32 s4, s2
74 ; GFX11-NEXT: s_mov_b32 s5, s3
75 ; GFX11-NEXT: buffer_load_u16 v0, off, s[4:7], 0 glc dlc
76 ; GFX11-NEXT: s_waitcnt vmcnt(0)
77 ; GFX11-NEXT: buffer_load_u16 v1, off, s[8:11], 0 glc dlc
78 ; GFX11-NEXT: s_waitcnt vmcnt(0)
79 ; GFX11-NEXT: s_mov_b32 s2, s6
80 ; GFX11-NEXT: s_mov_b32 s3, s7
81 ; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
82 ; GFX11-NEXT: s_cbranch_vccnz .LBB0_2
83 ; GFX11-NEXT: ; %bb.1: ; %one
84 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
85 ; GFX11-NEXT: s_endpgm
86 ; GFX11-NEXT: .LBB0_2: ; %two
87 ; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0
88 ; GFX11-NEXT: s_endpgm
91 ptr addrspace(1) %b) {
; Both operands are read with volatile loads; in the expected output each load
; carries glc and is followed by its own s_waitcnt vmcnt(0).
93 %a.val = load volatile half, ptr addrspace(1) %a
94 %b.val = load volatile half, ptr addrspace(1) %b
; Ordered less-than compare; the expected output branches on the negated
; condition (v_cmp_nlt_* + s_cbranch_vccnz) to reach %two.
95 %fcmp = fcmp olt half %a.val, %b.val
96 br i1 %fcmp, label %one, label %two
; Stores of the two loaded values to %r; per the check lines the %one path
; stores the first loaded value and the %two path the second.
99 store half %a.val, ptr addrspace(1) %r
103 store half %b.val, ptr addrspace(1) %r
; Test br_cc_f16_imm_a - conditional branch on an ordered less-than compare
; whose FIRST operand is the half constant 0xH3800 and whose second operand is
; loaded from %b.
;
; The check lines in this function are autogenerated (see the note on the first
; line of the file); regenerate them with the update script rather than editing
; them by hand.
;
; What the expected output shows:
;   * The constant appears as the inline operand 0.5 in the compare
;     (v_cmp_nlt_f32 on tahiti after v_cvt_f32_f16, v_cmp_nlt_f16 on fiji and
;     gfx1100) and as the raw bit pattern 0x3800 when it is the stored value.
;   * The load here is not volatile, and the buffer loads in the expected
;     output carry no glc modifier.
;   * On fiji and gfx1100 the %one path only materialises 0x3800 into v0 and
;     falls through into the store that follows the %two label.
;
; NOTE(review): the parameter list, the block labels and the terminators look
; partly missing in this view of the file - confirm against the full test.
107 define amdgpu_kernel void @br_cc_f16_imm_a(
108 ; SI-LABEL: br_cc_f16_imm_a:
109 ; SI: ; %bb.0: ; %entry
110 ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
111 ; SI-NEXT: s_mov_b32 s7, 0xf000
112 ; SI-NEXT: s_mov_b32 s6, -1
113 ; SI-NEXT: s_waitcnt lgkmcnt(0)
114 ; SI-NEXT: s_mov_b32 s4, s2
115 ; SI-NEXT: s_mov_b32 s5, s3
116 ; SI-NEXT: buffer_load_ushort v0, off, s[4:7], 0
117 ; SI-NEXT: s_waitcnt vmcnt(0)
118 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
119 ; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0.5, v0
120 ; SI-NEXT: s_cbranch_vccnz .LBB1_2
121 ; SI-NEXT: ; %bb.1: ; %one
122 ; SI-NEXT: s_mov_b32 s2, s6
123 ; SI-NEXT: s_mov_b32 s3, s7
124 ; SI-NEXT: v_mov_b32_e32 v0, 0x3800
125 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
127 ; SI-NEXT: .LBB1_2: ; %two
128 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
129 ; SI-NEXT: s_mov_b32 s2, s6
130 ; SI-NEXT: s_mov_b32 s3, s7
131 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
134 ; VI-LABEL: br_cc_f16_imm_a:
135 ; VI: ; %bb.0: ; %entry
136 ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
137 ; VI-NEXT: s_mov_b32 s7, 0xf000
138 ; VI-NEXT: s_mov_b32 s6, -1
139 ; VI-NEXT: s_waitcnt lgkmcnt(0)
140 ; VI-NEXT: s_mov_b32 s4, s2
141 ; VI-NEXT: s_mov_b32 s5, s3
142 ; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0
143 ; VI-NEXT: s_mov_b32 s2, s6
144 ; VI-NEXT: s_mov_b32 s3, s7
145 ; VI-NEXT: s_waitcnt vmcnt(0)
146 ; VI-NEXT: v_cmp_nlt_f16_e32 vcc, 0.5, v0
147 ; VI-NEXT: s_cbranch_vccnz .LBB1_2
148 ; VI-NEXT: ; %bb.1: ; %one
149 ; VI-NEXT: v_mov_b32_e32 v0, 0x3800
150 ; VI-NEXT: .LBB1_2: ; %two
151 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
154 ; GFX11-LABEL: br_cc_f16_imm_a:
155 ; GFX11: ; %bb.0: ; %entry
156 ; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
157 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
158 ; GFX11-NEXT: s_mov_b32 s6, -1
159 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
160 ; GFX11-NEXT: s_mov_b32 s4, s2
161 ; GFX11-NEXT: s_mov_b32 s5, s3
162 ; GFX11-NEXT: buffer_load_u16 v0, off, s[4:7], 0
163 ; GFX11-NEXT: s_waitcnt vmcnt(0)
164 ; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v0
165 ; GFX11-NEXT: s_cbranch_vccnz .LBB1_2
166 ; GFX11-NEXT: ; %bb.1: ; %one
167 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x3800
168 ; GFX11-NEXT: .LBB1_2: ; %two
169 ; GFX11-NEXT: s_mov_b32 s2, s6
170 ; GFX11-NEXT: s_mov_b32 s3, s7
171 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
172 ; GFX11-NEXT: s_endpgm
174 ptr addrspace(1) %b) {
; Only the second compare operand comes from memory (plain, non-volatile load).
176 %b.val = load half, ptr addrspace(1) %b
; Constant on the left-hand side of the ordered less-than compare.
177 %fcmp = fcmp olt half 0xH3800, %b.val
178 br i1 %fcmp, label %one, label %two
; Stores to %r of the constant and of the loaded value; per the check lines the
; %one path stores the constant and the %two path the loaded value.
181 store half 0xH3800, ptr addrspace(1) %r
185 store half %b.val, ptr addrspace(1) %r
; Test br_cc_f16_imm_b - conditional branch on an ordered less-than compare
; whose SECOND operand is the half constant 0xH3800 and whose first operand is
; loaded from %a.
;
; The check lines in this function are autogenerated (see the note on the first
; line of the file); regenerate them with the update script rather than editing
; them by hand.
;
; What the expected output shows:
;   * The compare is emitted with the constant (inline operand 0.5) as the
;     first source operand and the predicate switched to "ngt"
;     (v_cmp_ngt_f32 on tahiti after v_cvt_f32_f16, v_cmp_ngt_f16 on fiji and
;     gfx1100).
;   * tahiti and fiji branch with s_cbranch_vccnz to the %two label, while the
;     gfx1100 output branches with s_cbranch_vccz to the %one label and lets
;     %two (which materialises 0x3800) fall through into the shared store.
;
; NOTE(review): the parameter list, the block labels and the terminators look
; partly missing in this view of the file - confirm against the full test.
189 define amdgpu_kernel void @br_cc_f16_imm_b(
190 ; SI-LABEL: br_cc_f16_imm_b:
191 ; SI: ; %bb.0: ; %entry
192 ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
193 ; SI-NEXT: s_mov_b32 s7, 0xf000
194 ; SI-NEXT: s_mov_b32 s6, -1
195 ; SI-NEXT: s_waitcnt lgkmcnt(0)
196 ; SI-NEXT: s_mov_b32 s4, s2
197 ; SI-NEXT: s_mov_b32 s5, s3
198 ; SI-NEXT: buffer_load_ushort v0, off, s[4:7], 0
199 ; SI-NEXT: s_waitcnt vmcnt(0)
200 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
201 ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0.5, v0
202 ; SI-NEXT: s_cbranch_vccnz .LBB2_2
203 ; SI-NEXT: ; %bb.1: ; %one
204 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
205 ; SI-NEXT: s_mov_b32 s2, s6
206 ; SI-NEXT: s_mov_b32 s3, s7
207 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
209 ; SI-NEXT: .LBB2_2: ; %two
210 ; SI-NEXT: s_mov_b32 s2, s6
211 ; SI-NEXT: s_mov_b32 s3, s7
212 ; SI-NEXT: v_mov_b32_e32 v0, 0x3800
213 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
216 ; VI-LABEL: br_cc_f16_imm_b:
217 ; VI: ; %bb.0: ; %entry
218 ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
219 ; VI-NEXT: s_mov_b32 s7, 0xf000
220 ; VI-NEXT: s_mov_b32 s6, -1
221 ; VI-NEXT: s_waitcnt lgkmcnt(0)
222 ; VI-NEXT: s_mov_b32 s4, s2
223 ; VI-NEXT: s_mov_b32 s5, s3
224 ; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0
225 ; VI-NEXT: s_mov_b32 s2, s6
226 ; VI-NEXT: s_mov_b32 s3, s7
227 ; VI-NEXT: s_waitcnt vmcnt(0)
228 ; VI-NEXT: v_cmp_ngt_f16_e32 vcc, 0.5, v0
229 ; VI-NEXT: s_cbranch_vccnz .LBB2_2
230 ; VI-NEXT: ; %bb.1: ; %one
231 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
233 ; VI-NEXT: .LBB2_2: ; %two
234 ; VI-NEXT: v_mov_b32_e32 v0, 0x3800
235 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
238 ; GFX11-LABEL: br_cc_f16_imm_b:
239 ; GFX11: ; %bb.0: ; %entry
240 ; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
241 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
242 ; GFX11-NEXT: s_mov_b32 s6, -1
243 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
244 ; GFX11-NEXT: s_mov_b32 s4, s2
245 ; GFX11-NEXT: s_mov_b32 s5, s3
246 ; GFX11-NEXT: buffer_load_u16 v0, off, s[4:7], 0
247 ; GFX11-NEXT: s_waitcnt vmcnt(0)
248 ; GFX11-NEXT: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v0
249 ; GFX11-NEXT: s_cbranch_vccz .LBB2_2
250 ; GFX11-NEXT: ; %bb.1: ; %two
251 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x3800
252 ; GFX11-NEXT: .LBB2_2: ; %one
253 ; GFX11-NEXT: s_mov_b32 s2, s6
254 ; GFX11-NEXT: s_mov_b32 s3, s7
255 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
256 ; GFX11-NEXT: s_endpgm
258 ptr addrspace(1) %a) {
; Only the first compare operand comes from memory (plain, non-volatile load).
260 %a.val = load half, ptr addrspace(1) %a
; Constant on the right-hand side of the ordered less-than compare.
261 %fcmp = fcmp olt half %a.val, 0xH3800
262 br i1 %fcmp, label %one, label %two
; Stores to %r of the loaded value and of the constant; per the check lines the
; %one path stores the loaded value and the %two path the constant.
265 store half %a.val, ptr addrspace(1) %r
269 store half 0xH3800, ptr addrspace(1) %r