1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
3 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
4 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
; br_cc_f16: branch on an ordered less-than compare (fcmp olt) of two half
; values. Both operands come from volatile loads, and each branch target
; stores one of the two loaded values to %r.
; Per the expected output below, the tahiti run converts both operands with
; v_cvt_f32_f16 and compares them with v_cmp_nlt_f32, whereas the fiji and
; gfx1100 runs compare the halves directly with v_cmp_nlt_f16.
6 define amdgpu_kernel void @br_cc_f16(
8 ; SI: ; %bb.0: ; %entry
9 ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
10 ; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd
11 ; SI-NEXT: s_mov_b32 s3, 0xf000
12 ; SI-NEXT: s_mov_b32 s2, -1
13 ; SI-NEXT: s_mov_b32 s10, s2
14 ; SI-NEXT: s_waitcnt lgkmcnt(0)
15 ; SI-NEXT: s_mov_b32 s0, s6
16 ; SI-NEXT: s_mov_b32 s1, s7
17 ; SI-NEXT: s_mov_b32 s11, s3
18 ; SI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc
19 ; SI-NEXT: s_waitcnt vmcnt(0)
20 ; SI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 glc
21 ; SI-NEXT: s_waitcnt vmcnt(0)
22 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
23 ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
24 ; SI-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1
25 ; SI-NEXT: s_cbranch_vccnz .LBB0_2
26 ; SI-NEXT: ; %bb.1: ; %one
27 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
28 ; SI-NEXT: s_branch .LBB0_3
29 ; SI-NEXT: .LBB0_2: ; %two
30 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v1
31 ; SI-NEXT: .LBB0_3: ; %one
32 ; SI-NEXT: s_mov_b32 s6, s2
33 ; SI-NEXT: s_mov_b32 s7, s3
34 ; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
37 ; VI-LABEL: br_cc_f16:
38 ; VI: ; %bb.0: ; %entry
39 ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
40 ; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34
41 ; VI-NEXT: s_mov_b32 s3, 0xf000
42 ; VI-NEXT: s_mov_b32 s2, -1
43 ; VI-NEXT: s_mov_b32 s10, s2
44 ; VI-NEXT: s_waitcnt lgkmcnt(0)
45 ; VI-NEXT: s_mov_b32 s0, s6
46 ; VI-NEXT: s_mov_b32 s1, s7
47 ; VI-NEXT: s_mov_b32 s11, s3
48 ; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc
49 ; VI-NEXT: s_waitcnt vmcnt(0)
50 ; VI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 glc
51 ; VI-NEXT: s_waitcnt vmcnt(0)
52 ; VI-NEXT: s_mov_b32 s6, s2
53 ; VI-NEXT: s_mov_b32 s7, s3
54 ; VI-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1
55 ; VI-NEXT: s_cbranch_vccnz .LBB0_2
56 ; VI-NEXT: ; %bb.1: ; %one
57 ; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
59 ; VI-NEXT: .LBB0_2: ; %two
60 ; VI-NEXT: buffer_store_short v1, off, s[4:7], 0
63 ; GFX11-LABEL: br_cc_f16:
64 ; GFX11: ; %bb.0: ; %entry
65 ; GFX11-NEXT: s_clause 0x1
66 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
67 ; GFX11-NEXT: s_load_b64 s[8:9], s[0:1], 0x34
68 ; GFX11-NEXT: s_mov_b32 s2, -1
69 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
70 ; GFX11-NEXT: s_mov_b32 s10, s2
71 ; GFX11-NEXT: s_mov_b32 s11, s3
72 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
73 ; GFX11-NEXT: s_mov_b32 s0, s6
74 ; GFX11-NEXT: s_mov_b32 s1, s7
75 ; GFX11-NEXT: buffer_load_u16 v0, off, s[0:3], 0 glc dlc
76 ; GFX11-NEXT: s_waitcnt vmcnt(0)
77 ; GFX11-NEXT: buffer_load_u16 v1, off, s[8:11], 0 glc dlc
78 ; GFX11-NEXT: s_waitcnt vmcnt(0)
79 ; GFX11-NEXT: s_mov_b32 s6, s2
80 ; GFX11-NEXT: s_mov_b32 s7, s3
81 ; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
82 ; GFX11-NEXT: s_cbranch_vccnz .LBB0_2
83 ; GFX11-NEXT: ; %bb.1: ; %one
84 ; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
86 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
87 ; GFX11-NEXT: s_endpgm
88 ; GFX11-NEXT: .LBB0_2: ; %two
89 ; GFX11-NEXT: buffer_store_b16 v1, off, s[4:7], 0
91 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
92 ; GFX11-NEXT: s_endpgm
95 ptr addrspace(1) %b) {
; Volatile loads: the expected code issues each buffer load with glc and
; waits on it (s_waitcnt vmcnt(0)) before continuing.
97 %a.val = load volatile half, ptr addrspace(1) %a
98 %b.val = load volatile half, ptr addrspace(1) %b
; The expected code tests the negated condition (v_cmp_nlt_*) and takes
; s_cbranch_vccnz to the %two block when it holds.
99 %fcmp = fcmp olt half %a.val, %b.val
100 br i1 %fcmp, label %one, label %two
; One branch target stores the first loaded value, the other the second.
103 store half %a.val, ptr addrspace(1) %r
107 store half %b.val, ptr addrspace(1) %r
; br_cc_f16_imm_a: compare-and-branch with the half constant 0xH3800 as the
; first fcmp olt operand and a (non-volatile) loaded value as the second.
; The expected output shows the constant as the inline operand 0.5 of
; v_cmp_nlt_* and as the literal 0x3800 moved into v0 on the %one path.
; In the fiji and gfx1100 output the %one path falls through into a single
; shared store; the tahiti output has a separate store on each path.
111 define amdgpu_kernel void @br_cc_f16_imm_a(
112 ; SI-LABEL: br_cc_f16_imm_a:
113 ; SI: ; %bb.0: ; %entry
114 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
115 ; SI-NEXT: s_mov_b32 s7, 0xf000
116 ; SI-NEXT: s_mov_b32 s6, -1
117 ; SI-NEXT: s_waitcnt lgkmcnt(0)
118 ; SI-NEXT: s_mov_b32 s4, s2
119 ; SI-NEXT: s_mov_b32 s5, s3
120 ; SI-NEXT: buffer_load_ushort v0, off, s[4:7], 0
121 ; SI-NEXT: s_waitcnt vmcnt(0)
122 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
123 ; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0.5, v0
124 ; SI-NEXT: s_cbranch_vccnz .LBB1_2
125 ; SI-NEXT: ; %bb.1: ; %one
126 ; SI-NEXT: s_mov_b32 s2, s6
127 ; SI-NEXT: s_mov_b32 s3, s7
128 ; SI-NEXT: v_mov_b32_e32 v0, 0x3800
129 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
131 ; SI-NEXT: .LBB1_2: ; %two
132 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
133 ; SI-NEXT: s_mov_b32 s2, s6
134 ; SI-NEXT: s_mov_b32 s3, s7
135 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
138 ; VI-LABEL: br_cc_f16_imm_a:
139 ; VI: ; %bb.0: ; %entry
140 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
141 ; VI-NEXT: s_mov_b32 s7, 0xf000
142 ; VI-NEXT: s_mov_b32 s6, -1
143 ; VI-NEXT: s_waitcnt lgkmcnt(0)
144 ; VI-NEXT: s_mov_b32 s4, s2
145 ; VI-NEXT: s_mov_b32 s5, s3
146 ; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0
147 ; VI-NEXT: s_mov_b32 s2, s6
148 ; VI-NEXT: s_mov_b32 s3, s7
149 ; VI-NEXT: s_waitcnt vmcnt(0)
150 ; VI-NEXT: v_cmp_nlt_f16_e32 vcc, 0.5, v0
151 ; VI-NEXT: s_cbranch_vccnz .LBB1_2
152 ; VI-NEXT: ; %bb.1: ; %one
153 ; VI-NEXT: v_mov_b32_e32 v0, 0x3800
154 ; VI-NEXT: .LBB1_2: ; %two
155 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
158 ; GFX11-LABEL: br_cc_f16_imm_a:
159 ; GFX11: ; %bb.0: ; %entry
160 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
161 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
162 ; GFX11-NEXT: s_mov_b32 s6, -1
163 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
164 ; GFX11-NEXT: s_mov_b32 s4, s2
165 ; GFX11-NEXT: s_mov_b32 s5, s3
166 ; GFX11-NEXT: buffer_load_u16 v0, off, s[4:7], 0
167 ; GFX11-NEXT: s_waitcnt vmcnt(0)
168 ; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v0
169 ; GFX11-NEXT: s_cbranch_vccnz .LBB1_2
170 ; GFX11-NEXT: ; %bb.1: ; %one
171 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x3800
172 ; GFX11-NEXT: .LBB1_2: ; %two
173 ; GFX11-NEXT: s_mov_b32 s2, s6
174 ; GFX11-NEXT: s_mov_b32 s3, s7
175 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
176 ; GFX11-NEXT: s_nop 0
177 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
178 ; GFX11-NEXT: s_endpgm
180 ptr addrspace(1) %b) {
; Plain (non-volatile) load of the only variable compare operand.
182 %b.val = load half, ptr addrspace(1) %b
; Constant on the left-hand side of the compare.
183 %fcmp = fcmp olt half 0xH3800, %b.val
184 br i1 %fcmp, label %one, label %two
; One branch target stores the constant, the other the loaded value.
187 store half 0xH3800, ptr addrspace(1) %r
191 store half %b.val, ptr addrspace(1) %r
; br_cc_f16_imm_b: compare-and-branch with a (non-volatile) loaded half as
; the first fcmp olt operand and the constant 0xH3800 as the second.
; The expected output keeps the constant as the first compare operand
; (inline 0.5) and uses v_cmp_ngt_* for the test.
; The tahiti and fiji output branches with s_cbranch_vccnz and has a separate
; store on each path; the gfx1100 output branches with s_cbranch_vccz, puts
; %two on the fall-through path, and shares a single store.
195 define amdgpu_kernel void @br_cc_f16_imm_b(
196 ; SI-LABEL: br_cc_f16_imm_b:
197 ; SI: ; %bb.0: ; %entry
198 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
199 ; SI-NEXT: s_mov_b32 s7, 0xf000
200 ; SI-NEXT: s_mov_b32 s6, -1
201 ; SI-NEXT: s_waitcnt lgkmcnt(0)
202 ; SI-NEXT: s_mov_b32 s4, s2
203 ; SI-NEXT: s_mov_b32 s5, s3
204 ; SI-NEXT: buffer_load_ushort v0, off, s[4:7], 0
205 ; SI-NEXT: s_waitcnt vmcnt(0)
206 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
207 ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0.5, v0
208 ; SI-NEXT: s_cbranch_vccnz .LBB2_2
209 ; SI-NEXT: ; %bb.1: ; %one
210 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
211 ; SI-NEXT: s_mov_b32 s2, s6
212 ; SI-NEXT: s_mov_b32 s3, s7
213 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
215 ; SI-NEXT: .LBB2_2: ; %two
216 ; SI-NEXT: s_mov_b32 s2, s6
217 ; SI-NEXT: s_mov_b32 s3, s7
218 ; SI-NEXT: v_mov_b32_e32 v0, 0x3800
219 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
222 ; VI-LABEL: br_cc_f16_imm_b:
223 ; VI: ; %bb.0: ; %entry
224 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
225 ; VI-NEXT: s_mov_b32 s7, 0xf000
226 ; VI-NEXT: s_mov_b32 s6, -1
227 ; VI-NEXT: s_waitcnt lgkmcnt(0)
228 ; VI-NEXT: s_mov_b32 s4, s2
229 ; VI-NEXT: s_mov_b32 s5, s3
230 ; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0
231 ; VI-NEXT: s_mov_b32 s2, s6
232 ; VI-NEXT: s_mov_b32 s3, s7
233 ; VI-NEXT: s_waitcnt vmcnt(0)
234 ; VI-NEXT: v_cmp_ngt_f16_e32 vcc, 0.5, v0
235 ; VI-NEXT: s_cbranch_vccnz .LBB2_2
236 ; VI-NEXT: ; %bb.1: ; %one
237 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
239 ; VI-NEXT: .LBB2_2: ; %two
240 ; VI-NEXT: v_mov_b32_e32 v0, 0x3800
241 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
244 ; GFX11-LABEL: br_cc_f16_imm_b:
245 ; GFX11: ; %bb.0: ; %entry
246 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
247 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
248 ; GFX11-NEXT: s_mov_b32 s6, -1
249 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
250 ; GFX11-NEXT: s_mov_b32 s4, s2
251 ; GFX11-NEXT: s_mov_b32 s5, s3
252 ; GFX11-NEXT: buffer_load_u16 v0, off, s[4:7], 0
253 ; GFX11-NEXT: s_waitcnt vmcnt(0)
254 ; GFX11-NEXT: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v0
255 ; GFX11-NEXT: s_cbranch_vccz .LBB2_2
256 ; GFX11-NEXT: ; %bb.1: ; %two
257 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x3800
258 ; GFX11-NEXT: .LBB2_2: ; %one
259 ; GFX11-NEXT: s_mov_b32 s2, s6
260 ; GFX11-NEXT: s_mov_b32 s3, s7
261 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
262 ; GFX11-NEXT: s_nop 0
263 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
264 ; GFX11-NEXT: s_endpgm
266 ptr addrspace(1) %a) {
; Plain (non-volatile) load of the only variable compare operand.
268 %a.val = load half, ptr addrspace(1) %a
; Constant on the right-hand side of the compare.
269 %fcmp = fcmp olt half %a.val, 0xH3800
270 br i1 %fcmp, label %one, label %two
; One branch target stores the loaded value, the other the constant.
273 store half %a.val, ptr addrspace(1) %r
277 store half 0xH3800, ptr addrspace(1) %r