1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX6 %s
3 ; RUN: llc -march=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX7 %s
4 ; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s
5 ; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
6 ; RUN: llc -march=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
7 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
; Declaration of the intrinsic exercised by the functions in this file
; (takes no arguments, yields an i32).
9 declare i32 @llvm.get.fpmode.i32()
; Baseline case: a non-kernel function that calls @llvm.get.fpmode.i32 with no
; further masking in the IR.
; Expected codegen per the checks below:
;   - GFX6/7/8 targets: s_getreg_b32 on hwreg(HW_REG_MODE, 0, 19), then
;     s_and_b32 with 0x7f3ff.
;   - GFX9/10/11 targets: s_getreg_b32 on hwreg(HW_REG_MODE, 0, 24), then
;     s_and_b32 with 0x87f3ff.
; In every case the masked scalar is copied into v0 before s_setpc_b64.
11 define i32 @func_fpmode_i32() {
12 ; GFX678-LABEL: func_fpmode_i32:
14 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15 ; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
16 ; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
17 ; GFX678-NEXT: v_mov_b32_e32 v0, s4
18 ; GFX678-NEXT: s_setpc_b64 s[30:31]
20 ; GFX9-LABEL: func_fpmode_i32:
22 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23 ; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
24 ; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
25 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
26 ; GFX9-NEXT: s_setpc_b64 s[30:31]
28 ; GFX10-LABEL: func_fpmode_i32:
30 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31 ; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
32 ; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
33 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
34 ; GFX10-NEXT: s_setpc_b64 s[30:31]
36 ; GFX11-LABEL: func_fpmode_i32:
38 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39 ; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
40 ; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
41 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
42 ; GFX11-NEXT: s_setpc_b64 s[30:31]
43 %fpmode = call i32 @llvm.get.fpmode.i32()
; Same call as func_fpmode_i32, but the function carries the strictfp
; attribute. The expected instruction sequences below are the same as the
; ones checked for the non-strictfp variant.
47 define i32 @strictfp_func_fpmode_i32() strictfp {
48 ; GFX678-LABEL: strictfp_func_fpmode_i32:
50 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51 ; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
52 ; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
53 ; GFX678-NEXT: v_mov_b32_e32 v0, s4
54 ; GFX678-NEXT: s_setpc_b64 s[30:31]
56 ; GFX9-LABEL: strictfp_func_fpmode_i32:
58 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59 ; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
60 ; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
61 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
62 ; GFX9-NEXT: s_setpc_b64 s[30:31]
64 ; GFX10-LABEL: strictfp_func_fpmode_i32:
66 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67 ; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
68 ; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
69 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
70 ; GFX10-NEXT: s_setpc_b64 s[30:31]
72 ; GFX11-LABEL: strictfp_func_fpmode_i32:
74 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75 ; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
76 ; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
77 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
78 ; GFX11-NEXT: s_setpc_b64 s[30:31]
79 %fpmode = call i32 @llvm.get.fpmode.i32()
; amdgpu_kernel variant: the value from @llvm.get.fpmode.i32 is stored through
; the addrspace(1) pointer argument %ptr.
; The GFX6, GFX7 and GFX8 targets are checked separately here because the
; expected store sequence differs: buffer_store_dword on GFX6/7,
; flat_store_dword on GFX8, global_store_dword on GFX9/10 and global_store_b32
; on GFX11. The s_getreg_b32 / s_and_b32 pair matches the non-kernel tests
; (width 19 with mask 0x7f3ff before GFX9, width 24 with mask 0x87f3ff after).
; NOTE(review): the GFX6 check block stops at buffer_store_dword and has no
; s_endpgm line, unlike the otherwise identical GFX7 block - confirm whether
; that check line was dropped and regenerate the assertions if so.
83 define amdgpu_kernel void @kernel_fpmode_i32(ptr addrspace(1) %ptr) {
84 ; GFX6-LABEL: kernel_fpmode_i32:
86 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
87 ; GFX6-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
88 ; GFX6-NEXT: s_and_b32 s4, 0x7f3ff, s4
89 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
90 ; GFX6-NEXT: s_mov_b32 s2, -1
91 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
92 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
93 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
96 ; GFX7-LABEL: kernel_fpmode_i32:
98 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
99 ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
100 ; GFX7-NEXT: s_and_b32 s4, 0x7f3ff, s4
101 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
102 ; GFX7-NEXT: s_mov_b32 s2, -1
103 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
104 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
105 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
106 ; GFX7-NEXT: s_endpgm
108 ; GFX8-LABEL: kernel_fpmode_i32:
110 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
111 ; GFX8-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 19)
112 ; GFX8-NEXT: s_and_b32 s2, 0x7f3ff, s2
113 ; GFX8-NEXT: v_mov_b32_e32 v2, s2
114 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
115 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
116 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
117 ; GFX8-NEXT: flat_store_dword v[0:1], v2
118 ; GFX8-NEXT: s_endpgm
120 ; GFX9-LABEL: kernel_fpmode_i32:
122 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
123 ; GFX9-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24)
124 ; GFX9-NEXT: s_and_b32 s2, 0x87f3ff, s2
125 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
126 ; GFX9-NEXT: v_mov_b32_e32 v1, s2
127 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
128 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
129 ; GFX9-NEXT: s_endpgm
131 ; GFX10-LABEL: kernel_fpmode_i32:
133 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
134 ; GFX10-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24)
135 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
136 ; GFX10-NEXT: s_and_b32 s2, 0x87f3ff, s2
137 ; GFX10-NEXT: v_mov_b32_e32 v1, s2
138 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
139 ; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
140 ; GFX10-NEXT: s_endpgm
142 ; GFX11-LABEL: kernel_fpmode_i32:
144 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
145 ; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24)
146 ; GFX11-NEXT: s_and_b32 s2, 0x87f3ff, s2
147 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
148 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
149 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
150 ; GFX11-NEXT: s_nop 0
151 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
152 ; GFX11-NEXT: s_endpgm
153 %fpmode = call i32 @llvm.get.fpmode.i32()
154 store i32 %fpmode, ptr addrspace(1) %ptr
158 ; TODO: We should be able to reduce the demanded bits and request a narrower field from s_getreg_b32.
; Masks the result of @llvm.get.fpmode.i32 with 240 (0xf0), the bits this
; test names %denorm.only.
; The checks record the current, unoptimized codegen: the full-width
; s_getreg_b32 and per-target mask (0x7f3ff or 0x87f3ff) are still emitted,
; followed by a second s_and_b32 with 0xf0.
160 define i32 @func_fpmode_i32_denormonly() {
161 ; GFX678-LABEL: func_fpmode_i32_denormonly:
163 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164 ; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
165 ; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
166 ; GFX678-NEXT: s_and_b32 s4, s4, 0xf0
167 ; GFX678-NEXT: v_mov_b32_e32 v0, s4
168 ; GFX678-NEXT: s_setpc_b64 s[30:31]
170 ; GFX9-LABEL: func_fpmode_i32_denormonly:
172 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173 ; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
174 ; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
175 ; GFX9-NEXT: s_and_b32 s4, s4, 0xf0
176 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
177 ; GFX9-NEXT: s_setpc_b64 s[30:31]
179 ; GFX10-LABEL: func_fpmode_i32_denormonly:
181 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
182 ; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
183 ; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
184 ; GFX10-NEXT: s_and_b32 s4, s4, 0xf0
185 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
186 ; GFX10-NEXT: s_setpc_b64 s[30:31]
188 ; GFX11-LABEL: func_fpmode_i32_denormonly:
190 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
191 ; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
192 ; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
193 ; GFX11-NEXT: s_and_b32 s0, s0, 0xf0
194 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
195 ; GFX11-NEXT: s_setpc_b64 s[30:31]
196 %fpmode = call i32 @llvm.get.fpmode.i32()
197 %denorm.only = and i32 %fpmode, 240
; Masks the result of @llvm.get.fpmode.i32 with 15 (the low four bits), which
; this test names %round.only.
; The checks expect the full-width s_getreg_b32 and per-target mask, followed
; by a second s_and_b32 with 15.
201 define i32 @func_fpmode_i32_roundonly() {
202 ; GFX678-LABEL: func_fpmode_i32_roundonly:
204 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
205 ; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
206 ; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
207 ; GFX678-NEXT: s_and_b32 s4, s4, 15
208 ; GFX678-NEXT: v_mov_b32_e32 v0, s4
209 ; GFX678-NEXT: s_setpc_b64 s[30:31]
211 ; GFX9-LABEL: func_fpmode_i32_roundonly:
213 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
214 ; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
215 ; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
216 ; GFX9-NEXT: s_and_b32 s4, s4, 15
217 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
218 ; GFX9-NEXT: s_setpc_b64 s[30:31]
220 ; GFX10-LABEL: func_fpmode_i32_roundonly:
222 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223 ; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
224 ; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
225 ; GFX10-NEXT: s_and_b32 s4, s4, 15
226 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
227 ; GFX10-NEXT: s_setpc_b64 s[30:31]
229 ; GFX11-LABEL: func_fpmode_i32_roundonly:
231 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
232 ; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
233 ; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
234 ; GFX11-NEXT: s_and_b32 s0, s0, 15
235 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
236 ; GFX11-NEXT: s_setpc_b64 s[30:31]
237 %fpmode = call i32 @llvm.get.fpmode.i32()
238 %round.only = and i32 %fpmode, 15
; Masks the result of @llvm.get.fpmode.i32 with 255 (0xff) and returns it;
; this is the union of the 15 and 240 masks used by the roundonly and
; denormonly tests.
; The checks expect the full-width s_getreg_b32 and per-target mask, followed
; by a second s_and_b32 with 0xff.
242 define i32 @func_fpmode_i32_round_denorm_only() {
243 ; GFX678-LABEL: func_fpmode_i32_round_denorm_only:
245 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
246 ; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
247 ; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
248 ; GFX678-NEXT: s_and_b32 s4, s4, 0xff
249 ; GFX678-NEXT: v_mov_b32_e32 v0, s4
250 ; GFX678-NEXT: s_setpc_b64 s[30:31]
252 ; GFX9-LABEL: func_fpmode_i32_round_denorm_only:
254 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
255 ; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
256 ; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
257 ; GFX9-NEXT: s_and_b32 s4, s4, 0xff
258 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
259 ; GFX9-NEXT: s_setpc_b64 s[30:31]
261 ; GFX10-LABEL: func_fpmode_i32_round_denorm_only:
263 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
264 ; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
265 ; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
266 ; GFX10-NEXT: s_and_b32 s4, s4, 0xff
267 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
268 ; GFX10-NEXT: s_setpc_b64 s[30:31]
270 ; GFX11-LABEL: func_fpmode_i32_round_denorm_only:
272 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273 ; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
274 ; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
275 ; GFX11-NEXT: s_and_b32 s0, s0, 0xff
276 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
277 ; GFX11-NEXT: s_setpc_b64 s[30:31]
278 %fpmode = call i32 @llvm.get.fpmode.i32()
279 %round.denorm.only = and i32 %fpmode, 255
280 ret i32 %round.denorm.only
; Masks the result of @llvm.get.fpmode.i32 with 1023 (0x3ff), i.e. the low ten
; bits, which this test names %core.mode.
; The checks expect the full-width s_getreg_b32 and per-target mask, followed
; by a second s_and_b32 with 0x3ff.
283 define i32 @func_fpmode_i32_round_denorm_dx10_ieee() {
284 ; GFX678-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
286 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
287 ; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
288 ; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
289 ; GFX678-NEXT: s_and_b32 s4, s4, 0x3ff
290 ; GFX678-NEXT: v_mov_b32_e32 v0, s4
291 ; GFX678-NEXT: s_setpc_b64 s[30:31]
293 ; GFX9-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
295 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
296 ; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
297 ; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
298 ; GFX9-NEXT: s_and_b32 s4, s4, 0x3ff
299 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
300 ; GFX9-NEXT: s_setpc_b64 s[30:31]
302 ; GFX10-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
304 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
305 ; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
306 ; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
307 ; GFX10-NEXT: s_and_b32 s4, s4, 0x3ff
308 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
309 ; GFX10-NEXT: s_setpc_b64 s[30:31]
311 ; GFX11-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
313 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
314 ; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
315 ; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
316 ; GFX11-NEXT: s_and_b32 s0, s0, 0x3ff
317 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
318 ; GFX11-NEXT: s_setpc_b64 s[30:31]
319 %fpmode = call i32 @llvm.get.fpmode.i32()
320 %core.mode = and i32 %fpmode, 1023
; Masks the result of @llvm.get.fpmode.i32 with 520192 (0x7f000), i.e. bits
; 12 through 18 of the mode value.
; The checks expect the full-width s_getreg_b32 and per-target mask, followed
; by a second s_and_b32 with 0x7f000.
324 define i32 @func_fpmode_i32_excp_en() {
325 ; GFX678-LABEL: func_fpmode_i32_excp_en:
327 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
328 ; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
329 ; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
330 ; GFX678-NEXT: s_and_b32 s4, s4, 0x7f000
331 ; GFX678-NEXT: v_mov_b32_e32 v0, s4
332 ; GFX678-NEXT: s_setpc_b64 s[30:31]
334 ; GFX9-LABEL: func_fpmode_i32_excp_en:
336 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
337 ; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
338 ; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
339 ; GFX9-NEXT: s_and_b32 s4, s4, 0x7f000
340 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
341 ; GFX9-NEXT: s_setpc_b64 s[30:31]
343 ; GFX10-LABEL: func_fpmode_i32_excp_en:
345 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
346 ; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
347 ; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
348 ; GFX10-NEXT: s_and_b32 s4, s4, 0x7f000
349 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
350 ; GFX10-NEXT: s_setpc_b64 s[30:31]
352 ; GFX11-LABEL: func_fpmode_i32_excp_en:
354 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
355 ; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
356 ; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
357 ; GFX11-NEXT: s_and_b32 s0, s0, 0x7f000
358 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
359 ; GFX11-NEXT: s_setpc_b64 s[30:31]
360 %fpmode = call i32 @llvm.get.fpmode.i32()
361 %core.mode = and i32 %fpmode, 520192
365 ; Mask for all bits used on gfx6+
; Masks the result of @llvm.get.fpmode.i32 with 521215 (0x7f3ff), the same
; constant the GFX6/7/8 lowering already applies after s_getreg_b32.
; The checks show the IR-level mask is not folded away: a second s_and_b32
; with 0x7f3ff is still expected on every target, including GFX6/7/8 where it
; repeats the preceding mask.
366 define i32 @func_fpmode_i32_environment_gfx6() {
367 ; GFX678-LABEL: func_fpmode_i32_environment_gfx6:
369 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
370 ; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
371 ; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
372 ; GFX678-NEXT: s_and_b32 s4, s4, 0x7f3ff
373 ; GFX678-NEXT: v_mov_b32_e32 v0, s4
374 ; GFX678-NEXT: s_setpc_b64 s[30:31]
376 ; GFX9-LABEL: func_fpmode_i32_environment_gfx6:
378 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
379 ; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
380 ; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
381 ; GFX9-NEXT: s_and_b32 s4, s4, 0x7f3ff
382 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
383 ; GFX9-NEXT: s_setpc_b64 s[30:31]
385 ; GFX10-LABEL: func_fpmode_i32_environment_gfx6:
387 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
388 ; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
389 ; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
390 ; GFX10-NEXT: s_and_b32 s4, s4, 0x7f3ff
391 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
392 ; GFX10-NEXT: s_setpc_b64 s[30:31]
394 ; GFX11-LABEL: func_fpmode_i32_environment_gfx6:
396 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
397 ; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
398 ; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
399 ; GFX11-NEXT: s_and_b32 s0, s0, 0x7f3ff
400 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
401 ; GFX11-NEXT: s_setpc_b64 s[30:31]
402 %fpmode = call i32 @llvm.get.fpmode.i32()
403 %core.mode = and i32 %fpmode, 521215
407 ; Mask for all bits used on gfx9+
; Masks the result of @llvm.get.fpmode.i32 with 8909823 (0x87f3ff), the same
; constant the GFX9/10/11 lowering already applies after s_getreg_b32.
; The checks show the IR-level mask is not folded away: a second s_and_b32
; with 0x87f3ff is expected on every target, after the 0x7f3ff mask on
; GFX6/7/8 and after the identical 0x87f3ff mask on GFX9 and later.
408 define i32 @func_fpmode_i32_environment_gfx9() {
409 ; GFX678-LABEL: func_fpmode_i32_environment_gfx9:
411 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
412 ; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
413 ; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
414 ; GFX678-NEXT: s_and_b32 s4, s4, 0x87f3ff
415 ; GFX678-NEXT: v_mov_b32_e32 v0, s4
416 ; GFX678-NEXT: s_setpc_b64 s[30:31]
418 ; GFX9-LABEL: func_fpmode_i32_environment_gfx9:
420 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
421 ; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
422 ; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
423 ; GFX9-NEXT: s_and_b32 s4, s4, 0x87f3ff
424 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
425 ; GFX9-NEXT: s_setpc_b64 s[30:31]
427 ; GFX10-LABEL: func_fpmode_i32_environment_gfx9:
429 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
430 ; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
431 ; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
432 ; GFX10-NEXT: s_and_b32 s4, s4, 0x87f3ff
433 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
434 ; GFX10-NEXT: s_setpc_b64 s[30:31]
436 ; GFX11-LABEL: func_fpmode_i32_environment_gfx9:
438 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
439 ; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
440 ; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
441 ; GFX11-NEXT: s_and_b32 s0, s0, 0x87f3ff
442 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
443 ; GFX11-NEXT: s_setpc_b64 s[30:31]
444 %fpmode = call i32 @llvm.get.fpmode.i32()
445 %core.mode = and i32 %fpmode, 8909823
; Masks the result of @llvm.get.fpmode.i32 with 48 (0x30), i.e. bits 4 and 5,
; which this test names %denorm.only.
; The checks expect the full-width s_getreg_b32 and per-target mask, followed
; by a second s_and_b32 with 48.
449 define i32 @func_fpmode_i32_denormf32only() {
450 ; GFX678-LABEL: func_fpmode_i32_denormf32only:
452 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
453 ; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
454 ; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
455 ; GFX678-NEXT: s_and_b32 s4, s4, 48
456 ; GFX678-NEXT: v_mov_b32_e32 v0, s4
457 ; GFX678-NEXT: s_setpc_b64 s[30:31]
459 ; GFX9-LABEL: func_fpmode_i32_denormf32only:
461 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
462 ; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
463 ; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
464 ; GFX9-NEXT: s_and_b32 s4, s4, 48
465 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
466 ; GFX9-NEXT: s_setpc_b64 s[30:31]
468 ; GFX10-LABEL: func_fpmode_i32_denormf32only:
470 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
471 ; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
472 ; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
473 ; GFX10-NEXT: s_and_b32 s4, s4, 48
474 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
475 ; GFX10-NEXT: s_setpc_b64 s[30:31]
477 ; GFX11-LABEL: func_fpmode_i32_denormf32only:
479 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
480 ; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
481 ; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
482 ; GFX11-NEXT: s_and_b32 s0, s0, 48
483 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
484 ; GFX11-NEXT: s_setpc_b64 s[30:31]
485 %fpmode = call i32 @llvm.get.fpmode.i32()
486 %denorm.only = and i32 %fpmode, 48
; Single-bit variant: masks the result of @llvm.get.fpmode.i32 with 32 (bit 5),
; one of the two bits covered by the 48 mask in func_fpmode_i32_denormf32only.
; The checks expect the full-width s_getreg_b32 and per-target mask, followed
; by a second s_and_b32 with 32.
490 define i32 @func_fpmode_i32_denormf32only_0() {
491 ; GFX678-LABEL: func_fpmode_i32_denormf32only_0:
493 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
494 ; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
495 ; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
496 ; GFX678-NEXT: s_and_b32 s4, s4, 32
497 ; GFX678-NEXT: v_mov_b32_e32 v0, s4
498 ; GFX678-NEXT: s_setpc_b64 s[30:31]
500 ; GFX9-LABEL: func_fpmode_i32_denormf32only_0:
502 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
503 ; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
504 ; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
505 ; GFX9-NEXT: s_and_b32 s4, s4, 32
506 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
507 ; GFX9-NEXT: s_setpc_b64 s[30:31]
509 ; GFX10-LABEL: func_fpmode_i32_denormf32only_0:
511 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
512 ; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
513 ; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
514 ; GFX10-NEXT: s_and_b32 s4, s4, 32
515 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
516 ; GFX10-NEXT: s_setpc_b64 s[30:31]
518 ; GFX11-LABEL: func_fpmode_i32_denormf32only_0:
520 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521 ; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
522 ; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
523 ; GFX11-NEXT: s_and_b32 s0, s0, 32
524 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
525 ; GFX11-NEXT: s_setpc_b64 s[30:31]
526 %fpmode = call i32 @llvm.get.fpmode.i32()
527 %denorm.only = and i32 %fpmode, 32
; Single-bit variant: masks the result of @llvm.get.fpmode.i32 with 64 (bit 6).
; The checks expect the full-width s_getreg_b32 and per-target mask, followed
; by a second s_and_b32 with 64.
; NOTE(review): 64 (0x40) lies outside the 48 (0x30) mask used by
; func_fpmode_i32_denormf32only and inside the 192 (0xc0) mask used by
; func_fpmode_i32_denormf64f16only, despite the "denormf32only" name - confirm
; the intended bit before relying on this test for f32 denormal coverage.
531 define i32 @func_fpmode_i32_denormf32only_1() {
532 ; GFX678-LABEL: func_fpmode_i32_denormf32only_1:
534 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
535 ; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
536 ; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
537 ; GFX678-NEXT: s_and_b32 s4, s4, 64
538 ; GFX678-NEXT: v_mov_b32_e32 v0, s4
539 ; GFX678-NEXT: s_setpc_b64 s[30:31]
541 ; GFX9-LABEL: func_fpmode_i32_denormf32only_1:
543 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
544 ; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
545 ; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
546 ; GFX9-NEXT: s_and_b32 s4, s4, 64
547 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
548 ; GFX9-NEXT: s_setpc_b64 s[30:31]
550 ; GFX10-LABEL: func_fpmode_i32_denormf32only_1:
552 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
553 ; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
554 ; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
555 ; GFX10-NEXT: s_and_b32 s4, s4, 64
556 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
557 ; GFX10-NEXT: s_setpc_b64 s[30:31]
559 ; GFX11-LABEL: func_fpmode_i32_denormf32only_1:
561 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
562 ; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
563 ; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
564 ; GFX11-NEXT: s_and_b32 s0, s0, 64
565 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
566 ; GFX11-NEXT: s_setpc_b64 s[30:31]
567 %fpmode = call i32 @llvm.get.fpmode.i32()
568 %denorm.only = and i32 %fpmode, 64
; Masks the result of @llvm.get.fpmode.i32 with 192 (0xc0), i.e. bits 6 and 7,
; which this test names %denorm.only.
; The checks expect the full-width s_getreg_b32 and per-target mask, followed
; by a second s_and_b32 with 0xc0.
572 define i32 @func_fpmode_i32_denormf64f16only() {
573 ; GFX678-LABEL: func_fpmode_i32_denormf64f16only:
575 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
576 ; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
577 ; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
578 ; GFX678-NEXT: s_and_b32 s4, s4, 0xc0
579 ; GFX678-NEXT: v_mov_b32_e32 v0, s4
580 ; GFX678-NEXT: s_setpc_b64 s[30:31]
582 ; GFX9-LABEL: func_fpmode_i32_denormf64f16only:
584 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
585 ; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
586 ; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
587 ; GFX9-NEXT: s_and_b32 s4, s4, 0xc0
588 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
589 ; GFX9-NEXT: s_setpc_b64 s[30:31]
591 ; GFX10-LABEL: func_fpmode_i32_denormf64f16only:
593 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
594 ; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
595 ; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
596 ; GFX10-NEXT: s_and_b32 s4, s4, 0xc0
597 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
598 ; GFX10-NEXT: s_setpc_b64 s[30:31]
600 ; GFX11-LABEL: func_fpmode_i32_denormf64f16only:
602 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
603 ; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
604 ; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
605 ; GFX11-NEXT: s_and_b32 s0, s0, 0xc0
606 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
607 ; GFX11-NEXT: s_setpc_b64 s[30:31]
608 %fpmode = call i32 @llvm.get.fpmode.i32()
609 %denorm.only = and i32 %fpmode, 192
; Masks the result of @llvm.get.fpmode.i32 with 256 (0x100), i.e. bit 8, which
; this test names %dx10.only.
; The checks expect the full-width s_getreg_b32 and per-target mask, followed
; by a second s_and_b32 with 0x100.
613 define i32 @func_fpmode_i32_dx10_clamp_only() {
614 ; GFX678-LABEL: func_fpmode_i32_dx10_clamp_only:
616 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
617 ; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
618 ; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
619 ; GFX678-NEXT: s_and_b32 s4, s4, 0x100
620 ; GFX678-NEXT: v_mov_b32_e32 v0, s4
621 ; GFX678-NEXT: s_setpc_b64 s[30:31]
623 ; GFX9-LABEL: func_fpmode_i32_dx10_clamp_only:
625 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
626 ; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
627 ; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
628 ; GFX9-NEXT: s_and_b32 s4, s4, 0x100
629 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
630 ; GFX9-NEXT: s_setpc_b64 s[30:31]
632 ; GFX10-LABEL: func_fpmode_i32_dx10_clamp_only:
634 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
635 ; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
636 ; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
637 ; GFX10-NEXT: s_and_b32 s4, s4, 0x100
638 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
639 ; GFX10-NEXT: s_setpc_b64 s[30:31]
641 ; GFX11-LABEL: func_fpmode_i32_dx10_clamp_only:
643 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
644 ; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
645 ; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
646 ; GFX11-NEXT: s_and_b32 s0, s0, 0x100
647 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
648 ; GFX11-NEXT: s_setpc_b64 s[30:31]
649 %fpmode = call i32 @llvm.get.fpmode.i32()
650 %dx10.only = and i32 %fpmode, 256
; Masks the result of @llvm.get.fpmode.i32 with 512 (0x200), i.e. bit 9, which
; this test names %ieee.only.
; The checks expect the full-width s_getreg_b32 and per-target mask, followed
; by a second s_and_b32 with 0x200.
654 define i32 @func_fpmode_i32_ieee_only() {
655 ; GFX678-LABEL: func_fpmode_i32_ieee_only:
657 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
658 ; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
659 ; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
660 ; GFX678-NEXT: s_and_b32 s4, s4, 0x200
661 ; GFX678-NEXT: v_mov_b32_e32 v0, s4
662 ; GFX678-NEXT: s_setpc_b64 s[30:31]
664 ; GFX9-LABEL: func_fpmode_i32_ieee_only:
666 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
667 ; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
668 ; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
669 ; GFX9-NEXT: s_and_b32 s4, s4, 0x200
670 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
671 ; GFX9-NEXT: s_setpc_b64 s[30:31]
673 ; GFX10-LABEL: func_fpmode_i32_ieee_only:
675 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
676 ; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
677 ; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
678 ; GFX10-NEXT: s_and_b32 s4, s4, 0x200
679 ; GFX10-NEXT: v_mov_b32_e32 v0, s4
680 ; GFX10-NEXT: s_setpc_b64 s[30:31]
682 ; GFX11-LABEL: func_fpmode_i32_ieee_only:
684 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
685 ; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
686 ; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
687 ; GFX11-NEXT: s_and_b32 s0, s0, 0x200
688 ; GFX11-NEXT: v_mov_b32_e32 v0, s0
689 ; GFX11-NEXT: s_setpc_b64 s[30:31]
690 %fpmode = call i32 @llvm.get.fpmode.i32()
691 %ieee.only = and i32 %fpmode, 512
695 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: