1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefix=GFX12 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefix=GFX12 %s
; Baseline case: a plain i32 load from the flat pointer %in tagged with
; !amdgpu.last.use, whose result is stored to %out.
; The expected code emits the load as flat_load_b32 with th:TH_LOAD_LU and an
; ordinary flat_store_b32 (no th: modifier) for the store.
5 define amdgpu_kernel void @flat_last_use_load_0(ptr %in, ptr %out) {
6 ; GFX12-LABEL: flat_last_use_load_0:
7 ; GFX12: ; %bb.0: ; %entry
8 ; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
9 ; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
10 ; GFX12-NEXT: s_wait_kmcnt 0x0
11 ; GFX12-NEXT: v_mov_b32_e32 v0, s2
12 ; GFX12-NEXT: v_mov_b32_e32 v1, s3
13 ; GFX12-NEXT: flat_load_b32 v2, v[0:1] th:TH_LOAD_LU
14 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
15 ; GFX12-NEXT: v_mov_b32_e32 v1, s1
16 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
17 ; GFX12-NEXT: flat_store_b32 v[0:1], v2
18 ; GFX12-NEXT: s_endpgm
20 %val = load i32, ptr %in, align 4, !amdgpu.last.use !{}
21 store i32 %val, ptr %out
; Same last-use load as flat_last_use_load_0, but the address is %in indexed
; by the workitem id x (i32 getelementptr), so it is no longer a pure kernel
; argument.
; The expected code builds the 64-bit address in VGPRs with
; v_add_co_u32 / v_add_co_ci_u32 and still emits the load with th:TH_LOAD_LU.
25 define amdgpu_kernel void @flat_last_use_load_1(ptr %in, ptr %out) {
26 ; GFX12-LABEL: flat_last_use_load_1:
27 ; GFX12: ; %bb.0: ; %entry
28 ; GFX12-NEXT: s_mov_b64 s[0:1], s[4:5]
29 ; GFX12-NEXT: s_wait_alu 0xfffe
30 ; GFX12-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
31 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
32 ; GFX12-NEXT: s_mov_b32 s2, 0x3ff
33 ; GFX12-NEXT: s_wait_alu 0xfffe
34 ; GFX12-NEXT: v_and_b32_e64 v0, v0, s2
35 ; GFX12-NEXT: s_mov_b32 s2, 2
36 ; GFX12-NEXT: s_wait_alu 0xfffe
37 ; GFX12-NEXT: v_lshlrev_b32_e64 v1, s2, v0
38 ; GFX12-NEXT: s_mov_b32 s2, 0
39 ; GFX12-NEXT: s_wait_alu 0xfffe
40 ; GFX12-NEXT: ; implicit-def: $sgpr2
41 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
42 ; GFX12-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
43 ; GFX12-NEXT: v_mov_b32_e32 v2, v0
44 ; GFX12-NEXT: s_wait_kmcnt 0x0
45 ; GFX12-NEXT: s_mov_b32 s3, s4
46 ; GFX12-NEXT: s_wait_alu 0xfffe
47 ; GFX12-NEXT: v_mov_b32_e32 v0, v1
48 ; GFX12-NEXT: s_mov_b32 s2, s5
49 ; GFX12-NEXT: s_wait_alu 0xfffe
50 ; GFX12-NEXT: v_mov_b32_e32 v1, v2
51 ; GFX12-NEXT: v_add_co_u32 v0, s3, s3, v0
52 ; GFX12-NEXT: v_add_co_ci_u32_e64 v2, s2, s2, v1, s3
53 ; GFX12-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
54 ; GFX12-NEXT: v_mov_b32_e32 v1, v2
55 ; GFX12-NEXT: flat_load_b32 v2, v[0:1] th:TH_LOAD_LU
56 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
57 ; GFX12-NEXT: v_mov_b32_e32 v1, s1
58 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
59 ; GFX12-NEXT: flat_store_b32 v[0:1], v2
60 ; GFX12-NEXT: s_endpgm
62 %tid = call i32 @llvm.amdgcn.workitem.id.x()
63 %val.gep = getelementptr inbounds i32, ptr %in, i32 %tid
64 %val = load i32, ptr %val.gep, align 4, !amdgpu.last.use !{}
65 store i32 %val, ptr %out
; Combines !amdgpu.last.use with a volatile load.
; The expected load is emitted with th:TH_LOAD_BYPASS scope:SCOPE_SYS instead
; of the th:TH_LOAD_LU seen in the non-volatile tests, and it is followed by
; s_wait_bvhcnt / s_wait_samplecnt / s_wait_loadcnt before the store's address
; is set up.
69 define amdgpu_kernel void @flat_last_use_and_volatile_load(ptr %in, ptr %out) {
70 ; GFX12-LABEL: flat_last_use_and_volatile_load:
71 ; GFX12: ; %bb.0: ; %entry
72 ; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
73 ; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
74 ; GFX12-NEXT: s_wait_kmcnt 0x0
75 ; GFX12-NEXT: v_mov_b32_e32 v0, s2
76 ; GFX12-NEXT: v_mov_b32_e32 v1, s3
77 ; GFX12-NEXT: flat_load_b32 v2, v[0:1] th:TH_LOAD_BYPASS scope:SCOPE_SYS
78 ; GFX12-NEXT: s_wait_bvhcnt 0x0
79 ; GFX12-NEXT: s_wait_samplecnt 0x0
80 ; GFX12-NEXT: s_wait_loadcnt 0x0
81 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
82 ; GFX12-NEXT: v_mov_b32_e32 v1, s1
83 ; GFX12-NEXT: s_wait_dscnt 0x0
84 ; GFX12-NEXT: flat_store_b32 v[0:1], v2
85 ; GFX12-NEXT: s_endpgm
87 %val = load volatile i32, ptr %in, align 4, !amdgpu.last.use !{}
88 store i32 %val, ptr %out
; Combines !amdgpu.last.use with !nontemporal on the same load.
; The expected load is still emitted with th:TH_LOAD_LU, i.e. the same
; modifier as the plain last-use case in flat_last_use_load_0.
; NOTE(review): the definition of !0 is not visible in this part of the file;
; confirm it is the usual nontemporal metadata node.
92 define amdgpu_kernel void @flat_last_use_and_nontemporal_load(ptr %in, ptr %out) {
93 ; GFX12-LABEL: flat_last_use_and_nontemporal_load:
94 ; GFX12: ; %bb.0: ; %entry
95 ; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
96 ; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
97 ; GFX12-NEXT: s_wait_kmcnt 0x0
98 ; GFX12-NEXT: v_mov_b32_e32 v0, s2
99 ; GFX12-NEXT: v_mov_b32_e32 v1, s3
100 ; GFX12-NEXT: flat_load_b32 v2, v[0:1] th:TH_LOAD_LU
101 ; GFX12-NEXT: v_mov_b32_e32 v0, s0
102 ; GFX12-NEXT: v_mov_b32_e32 v1, s1
103 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
104 ; GFX12-NEXT: flat_store_b32 v[0:1], v2
105 ; GFX12-NEXT: s_endpgm
107 %val = load i32, ptr %in, align 4, !amdgpu.last.use !{}, !nontemporal !0
108 store i32 %val, ptr %out
; Intrinsic returning an i32; called by flat_last_use_load_1 to index %in.
113 declare i32 @llvm.amdgcn.workitem.id.x()