1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s
3 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
4 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s
5 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
; f32 fadd through the raw ptr buffer atomic intrinsic; the returned value is
; never used. %voffset is a plain argument and %rsrc/%soffset are inreg
; arguments. The trailing immediate operand is 24: the checks below expect no
; extra modifier on gfx908, "scc" on gfx90a, "sc1" on gfx940 and
; "scope:SCOPE_SYS" on gfx1200.
define void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
; GFX908-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    buffer_atomic_add_f32 v0, v1, s[16:19], s20 offen
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    buffer_atomic_add_f32 v0, v1, s[16:19], s20 offen scc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen scope:SCOPE_SYS
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 24)
  ret void
}
; f32 fadd through the raw ptr buffer atomic intrinsic with a constant zero
; voffset and an unused result. The trailing immediate operand is 0. Every
; target is expected to use the "off" addressing form with no offset or cache
; modifier.
define void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) #0 {
; GFX908-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    buffer_atomic_add_f32 v0, off, s[16:19], s20
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    buffer_atomic_add_f32 v0, off, s[16:19], s20
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_atomic_add_f32 v0, off, s[0:3], s16
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    buffer_atomic_add_f32 v0, off, s[0:3], s16
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
  ret void
}
; <2 x half> fadd through the raw ptr buffer atomic intrinsic with an unused
; result. %voffset is a plain argument and %rsrc/%soffset are inreg arguments.
; The trailing immediate operand is 0. Every target is expected to select
; buffer_atomic_pk_add_f16 with "offen" and no cache modifier.
define void @raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
; GFX908-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[16:19], s20 offen
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[16:19], s20 offen
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], s16 offen
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], s16 offen
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}
; <2 x half> fadd through the raw ptr buffer atomic intrinsic with an unused
; result. Despite the "0_voffset" in the name, the call passes the constant 92
; as voffset and never uses the %voffset argument. Every target is expected to
; use the "off" addressing form with the constant folded into "offset:92".
define void @raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
; GFX908-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[16:19], s20 offset:92
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[16:19], s20 offset:92
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[0:3], s16 offset:92
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[0:3], s16 offset:92
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 92, i32 %soffset, i32 0)
  ret void
}
; f32 fadd through the raw ptr buffer atomic intrinsic with an unused result
; and the trailing immediate operand set to 2. The checks below expect that
; value to be printed as "slc" on gfx908 and gfx90a, "nt" on gfx940 and
; "th:TH_ATOMIC_NT" on gfx1200.
define void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
; GFX908-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    buffer_atomic_add_f32 v0, v1, s[16:19], s20 offen slc
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    buffer_atomic_add_f32 v0, v1, s[16:19], s20 offen slc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen nt
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen th:TH_ATOMIC_NT
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 2)
  ret void
}
182 declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
183 declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg)
185 attributes #0 = { nounwind }