1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
4 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX11 %s
5 ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
6 ; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
7 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX11 %s
9 define amdgpu_ps float @buffer_atomic_fadd_f32_offset_rtn(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
10 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offset_rtn
11 ; GFX90A_GFX940: bb.0 (%ir-block.0):
12 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
13 ; GFX90A_GFX940-NEXT: {{ $}}
14 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
15 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
16 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
17 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
18 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
19 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
20 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
21 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
22 ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
23 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0
25 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_offset_rtn
26 ; GFX11: bb.0 (%ir-block.0):
27 ; GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
29 ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
30 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
31 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
32 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
33 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
34 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
35 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
36 ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
37 ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
38 ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0
39 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
43 define amdgpu_ps float @buffer_atomic_fadd_f32_offen_rtn(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
44 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offen_rtn
45 ; GFX90A_GFX940: bb.0 (%ir-block.0):
46 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
47 ; GFX90A_GFX940-NEXT: {{ $}}
48 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
49 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
50 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
51 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
52 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
53 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
54 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
55 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
56 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
57 ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
58 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0
60 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_offen_rtn
61 ; GFX11: bb.0 (%ir-block.0):
62 ; GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
64 ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
65 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
66 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
67 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
68 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
69 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
70 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
71 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
72 ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
73 ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
74 ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0
75 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
79 define amdgpu_ps float @buffer_atomic_fadd_f32_idxen_rtn(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
80 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_idxen_rtn
81 ; GFX90A_GFX940: bb.0 (%ir-block.0):
82 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
83 ; GFX90A_GFX940-NEXT: {{ $}}
84 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
85 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
86 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
87 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
88 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
89 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
90 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
91 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
92 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
93 ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
94 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0
96 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_idxen_rtn
97 ; GFX11: bb.0 (%ir-block.0):
98 ; GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
100 ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
101 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
102 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
103 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
104 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
105 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
106 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
107 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
108 ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
109 ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
110 ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0
111 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
115 define amdgpu_ps float @buffer_atomic_fadd_f32_bothen_rtn(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
116 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_bothen_rtn
117 ; GFX90A_GFX940: bb.0 (%ir-block.0):
118 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
119 ; GFX90A_GFX940-NEXT: {{ $}}
120 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
121 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
122 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
123 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
124 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
125 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
126 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
127 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
128 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
129 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
130 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
131 ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
132 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0
134 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_bothen_rtn
135 ; GFX11: bb.0 (%ir-block.0):
136 ; GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
138 ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
139 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
140 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
141 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
142 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
143 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
144 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
145 ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
146 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
147 ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
148 ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
149 ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
150 ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0
151 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
155 define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offset_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
156 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn
157 ; GFX90A_GFX940: bb.0 (%ir-block.0):
158 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
159 ; GFX90A_GFX940-NEXT: {{ $}}
160 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
161 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
162 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
163 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
164 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
165 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
166 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
167 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
168 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
169 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
170 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
171 ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
172 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
173 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
174 ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
175 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0
177 ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn
178 ; GFX11: bb.0 (%ir-block.0):
179 ; GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
181 ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
182 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
183 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
184 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
185 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
186 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
187 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
188 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
189 ; GFX11-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
190 ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
191 ; GFX11-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
192 ; GFX11-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
193 ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
194 ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
195 ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
196 ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0
197 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
201 define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
202 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn
203 ; GFX90A_GFX940: bb.0 (%ir-block.0):
204 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
205 ; GFX90A_GFX940-NEXT: {{ $}}
206 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
207 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
208 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
209 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
210 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
211 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
212 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
213 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
214 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
215 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
216 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
217 ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
218 ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
219 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
220 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
221 ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
222 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0
224 ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn
225 ; GFX11: bb.0 (%ir-block.0):
226 ; GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
228 ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
229 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
230 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
231 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
232 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
233 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
234 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
235 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
236 ; GFX11-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
237 ; GFX11-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
238 ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
239 ; GFX11-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
240 ; GFX11-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
241 ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
242 ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
243 ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
244 ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0
245 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
249 define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_idxen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
250 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn
251 ; GFX90A_GFX940: bb.0 (%ir-block.0):
252 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
253 ; GFX90A_GFX940-NEXT: {{ $}}
254 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
255 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
256 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
257 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
258 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
259 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
260 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
261 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
262 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
263 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
264 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
265 ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
266 ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
267 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
268 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
269 ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
270 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0
272 ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn
273 ; GFX11: bb.0 (%ir-block.0):
274 ; GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
276 ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
277 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
278 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
279 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
280 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
281 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
282 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
283 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
284 ; GFX11-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
285 ; GFX11-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
286 ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
287 ; GFX11-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
288 ; GFX11-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
289 ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
290 ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
291 ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
292 ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0
293 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
297 define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_bothen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
298 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn
299 ; GFX90A_GFX940: bb.0 (%ir-block.0):
300 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
301 ; GFX90A_GFX940-NEXT: {{ $}}
302 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
303 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
304 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
305 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
306 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
307 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
308 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
309 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
310 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
311 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
312 ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
313 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
314 ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
315 ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
316 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
317 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
318 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
319 ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
320 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0
322 ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn
323 ; GFX11: bb.0 (%ir-block.0):
324 ; GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
326 ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
327 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
328 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
329 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
330 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
331 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
332 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
333 ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
334 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
335 ; GFX11-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
336 ; GFX11-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
337 ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
338 ; GFX11-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
339 ; GFX11-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
340 ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
341 ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
342 ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
343 ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
344 ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0
345 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
349 declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg)
350 declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg)
352 declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
353 declare float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg)