1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
3 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
4 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
6 define amdgpu_ps float @buffer_atomic_fadd_f32_offset_rtn(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
7 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offset_rtn
8 ; GFX90A_GFX940: bb.1 (%ir-block.0):
9 ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
10 ; GFX90A_GFX940-NEXT: {{ $}}
11 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
12 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
13 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
14 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
15 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
16 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
17 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
18 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
19 ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
20 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
21 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_offset_rtn
22 ; GFX11: bb.1 (%ir-block.0):
23 ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
25 ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
26 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
27 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
28 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
29 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
30 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
31 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
32 ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
33 ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
34 ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
35 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
39 define amdgpu_ps float @buffer_atomic_fadd_f32_offen_rtn(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
40 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offen_rtn
41 ; GFX90A_GFX940: bb.1 (%ir-block.0):
42 ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
43 ; GFX90A_GFX940-NEXT: {{ $}}
44 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
45 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
46 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
47 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
48 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
49 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
50 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
51 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
52 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
53 ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
54 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
55 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_offen_rtn
56 ; GFX11: bb.1 (%ir-block.0):
57 ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
59 ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
60 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
61 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
62 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
63 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
64 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
65 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
66 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
67 ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
68 ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
69 ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
70 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
74 define amdgpu_ps float @buffer_atomic_fadd_f32_idxen_rtn(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
75 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_idxen_rtn
76 ; GFX90A_GFX940: bb.1 (%ir-block.0):
77 ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
78 ; GFX90A_GFX940-NEXT: {{ $}}
79 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
80 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
81 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
82 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
83 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
84 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
85 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
86 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
87 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
88 ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
89 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
90 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_idxen_rtn
91 ; GFX11: bb.1 (%ir-block.0):
92 ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
94 ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
95 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
96 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
97 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
98 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
99 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
100 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
101 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
102 ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
103 ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
104 ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
105 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
109 define amdgpu_ps float @buffer_atomic_fadd_f32_bothen_rtn(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
110 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_bothen_rtn
111 ; GFX90A_GFX940: bb.1 (%ir-block.0):
112 ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
113 ; GFX90A_GFX940-NEXT: {{ $}}
114 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
115 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
116 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
117 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
118 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
119 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
120 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
121 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
122 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
123 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
124 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
125 ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
126 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
127 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_bothen_rtn
128 ; GFX11: bb.1 (%ir-block.0):
129 ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
131 ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
132 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
133 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
134 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
135 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
136 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
137 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
138 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
139 ; GFX11-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
140 ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
141 ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
142 ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
143 ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
144 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
148 define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offset_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
149 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn
150 ; GFX90A_GFX940: bb.1 (%ir-block.0):
151 ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
152 ; GFX90A_GFX940-NEXT: {{ $}}
153 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
154 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
155 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
156 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
157 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
158 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
159 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
160 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
161 ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
162 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
163 ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn
164 ; GFX11: bb.1 (%ir-block.0):
165 ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
167 ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
168 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
169 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
170 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
171 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
172 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
173 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
174 ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
175 ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
176 ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
177 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
181 define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
182 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn
183 ; GFX90A_GFX940: bb.1 (%ir-block.0):
184 ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
185 ; GFX90A_GFX940-NEXT: {{ $}}
186 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
187 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
188 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
189 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
190 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
191 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
192 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
193 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
194 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
195 ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
196 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
197 ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn
198 ; GFX11: bb.1 (%ir-block.0):
199 ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
201 ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
202 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
203 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
204 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
205 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
206 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
207 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
208 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
209 ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
210 ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
211 ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
212 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
216 define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_idxen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
217 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn
218 ; GFX90A_GFX940: bb.1 (%ir-block.0):
219 ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
220 ; GFX90A_GFX940-NEXT: {{ $}}
221 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
222 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
223 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
224 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
225 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
226 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
227 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
228 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
229 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
230 ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
231 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
232 ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn
233 ; GFX11: bb.1 (%ir-block.0):
234 ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
236 ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
237 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
238 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
239 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
240 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
241 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
242 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
243 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
244 ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
245 ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
246 ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
247 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
251 define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_bothen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
252 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn
253 ; GFX90A_GFX940: bb.1 (%ir-block.0):
254 ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
255 ; GFX90A_GFX940-NEXT: {{ $}}
256 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
257 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
258 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
259 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
260 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
261 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
262 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
263 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
264 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
265 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
266 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
267 ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
268 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
269 ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn
270 ; GFX11: bb.1 (%ir-block.0):
271 ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
273 ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
274 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
275 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
276 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
277 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
278 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
279 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
280 ; GFX11-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
281 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
282 ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
283 ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
284 ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
285 ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
286 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
290 declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg)
291 declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg)
293 declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
294 declare float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg)