1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
4 ; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
5 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s
7 define amdgpu_ps void @buffer_atomic_fadd_f32_offset_no_rtn(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
8 ; GFX908_GFX11-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn
9 ; GFX908_GFX11: bb.0 (%ir-block.0):
10 ; GFX908_GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
11 ; GFX908_GFX11-NEXT: {{ $}}
12 ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
13 ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
14 ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
15 ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
16 ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
17 ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
18 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
19 ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
20 ; GFX908_GFX11-NEXT: S_ENDPGM 0
21 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn
22 ; GFX90A_GFX940: bb.0 (%ir-block.0):
23 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
24 ; GFX90A_GFX940-NEXT: {{ $}}
25 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
26 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
27 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
28 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
29 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
30 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
31 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
32 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
33 ; GFX90A_GFX940-NEXT: S_ENDPGM 0
34 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
38 define amdgpu_ps void @buffer_atomic_fadd_f32_offen_no_rtn(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
39 ; GFX908_GFX11-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn
40 ; GFX908_GFX11: bb.0 (%ir-block.0):
41 ; GFX908_GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
42 ; GFX908_GFX11-NEXT: {{ $}}
43 ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
44 ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
45 ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
46 ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
47 ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
48 ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
49 ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
50 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
51 ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
52 ; GFX908_GFX11-NEXT: S_ENDPGM 0
53 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn
54 ; GFX90A_GFX940: bb.0 (%ir-block.0):
55 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
56 ; GFX90A_GFX940-NEXT: {{ $}}
57 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
58 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
59 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
60 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
61 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
62 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
63 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
64 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
65 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
66 ; GFX90A_GFX940-NEXT: S_ENDPGM 0
67 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
71 define amdgpu_ps void @buffer_atomic_fadd_f32_idxen_no_rtn(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
72 ; GFX908_GFX11-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn
73 ; GFX908_GFX11: bb.0 (%ir-block.0):
74 ; GFX908_GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
75 ; GFX908_GFX11-NEXT: {{ $}}
76 ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
77 ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
78 ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
79 ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
80 ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
81 ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
82 ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
83 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
84 ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
85 ; GFX908_GFX11-NEXT: S_ENDPGM 0
86 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn
87 ; GFX90A_GFX940: bb.0 (%ir-block.0):
88 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
89 ; GFX90A_GFX940-NEXT: {{ $}}
90 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
91 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
92 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
93 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
94 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
95 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
96 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
97 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
98 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
99 ; GFX90A_GFX940-NEXT: S_ENDPGM 0
100 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
104 define amdgpu_ps void @buffer_atomic_fadd_f32_bothen_no_rtn(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
105 ; GFX908_GFX11-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn
106 ; GFX908_GFX11: bb.0 (%ir-block.0):
107 ; GFX908_GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
108 ; GFX908_GFX11-NEXT: {{ $}}
109 ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
110 ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
111 ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
112 ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
113 ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
114 ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
115 ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
116 ; GFX908_GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
117 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
118 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
119 ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
120 ; GFX908_GFX11-NEXT: S_ENDPGM 0
121 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn
122 ; GFX90A_GFX940: bb.0 (%ir-block.0):
123 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
124 ; GFX90A_GFX940-NEXT: {{ $}}
125 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
126 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
127 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
128 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
129 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
130 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
131 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
132 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
133 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
134 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
135 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
136 ; GFX90A_GFX940-NEXT: S_ENDPGM 0
137 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
141 define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offset_no_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
142 ; GFX908_GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn
143 ; GFX908_GFX11: bb.0 (%ir-block.0):
144 ; GFX908_GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
145 ; GFX908_GFX11-NEXT: {{ $}}
146 ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
147 ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
148 ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
149 ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
150 ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
151 ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
152 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
153 ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
154 ; GFX908_GFX11-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
155 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
156 ; GFX908_GFX11-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
157 ; GFX908_GFX11-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
158 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
159 ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
160 ; GFX908_GFX11-NEXT: S_ENDPGM 0
161 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn
162 ; GFX90A_GFX940: bb.0 (%ir-block.0):
163 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
164 ; GFX90A_GFX940-NEXT: {{ $}}
165 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
166 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
167 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
168 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
169 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
170 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
171 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
172 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
173 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
174 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
175 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
176 ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
177 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
178 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
179 ; GFX90A_GFX940-NEXT: S_ENDPGM 0
180 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
184 define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offen_no_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
185 ; GFX908_GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn
186 ; GFX908_GFX11: bb.0 (%ir-block.0):
187 ; GFX908_GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
188 ; GFX908_GFX11-NEXT: {{ $}}
189 ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
190 ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
191 ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
192 ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
193 ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
194 ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
195 ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
196 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
197 ; GFX908_GFX11-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
198 ; GFX908_GFX11-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
199 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
200 ; GFX908_GFX11-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
201 ; GFX908_GFX11-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
202 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
203 ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
204 ; GFX908_GFX11-NEXT: S_ENDPGM 0
205 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn
206 ; GFX90A_GFX940: bb.0 (%ir-block.0):
207 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
208 ; GFX90A_GFX940-NEXT: {{ $}}
209 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
210 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
211 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
212 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
213 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
214 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
215 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
216 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
217 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
218 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
219 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
220 ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
221 ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
222 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
223 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
224 ; GFX90A_GFX940-NEXT: S_ENDPGM 0
225 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
229 define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_idxen_no_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
230 ; GFX908_GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn
231 ; GFX908_GFX11: bb.0 (%ir-block.0):
232 ; GFX908_GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
233 ; GFX908_GFX11-NEXT: {{ $}}
234 ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
235 ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
236 ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
237 ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
238 ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
239 ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
240 ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
241 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
242 ; GFX908_GFX11-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
243 ; GFX908_GFX11-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
244 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
245 ; GFX908_GFX11-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
246 ; GFX908_GFX11-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
247 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
248 ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
249 ; GFX908_GFX11-NEXT: S_ENDPGM 0
250 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn
251 ; GFX90A_GFX940: bb.0 (%ir-block.0):
252 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
253 ; GFX90A_GFX940-NEXT: {{ $}}
254 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
255 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
256 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
257 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
258 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
259 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
260 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
261 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
262 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
263 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
264 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
265 ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
266 ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
267 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
268 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
269 ; GFX90A_GFX940-NEXT: S_ENDPGM 0
270 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
274 define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_bothen_no_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
275 ; GFX908_GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn
276 ; GFX908_GFX11: bb.0 (%ir-block.0):
277 ; GFX908_GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
278 ; GFX908_GFX11-NEXT: {{ $}}
279 ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
280 ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
281 ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
282 ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
283 ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
284 ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
285 ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
286 ; GFX908_GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
287 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
288 ; GFX908_GFX11-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
289 ; GFX908_GFX11-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
290 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
291 ; GFX908_GFX11-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
292 ; GFX908_GFX11-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
293 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
294 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
295 ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
296 ; GFX908_GFX11-NEXT: S_ENDPGM 0
297 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn
298 ; GFX90A_GFX940: bb.0 (%ir-block.0):
299 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
300 ; GFX90A_GFX940-NEXT: {{ $}}
301 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
302 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
303 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
304 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
305 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
306 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
307 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
308 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
309 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
310 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
311 ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
312 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
313 ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
314 ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
315 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
316 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
317 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
318 ; GFX90A_GFX940-NEXT: S_ENDPGM 0
319 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
323 declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg)
324 declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg)
326 declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
327 declare float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg)