1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
5 define amdgpu_ps void @buffer_atomic_fadd_f64_offset_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
6 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offset_no_rtn
7 ; GFX90A_GFX940: bb.0 (%ir-block.0):
8 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
9 ; GFX90A_GFX940-NEXT: {{ $}}
10 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
11 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
12 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
13 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
14 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
15 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
16 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
17 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
18 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
19 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
20 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY7]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
21 ; GFX90A_GFX940-NEXT: S_ENDPGM 0
22 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
26 define amdgpu_ps void @buffer_atomic_fadd_f64_offen_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
27 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offen_no_rtn
28 ; GFX90A_GFX940: bb.0 (%ir-block.0):
29 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
30 ; GFX90A_GFX940-NEXT: {{ $}}
31 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
32 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
33 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
34 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
35 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
36 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
37 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
38 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
39 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
40 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
41 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
42 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
43 ; GFX90A_GFX940-NEXT: S_ENDPGM 0
44 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
48 define amdgpu_ps void @buffer_atomic_fadd_f64_idxen_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
49 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_idxen_no_rtn
50 ; GFX90A_GFX940: bb.0 (%ir-block.0):
51 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
52 ; GFX90A_GFX940-NEXT: {{ $}}
53 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
54 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
55 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
56 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
57 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
58 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
59 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
60 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
61 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
62 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
63 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
64 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
65 ; GFX90A_GFX940-NEXT: S_ENDPGM 0
66 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
70 define amdgpu_ps void @buffer_atomic_fadd_f64_bothen_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
71 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_bothen_no_rtn
72 ; GFX90A_GFX940: bb.0 (%ir-block.0):
73 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
74 ; GFX90A_GFX940-NEXT: {{ $}}
75 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
76 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
77 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
78 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
79 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
80 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
81 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
82 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
83 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
84 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
85 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
86 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
87 ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
88 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY9]], killed [[REG_SEQUENCE2]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
89 ; GFX90A_GFX940-NEXT: S_ENDPGM 0
90 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
94 define amdgpu_ps double @buffer_atomic_fadd_f64_offset_rtn(double %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
95 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offset_rtn
96 ; GFX90A_GFX940: bb.0 (%ir-block.0):
97 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
98 ; GFX90A_GFX940-NEXT: {{ $}}
99 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
100 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
101 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
102 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
103 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
104 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
105 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
106 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
107 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
108 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
109 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY7]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
110 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0
111 ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1
112 ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY8]]
113 ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY9]]
114 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
115 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
119 define amdgpu_ps double @buffer_atomic_fadd_f64_offen_rtn(double %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
120 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offen_rtn
121 ; GFX90A_GFX940: bb.0 (%ir-block.0):
122 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
123 ; GFX90A_GFX940-NEXT: {{ $}}
124 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
125 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
126 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
127 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
128 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
129 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
130 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
131 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
132 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
133 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
134 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
135 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
136 ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0
137 ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1
138 ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY9]]
139 ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY10]]
140 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
141 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
145 define amdgpu_ps double @buffer_atomic_fadd_f64_idxen_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
146 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_idxen_rtn
147 ; GFX90A_GFX940: bb.0 (%ir-block.0):
148 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
149 ; GFX90A_GFX940-NEXT: {{ $}}
150 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
151 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
152 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
153 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
154 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
155 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
156 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
157 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
158 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
159 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
160 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
161 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
162 ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0
163 ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1
164 ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY9]]
165 ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY10]]
166 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
167 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
171 define amdgpu_ps double @buffer_atomic_fadd_f64_bothen_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
172 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_bothen_rtn
173 ; GFX90A_GFX940: bb.0 (%ir-block.0):
174 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
175 ; GFX90A_GFX940-NEXT: {{ $}}
176 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
177 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
178 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
179 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
180 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
181 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
182 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
183 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
184 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
185 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
186 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
187 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
188 ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
189 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY9]], killed [[REG_SEQUENCE2]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
190 ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0
191 ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1
192 ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY10]]
193 ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY11]]
194 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
195 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
199 define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offset_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
200 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn
201 ; GFX90A_GFX940: bb.0 (%ir-block.0):
202 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
203 ; GFX90A_GFX940-NEXT: {{ $}}
204 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
205 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
206 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
207 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
208 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
209 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
210 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
211 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
212 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
213 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
214 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
215 ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
216 ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
217 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
218 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
219 ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
220 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
221 ; GFX90A_GFX940-NEXT: S_ENDPGM 0
222 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
226 define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
227 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn
228 ; GFX90A_GFX940: bb.0 (%ir-block.0):
229 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
230 ; GFX90A_GFX940-NEXT: {{ $}}
231 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
232 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
233 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
234 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
235 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
236 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
237 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
238 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
239 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
240 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
241 ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
242 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
243 ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
244 ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
245 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
246 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
247 ; GFX90A_GFX940-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
248 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
249 ; GFX90A_GFX940-NEXT: S_ENDPGM 0
250 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
254 define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_idxen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
255 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn
256 ; GFX90A_GFX940: bb.0 (%ir-block.0):
257 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
258 ; GFX90A_GFX940-NEXT: {{ $}}
259 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
260 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
261 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
262 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
263 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
264 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
265 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
266 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
267 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
268 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
269 ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
270 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
271 ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
272 ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
273 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
274 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
275 ; GFX90A_GFX940-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
276 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
277 ; GFX90A_GFX940-NEXT: S_ENDPGM 0
278 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
282 define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_bothen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
283 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn
284 ; GFX90A_GFX940: bb.0 (%ir-block.0):
285 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
286 ; GFX90A_GFX940-NEXT: {{ $}}
287 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
288 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
289 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
290 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
291 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
292 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
293 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
294 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
295 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
296 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
297 ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
298 ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
299 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
300 ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
301 ; GFX90A_GFX940-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
302 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
303 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
304 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
305 ; GFX90A_GFX940-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
306 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
307 ; GFX90A_GFX940-NEXT: S_ENDPGM 0
308 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
312 define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offset_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
313 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn
314 ; GFX90A_GFX940: bb.0 (%ir-block.0):
315 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
316 ; GFX90A_GFX940-NEXT: {{ $}}
317 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
318 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
319 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
320 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
321 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
322 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
323 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
324 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
325 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
326 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
327 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
328 ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
329 ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
330 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
331 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
332 ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
333 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
334 ; GFX90A_GFX940-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0
335 ; GFX90A_GFX940-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1
336 ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY12]]
337 ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY13]]
338 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
339 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
343 define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
344 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn
345 ; GFX90A_GFX940: bb.0 (%ir-block.0):
346 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
347 ; GFX90A_GFX940-NEXT: {{ $}}
348 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
349 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
350 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
351 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
352 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
353 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
354 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
355 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
356 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
357 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
358 ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
359 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
360 ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
361 ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
362 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
363 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
364 ; GFX90A_GFX940-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
365 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
366 ; GFX90A_GFX940-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0
367 ; GFX90A_GFX940-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1
368 ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY13]]
369 ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY14]]
370 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
371 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
375 define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_idxen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
376 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn
377 ; GFX90A_GFX940: bb.0 (%ir-block.0):
378 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
379 ; GFX90A_GFX940-NEXT: {{ $}}
380 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
381 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
382 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
383 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
384 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
385 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
386 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
387 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
388 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
389 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
390 ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
391 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
392 ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
393 ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
394 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
395 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
396 ; GFX90A_GFX940-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
397 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
398 ; GFX90A_GFX940-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0
399 ; GFX90A_GFX940-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1
400 ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY13]]
401 ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY14]]
402 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
403 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
407 define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_bothen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
408 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_rtn
409 ; GFX90A_GFX940: bb.0 (%ir-block.0):
410 ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
411 ; GFX90A_GFX940-NEXT: {{ $}}
412 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
413 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
414 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
415 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
416 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
417 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
418 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
419 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
420 ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
421 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
422 ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
423 ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
424 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
425 ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
426 ; GFX90A_GFX940-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
427 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
428 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
429 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
430 ; GFX90A_GFX940-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
431 ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
432 ; GFX90A_GFX940-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0
433 ; GFX90A_GFX940-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1
434 ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY14]]
435 ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY15]]
436 ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
437 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
441 declare double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32 immarg)
442 declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
444 declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32 immarg)
445 declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)