; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefixes=GFX90A,GFX90A_ITERATIVE %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefixes=GFX90A,GFX90A_DPP %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefixes=GFX940,GFX940_ITERATIVE %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefixes=GFX940,GFX940_DPP %s
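
; Instruction selection for f64 global atomic fadd on gfx90a and gfx940, via
; the amdgcn intrinsics and via atomicrmw, in no-return and returned-value
; forms, with divergent (VGPR) and uniform (inreg/SGPR) pointers.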
define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_intrinsic(ptr addrspace(1) %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_no_rtn_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX90A-NEXT: {{ $}}
  ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT: S_ENDPGM 0
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_no_rtn_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX940-NEXT: {{ $}}
  ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT: S_ENDPGM 0
  %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret void
}
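
; With the result used, the RTN form is selected and the f64 result is
; returned in $sgpr0/$sgpr1 through V_READFIRSTLANE_B32.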
define amdgpu_ps double @global_atomic_fadd_f64_rtn_intrinsic(ptr addrspace(1) %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_rtn_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX90A-NEXT: {{ $}}
  ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
  ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX90A-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_rtn_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX940-NEXT: {{ $}}
  ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
  ; GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
  ; GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret double %ret
}
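
; A uniform (inreg) pointer selects the SADDR form: the base lives in an SGPR
; pair and the VGPR offset is a zero V_MOV_B32.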
define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_intrinsic(ptr addrspace(1) inreg %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A-NEXT: {{ $}}
  ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT: S_ENDPGM 0
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940-NEXT: {{ $}}
  ; GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT: S_ENDPGM 0
  %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret void
}
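
; Uniform pointer with the result used: GLOBAL_ATOMIC_ADD_F64_SADDR_RTN.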
define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_intrinsic(ptr addrspace(1) inreg %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_saddr_rtn_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A-NEXT: {{ $}}
  ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0
  ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1
  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX90A-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_saddr_rtn_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940-NEXT: {{ $}}
  ; GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0
  ; GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1
  ; GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret double %ret
}
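
; The flat.atomic.fadd intrinsic applied to a global pointer still selects the
; global (not flat) atomic instruction.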
define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_flat_intrinsic(ptr addrspace(1) %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_no_rtn_flat_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX90A-NEXT: {{ $}}
  ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT: S_ENDPGM 0
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_no_rtn_flat_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX940-NEXT: {{ $}}
  ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT: S_ENDPGM 0
  %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret void
}
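
; Flat intrinsic with the result used: same RTN selection as the global
; intrinsic.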
define amdgpu_ps double @global_atomic_fadd_f64_rtn_flat_intrinsic(ptr addrspace(1) %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_rtn_flat_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX90A-NEXT: {{ $}}
  ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
  ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX90A-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_rtn_flat_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX940-NEXT: {{ $}}
  ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
  ; GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
  ; GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret double %ret
}
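
; Flat intrinsic with a uniform pointer: SADDR form.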
define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_flat_intrinsic(ptr addrspace(1) inreg %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_flat_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A-NEXT: {{ $}}
  ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT: S_ENDPGM 0
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_flat_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940-NEXT: {{ $}}
  ; GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT: S_ENDPGM 0
  %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret void
}
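
; Flat intrinsic, uniform pointer, result used: SADDR_RTN form.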
define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_flat_intrinsic(ptr addrspace(1) inreg %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_saddr_rtn_flat_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A-NEXT: {{ $}}
  ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0
  ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1
  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX90A-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_saddr_rtn_flat_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940-NEXT: {{ $}}
  ; GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0
  ; GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1
  ; GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret double %ret
}
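
; atomicrmw fadd with a divergent pointer lowers directly to the atomic
; instruction; the memory operand carries the syncscope("wavefront")
; monotonic ordering.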
define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) #0 {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_no_rtn_atomicrmw
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX90A-NEXT: {{ $}}
  ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT: S_ENDPGM 0
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_no_rtn_atomicrmw
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX940-NEXT: {{ $}}
  ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT: S_ENDPGM 0
  %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic
  ret void
}
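
; atomicrmw fadd with the result used: RTN form, same memory operand.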
define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) #0 {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_rtn_atomicrmw
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX90A-NEXT: {{ $}}
  ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
  ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX90A-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_rtn_atomicrmw
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX940-NEXT: {{ $}}
  ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
  ; GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
  ; GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic
  ret double %ret
}
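
; With a uniform pointer the atomic optimizer first reduces %data across the
; wave: the Iterative strategy loops over the active lanes with V_READLANE and
; accumulates with V_ADD_F64, the DPP strategy uses a log2(wavesize)
; shuffle-and-add sequence, and in both cases a single lane issues the atomic.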
define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) #0 {
  ; GFX90A_ITERATIVE-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw
  ; GFX90A_ITERATIVE: bb.1 (%ir-block.0):
  ; GFX90A_ITERATIVE-NEXT: successors: %bb.2(0x40000000), %bb.6(0x40000000)
  ; GFX90A_ITERATIVE-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A_ITERATIVE-NEXT: {{ $}}
  ; GFX90A_ITERATIVE-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_ITERATIVE-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_ITERATIVE-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_ITERATIVE-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
  ; GFX90A_ITERATIVE-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_ITERATIVE-NEXT: S_BRANCH %bb.2
  ; GFX90A_ITERATIVE-NEXT: {{ $}}
  ; GFX90A_ITERATIVE-NEXT: bb.2 (%ir-block.5):
  ; GFX90A_ITERATIVE-NEXT: successors: %bb.7(0x80000000)
  ; GFX90A_ITERATIVE-NEXT: {{ $}}
  ; GFX90A_ITERATIVE-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
  ; GFX90A_ITERATIVE-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
  ; GFX90A_ITERATIVE-NEXT: S_BRANCH %bb.7
  ; GFX90A_ITERATIVE-NEXT: {{ $}}
  ; GFX90A_ITERATIVE-NEXT: bb.3 (%ir-block.7):
  ; GFX90A_ITERATIVE-NEXT: successors: %bb.4(0x80000000)
  ; GFX90A_ITERATIVE-NEXT: {{ $}}
  ; GFX90A_ITERATIVE-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A_ITERATIVE-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], %25, [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX90A_ITERATIVE-NEXT: {{ $}}
  ; GFX90A_ITERATIVE-NEXT: bb.4.Flow:
  ; GFX90A_ITERATIVE-NEXT: successors: %bb.6(0x80000000)
  ; GFX90A_ITERATIVE-NEXT: {{ $}}
  ; GFX90A_ITERATIVE-NEXT: SI_END_CF %35, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_ITERATIVE-NEXT: S_BRANCH %bb.6
  ; GFX90A_ITERATIVE-NEXT: {{ $}}
  ; GFX90A_ITERATIVE-NEXT: bb.5 (%ir-block.9):
  ; GFX90A_ITERATIVE-NEXT: S_ENDPGM 0
  ; GFX90A_ITERATIVE-NEXT: {{ $}}
  ; GFX90A_ITERATIVE-NEXT: bb.6.Flow1:
  ; GFX90A_ITERATIVE-NEXT: successors: %bb.5(0x80000000)
  ; GFX90A_ITERATIVE-NEXT: {{ $}}
  ; GFX90A_ITERATIVE-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_ITERATIVE-NEXT: S_BRANCH %bb.5
  ; GFX90A_ITERATIVE-NEXT: {{ $}}
  ; GFX90A_ITERATIVE-NEXT: bb.7.ComputeLoop:
  ; GFX90A_ITERATIVE-NEXT: successors: %bb.8(0x04000000), %bb.7(0x7c000000)
  ; GFX90A_ITERATIVE-NEXT: {{ $}}
  ; GFX90A_ITERATIVE-NEXT: [[PHI:%[0-9]+]]:vreg_64_align2 = PHI %17, %bb.7, [[S_MOV_B]], %bb.2
  ; GFX90A_ITERATIVE-NEXT: [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI %22, %bb.7, [[COPY4]], %bb.2
  ; GFX90A_ITERATIVE-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub0
  ; GFX90A_ITERATIVE-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub1
  ; GFX90A_ITERATIVE-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY5]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT: [[V_FFBL_B32_e64_1:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY6]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
  ; GFX90A_ITERATIVE-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FFBL_B32_e64_1]], [[V_MOV_B32_e32_1]], 0, implicit $exec
  ; GFX90A_ITERATIVE-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_FFBL_B32_e64_]], [[V_ADD_U32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY2]], [[V_READFIRSTLANE_B32_]]
  ; GFX90A_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY3]], [[V_READFIRSTLANE_B32_1]]
  ; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
  ; GFX90A_ITERATIVE-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI]], 0, [[COPY7]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_ITERATIVE-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
  ; GFX90A_ITERATIVE-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
  ; GFX90A_ITERATIVE-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY8]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
  ; GFX90A_ITERATIVE-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
  ; GFX90A_ITERATIVE-NEXT: [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY9]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT: [[V_NOT_B32_e32_1:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY10]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub0
  ; GFX90A_ITERATIVE-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub1
  ; GFX90A_ITERATIVE-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY11]], [[V_NOT_B32_e32_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY12]], [[V_NOT_B32_e32_1]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
  ; GFX90A_ITERATIVE-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
  ; GFX90A_ITERATIVE-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE2]], [[COPY13]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT: $vcc = COPY [[V_CMP_NE_U64_e64_]]
  ; GFX90A_ITERATIVE-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit $vcc
  ; GFX90A_ITERATIVE-NEXT: S_BRANCH %bb.8
  ; GFX90A_ITERATIVE-NEXT: {{ $}}
  ; GFX90A_ITERATIVE-NEXT: bb.8.ComputeEnd:
  ; GFX90A_ITERATIVE-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
  ; GFX90A_ITERATIVE-NEXT: {{ $}}
  ; GFX90A_ITERATIVE-NEXT: [[PHI2:%[0-9]+]]:vreg_64_align2 = PHI [[V_ADD_F64_e64_]], %bb.7
  ; GFX90A_ITERATIVE-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX90A_ITERATIVE-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX90A_ITERATIVE-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
  ; GFX90A_ITERATIVE-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY16]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE3]].sub0
  ; GFX90A_ITERATIVE-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX90A_ITERATIVE-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
  ; GFX90A_ITERATIVE-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX90A_ITERATIVE-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY18]], [[COPY19]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
  ; GFX90A_ITERATIVE-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY20]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX90A_ITERATIVE-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY21]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_ITERATIVE-NEXT: S_BRANCH %bb.3
  ;
  ; GFX90A_DPP-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw
  ; GFX90A_DPP: bb.1 (%ir-block.0):
  ; GFX90A_DPP-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000)
  ; GFX90A_DPP-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A_DPP-NEXT: {{ $}}
  ; GFX90A_DPP-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_DPP-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_DPP-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A_DPP-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_DPP-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_DPP-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A_DPP-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
  ; GFX90A_DPP-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_DPP-NEXT: S_BRANCH %bb.2
  ; GFX90A_DPP-NEXT: {{ $}}
  ; GFX90A_DPP-NEXT: bb.2 (%ir-block.5):
  ; GFX90A_DPP-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
  ; GFX90A_DPP-NEXT: {{ $}}
  ; GFX90A_DPP-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
  ; GFX90A_DPP-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX90A_DPP-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX90A_DPP-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
  ; GFX90A_DPP-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX90A_DPP-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
  ; GFX90A_DPP-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE2]].sub0
  ; GFX90A_DPP-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX90A_DPP-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX90A_DPP-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX90A_DPP-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY9]], [[COPY10]], implicit $exec
  ; GFX90A_DPP-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX90A_DPP-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY11]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
  ; GFX90A_DPP-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
  ; GFX90A_DPP-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT: [[V_SET_INACTIVE_B64_:%[0-9]+]]:vreg_64_align2 = V_SET_INACTIVE_B64 [[REG_SEQUENCE1]], [[COPY12]], implicit-def dead $scc, implicit $exec
  ; GFX90A_DPP-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY13]], [[V_SET_INACTIVE_B64_]], 273, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_SET_INACTIVE_B64_]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT: [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY14]], [[V_ADD_F64_e64_]], 274, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 0, [[V_MOV_B1]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT: [[COPY15:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT: [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY15]], [[V_ADD_F64_e64_1]], 276, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT: [[V_ADD_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_1]], 0, [[V_MOV_B2]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT: [[COPY16:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT: [[V_MOV_B3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY16]], [[V_ADD_F64_e64_2]], 280, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT: [[V_ADD_F64_e64_3:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_2]], 0, [[V_MOV_B3]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT: [[COPY17:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT: [[V_MOV_B4:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY17]], [[V_ADD_F64_e64_3]], 322, 10, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT: [[V_ADD_F64_e64_4:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_3]], 0, [[V_MOV_B4]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT: [[COPY18:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT: [[V_MOV_B5:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY18]], [[V_ADD_F64_e64_4]], 323, 12, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT: [[V_ADD_F64_e64_5:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_4]], 0, [[V_MOV_B5]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
  ; GFX90A_DPP-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub0
  ; GFX90A_DPP-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub1
  ; GFX90A_DPP-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY19]], [[S_MOV_B32_2]]
  ; GFX90A_DPP-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY20]], [[S_MOV_B32_2]]
  ; GFX90A_DPP-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
  ; GFX90A_DPP-NEXT: [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
  ; GFX90A_DPP-NEXT: [[STRICT_WWM:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[COPY21]], implicit $exec
  ; GFX90A_DPP-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX90A_DPP-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY22]], implicit $exec
  ; GFX90A_DPP-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_DPP-NEXT: S_BRANCH %bb.3
  ; GFX90A_DPP-NEXT: {{ $}}
  ; GFX90A_DPP-NEXT: bb.3 (%ir-block.31):
  ; GFX90A_DPP-NEXT: successors: %bb.4(0x80000000)
  ; GFX90A_DPP-NEXT: {{ $}}
  ; GFX90A_DPP-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A_DPP-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX90A_DPP-NEXT: {{ $}}
  ; GFX90A_DPP-NEXT: bb.4.Flow:
  ; GFX90A_DPP-NEXT: successors: %bb.5(0x80000000)
  ; GFX90A_DPP-NEXT: {{ $}}
  ; GFX90A_DPP-NEXT: SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_DPP-NEXT: {{ $}}
  ; GFX90A_DPP-NEXT: bb.5 (%ir-block.33):
  ; GFX90A_DPP-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_DPP-NEXT: S_ENDPGM 0
  ;
  ; GFX940_ITERATIVE-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw
  ; GFX940_ITERATIVE: bb.1 (%ir-block.0):
  ; GFX940_ITERATIVE-NEXT: successors: %bb.2(0x40000000), %bb.6(0x40000000)
  ; GFX940_ITERATIVE-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940_ITERATIVE-NEXT: {{ $}}
  ; GFX940_ITERATIVE-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940_ITERATIVE-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940_ITERATIVE-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940_ITERATIVE-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
  ; GFX940_ITERATIVE-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_ITERATIVE-NEXT: S_BRANCH %bb.2
  ; GFX940_ITERATIVE-NEXT: {{ $}}
  ; GFX940_ITERATIVE-NEXT: bb.2 (%ir-block.5):
  ; GFX940_ITERATIVE-NEXT: successors: %bb.7(0x80000000)
  ; GFX940_ITERATIVE-NEXT: {{ $}}
  ; GFX940_ITERATIVE-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
  ; GFX940_ITERATIVE-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
  ; GFX940_ITERATIVE-NEXT: S_BRANCH %bb.7
  ; GFX940_ITERATIVE-NEXT: {{ $}}
  ; GFX940_ITERATIVE-NEXT: bb.3 (%ir-block.7):
  ; GFX940_ITERATIVE-NEXT: successors: %bb.4(0x80000000)
  ; GFX940_ITERATIVE-NEXT: {{ $}}
  ; GFX940_ITERATIVE-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940_ITERATIVE-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], %24, [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX940_ITERATIVE-NEXT: {{ $}}
  ; GFX940_ITERATIVE-NEXT: bb.4.Flow:
  ; GFX940_ITERATIVE-NEXT: successors: %bb.6(0x80000000)
  ; GFX940_ITERATIVE-NEXT: {{ $}}
  ; GFX940_ITERATIVE-NEXT: SI_END_CF %34, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_ITERATIVE-NEXT: S_BRANCH %bb.6
  ; GFX940_ITERATIVE-NEXT: {{ $}}
  ; GFX940_ITERATIVE-NEXT: bb.5 (%ir-block.9):
  ; GFX940_ITERATIVE-NEXT: S_ENDPGM 0
  ; GFX940_ITERATIVE-NEXT: {{ $}}
  ; GFX940_ITERATIVE-NEXT: bb.6.Flow1:
  ; GFX940_ITERATIVE-NEXT: successors: %bb.5(0x80000000)
  ; GFX940_ITERATIVE-NEXT: {{ $}}
  ; GFX940_ITERATIVE-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_ITERATIVE-NEXT: S_BRANCH %bb.5
  ; GFX940_ITERATIVE-NEXT: {{ $}}
  ; GFX940_ITERATIVE-NEXT: bb.7.ComputeLoop:
  ; GFX940_ITERATIVE-NEXT: successors: %bb.8(0x04000000), %bb.7(0x7c000000)
  ; GFX940_ITERATIVE-NEXT: {{ $}}
  ; GFX940_ITERATIVE-NEXT: [[PHI:%[0-9]+]]:vreg_64_align2 = PHI %16, %bb.7, [[S_MOV_B]], %bb.2
  ; GFX940_ITERATIVE-NEXT: [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI %21, %bb.7, [[COPY4]], %bb.2
  ; GFX940_ITERATIVE-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub0
  ; GFX940_ITERATIVE-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub1
  ; GFX940_ITERATIVE-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY5]], implicit $exec
  ; GFX940_ITERATIVE-NEXT: [[V_FFBL_B32_e64_1:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY6]], implicit $exec
  ; GFX940_ITERATIVE-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
  ; GFX940_ITERATIVE-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FFBL_B32_e64_1]], [[V_MOV_B32_e32_1]], 0, implicit $exec
  ; GFX940_ITERATIVE-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_FFBL_B32_e64_]], [[V_ADD_U32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY2]], [[V_READFIRSTLANE_B32_]]
  ; GFX940_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY3]], [[V_READFIRSTLANE_B32_1]]
  ; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
  ; GFX940_ITERATIVE-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI]], 0, [[COPY7]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_ITERATIVE-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
  ; GFX940_ITERATIVE-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
  ; GFX940_ITERATIVE-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY8]], implicit $exec
  ; GFX940_ITERATIVE-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
  ; GFX940_ITERATIVE-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
  ; GFX940_ITERATIVE-NEXT: [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY9]], implicit $exec
  ; GFX940_ITERATIVE-NEXT: [[V_NOT_B32_e32_1:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY10]], implicit $exec
  ; GFX940_ITERATIVE-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub0
  ; GFX940_ITERATIVE-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub1
  ; GFX940_ITERATIVE-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY11]], [[V_NOT_B32_e32_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY12]], [[V_NOT_B32_e32_1]], implicit $exec
  ; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
  ; GFX940_ITERATIVE-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
  ; GFX940_ITERATIVE-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE2]], [[COPY13]], implicit $exec
  ; GFX940_ITERATIVE-NEXT: $vcc = COPY [[V_CMP_NE_U64_e64_]]
  ; GFX940_ITERATIVE-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit $vcc
  ; GFX940_ITERATIVE-NEXT: S_BRANCH %bb.8
  ; GFX940_ITERATIVE-NEXT: {{ $}}
  ; GFX940_ITERATIVE-NEXT: bb.8.ComputeEnd:
  ; GFX940_ITERATIVE-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
  ; GFX940_ITERATIVE-NEXT: {{ $}}
  ; GFX940_ITERATIVE-NEXT: [[PHI2:%[0-9]+]]:vreg_64_align2 = PHI [[V_ADD_F64_e64_]], %bb.7
  ; GFX940_ITERATIVE-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX940_ITERATIVE-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX940_ITERATIVE-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
  ; GFX940_ITERATIVE-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY16]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE3]].sub0
  ; GFX940_ITERATIVE-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX940_ITERATIVE-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
  ; GFX940_ITERATIVE-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX940_ITERATIVE-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY18]], [[COPY19]], implicit $exec
  ; GFX940_ITERATIVE-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
  ; GFX940_ITERATIVE-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY20]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX940_ITERATIVE-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY21]], implicit $exec
  ; GFX940_ITERATIVE-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_ITERATIVE-NEXT: S_BRANCH %bb.3
  ;
  ; GFX940_DPP-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw
  ; GFX940_DPP: bb.1 (%ir-block.0):
  ; GFX940_DPP-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000)
  ; GFX940_DPP-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940_DPP-NEXT: {{ $}}
  ; GFX940_DPP-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940_DPP-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940_DPP-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940_DPP-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940_DPP-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940_DPP-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940_DPP-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
  ; GFX940_DPP-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_DPP-NEXT: S_BRANCH %bb.2
  ; GFX940_DPP-NEXT: {{ $}}
  ; GFX940_DPP-NEXT: bb.2 (%ir-block.5):
  ; GFX940_DPP-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
  ; GFX940_DPP-NEXT: {{ $}}
  ; GFX940_DPP-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
  ; GFX940_DPP-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX940_DPP-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX940_DPP-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
  ; GFX940_DPP-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX940_DPP-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
  ; GFX940_DPP-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE2]].sub0
  ; GFX940_DPP-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX940_DPP-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX940_DPP-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX940_DPP-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY9]], [[COPY10]], implicit $exec
  ; GFX940_DPP-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX940_DPP-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY11]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
  ; GFX940_DPP-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
  ; GFX940_DPP-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT: [[V_SET_INACTIVE_B64_:%[0-9]+]]:vreg_64_align2 = V_SET_INACTIVE_B64 [[REG_SEQUENCE1]], [[COPY12]], implicit-def dead $scc, implicit $exec
  ; GFX940_DPP-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY13]], [[V_SET_INACTIVE_B64_]], 273, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_SET_INACTIVE_B64_]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT: [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY14]], [[V_ADD_F64_e64_]], 274, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 0, [[V_MOV_B1]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT: [[COPY15:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT: [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY15]], [[V_ADD_F64_e64_1]], 276, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT: [[V_ADD_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_1]], 0, [[V_MOV_B2]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT: [[COPY16:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT: [[V_MOV_B3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY16]], [[V_ADD_F64_e64_2]], 280, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT: [[V_ADD_F64_e64_3:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_2]], 0, [[V_MOV_B3]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT: [[COPY17:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT: [[V_MOV_B4:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY17]], [[V_ADD_F64_e64_3]], 322, 10, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT: [[V_ADD_F64_e64_4:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_3]], 0, [[V_MOV_B4]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT: [[COPY18:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT: [[V_MOV_B5:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY18]], [[V_ADD_F64_e64_4]], 323, 12, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT: [[V_ADD_F64_e64_5:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_4]], 0, [[V_MOV_B5]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
  ; GFX940_DPP-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub0
  ; GFX940_DPP-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub1
  ; GFX940_DPP-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY19]], [[S_MOV_B32_2]]
  ; GFX940_DPP-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY20]], [[S_MOV_B32_2]]
  ; GFX940_DPP-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
  ; GFX940_DPP-NEXT: [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
  ; GFX940_DPP-NEXT: [[STRICT_WWM:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[COPY21]], implicit $exec
  ; GFX940_DPP-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX940_DPP-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY22]], implicit $exec
  ; GFX940_DPP-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_DPP-NEXT: S_BRANCH %bb.3
  ; GFX940_DPP-NEXT: {{ $}}
  ; GFX940_DPP-NEXT: bb.3 (%ir-block.31):
  ; GFX940_DPP-NEXT: successors: %bb.4(0x80000000)
  ; GFX940_DPP-NEXT: {{ $}}
  ; GFX940_DPP-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940_DPP-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX940_DPP-NEXT: {{ $}}
  ; GFX940_DPP-NEXT: bb.4.Flow:
  ; GFX940_DPP-NEXT: successors: %bb.5(0x80000000)
  ; GFX940_DPP-NEXT: {{ $}}
  ; GFX940_DPP-NEXT: SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_DPP-NEXT: {{ $}}
  ; GFX940_DPP-NEXT: bb.5 (%ir-block.33):
  ; GFX940_DPP-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_DPP-NEXT: S_ENDPGM 0
  %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic
  ret void
}

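; NOTE (hand-written; the CHECK lines above and below are autogenerated): the
; returning form of the saddr atomicrmw below is expanded by the atomic
; optimizer. The Iterative strategy walks the active lanes in the ComputeLoop
; block (V_FFBL picks the lowest set lane bit, V_READLANE/V_WRITELANE gather
; the input and scatter the running partial sums), while the DPP strategy
; reduces in log2(wavesize) steps with V_MOV_B64_DPP_PSEUDO. The DPP ctrl
; immediates are the standard encodings: 273/274/276/280 are row_shr:1/2/4/8,
; 322/323 are row_bcast:15/31, and 312 (rtn variants only) is wave_shr:1 for
; the exclusive prefix. A single lane then issues the atomic, and each lane
; recombines its prefix sum with the value the atomic returned.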
define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) #0 {
; GFX90A_ITERATIVE-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw
; GFX90A_ITERATIVE: bb.1 (%ir-block.0):
; GFX90A_ITERATIVE-NEXT: successors: %bb.2(0x40000000), %bb.6(0x40000000)
; GFX90A_ITERATIVE-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
; GFX90A_ITERATIVE-NEXT: {{ $}}
; GFX90A_ITERATIVE-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_ITERATIVE-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_ITERATIVE-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_ITERATIVE-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A_ITERATIVE-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; GFX90A_ITERATIVE-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
; GFX90A_ITERATIVE-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A_ITERATIVE-NEXT: S_BRANCH %bb.2
; GFX90A_ITERATIVE-NEXT: {{ $}}
; GFX90A_ITERATIVE-NEXT: bb.2 (%ir-block.5):
; GFX90A_ITERATIVE-NEXT: successors: %bb.7(0x80000000)
; GFX90A_ITERATIVE-NEXT: {{ $}}
; GFX90A_ITERATIVE-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
; GFX90A_ITERATIVE-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
; GFX90A_ITERATIVE-NEXT: S_BRANCH %bb.7
; GFX90A_ITERATIVE-NEXT: {{ $}}
; GFX90A_ITERATIVE-NEXT: bb.3 (%ir-block.7):
; GFX90A_ITERATIVE-NEXT: successors: %bb.4(0x80000000)
; GFX90A_ITERATIVE-NEXT: {{ $}}
; GFX90A_ITERATIVE-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX90A_ITERATIVE-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], %28, [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
; GFX90A_ITERATIVE-NEXT: {{ $}}
; GFX90A_ITERATIVE-NEXT: bb.4 (%ir-block.9):
; GFX90A_ITERATIVE-NEXT: successors: %bb.6(0x80000000)
; GFX90A_ITERATIVE-NEXT: {{ $}}
; GFX90A_ITERATIVE-NEXT: [[PHI:%[0-9]+]]:vreg_64_align2 = PHI [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]], %bb.3, [[DEF]], %bb.8
; GFX90A_ITERATIVE-NEXT: SI_END_CF %38, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A_ITERATIVE-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
; GFX90A_ITERATIVE-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
; GFX90A_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; GFX90A_ITERATIVE-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
; GFX90A_ITERATIVE-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[COPY7]], 0, %27, 0, 0, implicit $mode, implicit $exec
; GFX90A_ITERATIVE-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]]
; GFX90A_ITERATIVE-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_1]]
; GFX90A_ITERATIVE-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_]].sub0
; GFX90A_ITERATIVE-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_]].sub1
; GFX90A_ITERATIVE-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY10]], 0, [[COPY8]], %36, implicit $exec
; GFX90A_ITERATIVE-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY11]], 0, [[COPY9]], %36, implicit $exec
; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_CNDMASK_B32_e64_]], %subreg.sub0, [[V_CNDMASK_B32_e64_1]], %subreg.sub1
; GFX90A_ITERATIVE-NEXT: S_BRANCH %bb.6
; GFX90A_ITERATIVE-NEXT: {{ $}}
; GFX90A_ITERATIVE-NEXT: bb.5 (%ir-block.14):
; GFX90A_ITERATIVE-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY %44.sub0
; GFX90A_ITERATIVE-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY %44.sub1
; GFX90A_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
; GFX90A_ITERATIVE-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]]
; GFX90A_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
; GFX90A_ITERATIVE-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_3]]
; GFX90A_ITERATIVE-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
; GFX90A_ITERATIVE-NEXT: {{ $}}
; GFX90A_ITERATIVE-NEXT: bb.6.Flow:
; GFX90A_ITERATIVE-NEXT: successors: %bb.5(0x80000000)
; GFX90A_ITERATIVE-NEXT: {{ $}}
; GFX90A_ITERATIVE-NEXT: [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI [[REG_SEQUENCE2]], %bb.4, [[DEF]], %bb.1
; GFX90A_ITERATIVE-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A_ITERATIVE-NEXT: S_BRANCH %bb.5
; GFX90A_ITERATIVE-NEXT: {{ $}}
; GFX90A_ITERATIVE-NEXT: bb.7.ComputeLoop:
; GFX90A_ITERATIVE-NEXT: successors: %bb.8(0x04000000), %bb.7(0x7c000000)
; GFX90A_ITERATIVE-NEXT: {{ $}}
; GFX90A_ITERATIVE-NEXT: [[PHI2:%[0-9]+]]:vreg_64_align2 = PHI %19, %bb.7, [[S_MOV_B]], %bb.2
; GFX90A_ITERATIVE-NEXT: [[PHI3:%[0-9]+]]:vreg_64_align2 = PHI %18, %bb.7, [[DEF]], %bb.2
; GFX90A_ITERATIVE-NEXT: [[PHI4:%[0-9]+]]:vreg_64_align2 = PHI %24, %bb.7, [[COPY4]], %bb.2
; GFX90A_ITERATIVE-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub0
; GFX90A_ITERATIVE-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub1
; GFX90A_ITERATIVE-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY14]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[V_FFBL_B32_e64_1:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY15]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
; GFX90A_ITERATIVE-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FFBL_B32_e64_1]], [[V_MOV_B32_e32_1]], 0, implicit $exec
; GFX90A_ITERATIVE-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_FFBL_B32_e64_]], [[V_ADD_U32_e64_]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY2]], [[V_READFIRSTLANE_B32_4]]
; GFX90A_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY3]], [[V_READFIRSTLANE_B32_5]]
; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
; GFX90A_ITERATIVE-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[PHI2]].sub0
; GFX90A_ITERATIVE-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[PHI2]].sub1
; GFX90A_ITERATIVE-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[PHI3]].sub0
; GFX90A_ITERATIVE-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[PHI3]].sub1
; GFX90A_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
; GFX90A_ITERATIVE-NEXT: $m0 = COPY [[V_READFIRSTLANE_B32_7]]
; GFX90A_ITERATIVE-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 [[V_READFIRSTLANE_B32_6]], $m0, [[COPY18]]
; GFX90A_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
; GFX90A_ITERATIVE-NEXT: $m0 = COPY [[V_READFIRSTLANE_B32_9]]
; GFX90A_ITERATIVE-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 [[V_READFIRSTLANE_B32_8]], $m0, [[COPY19]]
; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_WRITELANE_B32_]], %subreg.sub0, [[V_WRITELANE_B32_1]], %subreg.sub1
; GFX90A_ITERATIVE-NEXT: [[COPY20:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
; GFX90A_ITERATIVE-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI2]], 0, [[COPY20]], 0, 0, implicit $mode, implicit $exec
; GFX90A_ITERATIVE-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
; GFX90A_ITERATIVE-NEXT: [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
; GFX90A_ITERATIVE-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY21]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
; GFX90A_ITERATIVE-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
; GFX90A_ITERATIVE-NEXT: [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY22]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[V_NOT_B32_e32_1:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY23]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub0
; GFX90A_ITERATIVE-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub1
; GFX90A_ITERATIVE-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY24]], [[V_NOT_B32_e32_]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY25]], [[V_NOT_B32_e32_1]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
; GFX90A_ITERATIVE-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
; GFX90A_ITERATIVE-NEXT: [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
; GFX90A_ITERATIVE-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE5]], [[COPY26]], implicit $exec
; GFX90A_ITERATIVE-NEXT: $vcc = COPY [[V_CMP_NE_U64_e64_]]
; GFX90A_ITERATIVE-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit $vcc
; GFX90A_ITERATIVE-NEXT: S_BRANCH %bb.8
; GFX90A_ITERATIVE-NEXT: {{ $}}
; GFX90A_ITERATIVE-NEXT: bb.8.ComputeEnd:
; GFX90A_ITERATIVE-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
; GFX90A_ITERATIVE-NEXT: {{ $}}
; GFX90A_ITERATIVE-NEXT: [[PHI5:%[0-9]+]]:vreg_64_align2 = PHI [[REG_SEQUENCE4]], %bb.7
; GFX90A_ITERATIVE-NEXT: [[PHI6:%[0-9]+]]:vreg_64_align2 = PHI [[V_ADD_F64_e64_1]], %bb.7
; GFX90A_ITERATIVE-NEXT: [[COPY27:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
; GFX90A_ITERATIVE-NEXT: [[COPY28:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
; GFX90A_ITERATIVE-NEXT: [[COPY29:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
; GFX90A_ITERATIVE-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY29]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
; GFX90A_ITERATIVE-NEXT: [[COPY30:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE6]].sub0
; GFX90A_ITERATIVE-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX90A_ITERATIVE-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY27]]
; GFX90A_ITERATIVE-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; GFX90A_ITERATIVE-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY31]], [[COPY32]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY30]]
; GFX90A_ITERATIVE-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY33]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; GFX90A_ITERATIVE-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY34]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A_ITERATIVE-NEXT: S_BRANCH %bb.3
;
; GFX90A_DPP-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw
; GFX90A_DPP: bb.1 (%ir-block.0):
; GFX90A_DPP-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; GFX90A_DPP-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
; GFX90A_DPP-NEXT: {{ $}}
; GFX90A_DPP-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_DPP-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GFX90A_DPP-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_DPP-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_DPP-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A_DPP-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
; GFX90A_DPP-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; GFX90A_DPP-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
; GFX90A_DPP-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A_DPP-NEXT: S_BRANCH %bb.2
; GFX90A_DPP-NEXT: {{ $}}
; GFX90A_DPP-NEXT: bb.2 (%ir-block.5):
; GFX90A_DPP-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
; GFX90A_DPP-NEXT: {{ $}}
; GFX90A_DPP-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
; GFX90A_DPP-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
; GFX90A_DPP-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
; GFX90A_DPP-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
; GFX90A_DPP-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX90A_DPP-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
; GFX90A_DPP-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE2]].sub0
; GFX90A_DPP-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX90A_DPP-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX90A_DPP-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; GFX90A_DPP-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY9]], [[COPY10]], implicit $exec
; GFX90A_DPP-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
; GFX90A_DPP-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY11]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
; GFX90A_DPP-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
; GFX90A_DPP-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
; GFX90A_DPP-NEXT: [[V_SET_INACTIVE_B64_:%[0-9]+]]:vreg_64_align2 = V_SET_INACTIVE_B64 [[REG_SEQUENCE1]], [[COPY12]], implicit-def dead $scc, implicit $exec
; GFX90A_DPP-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
; GFX90A_DPP-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY13]], [[V_SET_INACTIVE_B64_]], 273, 15, 15, 0, implicit $exec
; GFX90A_DPP-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_SET_INACTIVE_B64_]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
; GFX90A_DPP-NEXT: [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
; GFX90A_DPP-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY14]], [[V_ADD_F64_e64_]], 274, 15, 15, 0, implicit $exec
; GFX90A_DPP-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 0, [[V_MOV_B1]], 0, 0, implicit $mode, implicit $exec
; GFX90A_DPP-NEXT: [[COPY15:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
; GFX90A_DPP-NEXT: [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY15]], [[V_ADD_F64_e64_1]], 276, 15, 15, 0, implicit $exec
; GFX90A_DPP-NEXT: [[V_ADD_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_1]], 0, [[V_MOV_B2]], 0, 0, implicit $mode, implicit $exec
; GFX90A_DPP-NEXT: [[COPY16:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
; GFX90A_DPP-NEXT: [[V_MOV_B3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY16]], [[V_ADD_F64_e64_2]], 280, 15, 15, 0, implicit $exec
; GFX90A_DPP-NEXT: [[V_ADD_F64_e64_3:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_2]], 0, [[V_MOV_B3]], 0, 0, implicit $mode, implicit $exec
; GFX90A_DPP-NEXT: [[COPY17:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
; GFX90A_DPP-NEXT: [[V_MOV_B4:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY17]], [[V_ADD_F64_e64_3]], 322, 10, 15, 0, implicit $exec
; GFX90A_DPP-NEXT: [[V_ADD_F64_e64_4:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_3]], 0, [[V_MOV_B4]], 0, 0, implicit $mode, implicit $exec
; GFX90A_DPP-NEXT: [[COPY18:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
; GFX90A_DPP-NEXT: [[V_MOV_B5:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY18]], [[V_ADD_F64_e64_4]], 323, 12, 15, 0, implicit $exec
; GFX90A_DPP-NEXT: [[V_ADD_F64_e64_5:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_4]], 0, [[V_MOV_B5]], 0, 0, implicit $mode, implicit $exec
; GFX90A_DPP-NEXT: [[COPY19:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
; GFX90A_DPP-NEXT: [[V_MOV_B6:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY19]], [[V_ADD_F64_e64_5]], 312, 15, 15, 0, implicit $exec
; GFX90A_DPP-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
; GFX90A_DPP-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub0
; GFX90A_DPP-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub1
; GFX90A_DPP-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY20]], [[S_MOV_B32_2]]
; GFX90A_DPP-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY21]], [[S_MOV_B32_2]]
; GFX90A_DPP-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
; GFX90A_DPP-NEXT: [[COPY22:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
; GFX90A_DPP-NEXT: [[STRICT_WWM:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[COPY22]], implicit $exec
; GFX90A_DPP-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; GFX90A_DPP-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY23]], implicit $exec
; GFX90A_DPP-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A_DPP-NEXT: S_BRANCH %bb.3
; GFX90A_DPP-NEXT: {{ $}}
; GFX90A_DPP-NEXT: bb.3 (%ir-block.32):
; GFX90A_DPP-NEXT: successors: %bb.5(0x80000000)
; GFX90A_DPP-NEXT: {{ $}}
; GFX90A_DPP-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX90A_DPP-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
; GFX90A_DPP-NEXT: S_BRANCH %bb.5
; GFX90A_DPP-NEXT: {{ $}}
; GFX90A_DPP-NEXT: bb.4.Flow:
; GFX90A_DPP-NEXT: successors: %bb.6(0x80000000)
; GFX90A_DPP-NEXT: {{ $}}
; GFX90A_DPP-NEXT: [[PHI:%[0-9]+]]:vreg_64_align2 = PHI %45, %bb.5, [[DEF]], %bb.1
; GFX90A_DPP-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A_DPP-NEXT: S_BRANCH %bb.6
; GFX90A_DPP-NEXT: {{ $}}
; GFX90A_DPP-NEXT: bb.5 (%ir-block.35):
; GFX90A_DPP-NEXT: successors: %bb.4(0x80000000)
; GFX90A_DPP-NEXT: {{ $}}
; GFX90A_DPP-NEXT: [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]], %bb.3, [[DEF]], %bb.2
; GFX90A_DPP-NEXT: SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A_DPP-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub0
; GFX90A_DPP-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub1
; GFX90A_DPP-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
; GFX90A_DPP-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
; GFX90A_DPP-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; GFX90A_DPP-NEXT: [[STRICT_WWM1:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[V_MOV_B6]], implicit $exec
; GFX90A_DPP-NEXT: [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE4]]
; GFX90A_DPP-NEXT: [[V_ADD_F64_e64_6:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[COPY26]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec
; GFX90A_DPP-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]]
; GFX90A_DPP-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_1]]
; GFX90A_DPP-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_6]].sub0
; GFX90A_DPP-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_6]].sub1
; GFX90A_DPP-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY29]], 0, [[COPY27]], [[V_CMP_EQ_U32_e64_]], implicit $exec
; GFX90A_DPP-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY30]], 0, [[COPY28]], [[V_CMP_EQ_U32_e64_]], implicit $exec
; GFX90A_DPP-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_CNDMASK_B32_e64_]], %subreg.sub0, [[V_CNDMASK_B32_e64_1]], %subreg.sub1
; GFX90A_DPP-NEXT: S_BRANCH %bb.4
; GFX90A_DPP-NEXT: {{ $}}
; GFX90A_DPP-NEXT: bb.6 (%ir-block.41):
; GFX90A_DPP-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
; GFX90A_DPP-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
; GFX90A_DPP-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
; GFX90A_DPP-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]]
; GFX90A_DPP-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
; GFX90A_DPP-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_3]]
; GFX90A_DPP-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
;
; GFX940_ITERATIVE-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw
; GFX940_ITERATIVE: bb.1 (%ir-block.0):
; GFX940_ITERATIVE-NEXT: successors: %bb.2(0x40000000), %bb.6(0x40000000)
; GFX940_ITERATIVE-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
; GFX940_ITERATIVE-NEXT: {{ $}}
; GFX940_ITERATIVE-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX940_ITERATIVE-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX940_ITERATIVE-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX940_ITERATIVE-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX940_ITERATIVE-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; GFX940_ITERATIVE-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
; GFX940_ITERATIVE-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX940_ITERATIVE-NEXT: S_BRANCH %bb.2
; GFX940_ITERATIVE-NEXT: {{ $}}
; GFX940_ITERATIVE-NEXT: bb.2 (%ir-block.5):
; GFX940_ITERATIVE-NEXT: successors: %bb.7(0x80000000)
; GFX940_ITERATIVE-NEXT: {{ $}}
; GFX940_ITERATIVE-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
; GFX940_ITERATIVE-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
; GFX940_ITERATIVE-NEXT: S_BRANCH %bb.7
; GFX940_ITERATIVE-NEXT: {{ $}}
; GFX940_ITERATIVE-NEXT: bb.3 (%ir-block.7):
; GFX940_ITERATIVE-NEXT: successors: %bb.4(0x80000000)
; GFX940_ITERATIVE-NEXT: {{ $}}
; GFX940_ITERATIVE-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX940_ITERATIVE-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], %27, [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
; GFX940_ITERATIVE-NEXT: {{ $}}
; GFX940_ITERATIVE-NEXT: bb.4 (%ir-block.9):
; GFX940_ITERATIVE-NEXT: successors: %bb.6(0x80000000)
; GFX940_ITERATIVE-NEXT: {{ $}}
; GFX940_ITERATIVE-NEXT: [[PHI:%[0-9]+]]:vreg_64_align2 = PHI [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]], %bb.3, [[DEF]], %bb.8
; GFX940_ITERATIVE-NEXT: SI_END_CF %37, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX940_ITERATIVE-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
; GFX940_ITERATIVE-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
; GFX940_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; GFX940_ITERATIVE-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
; GFX940_ITERATIVE-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[COPY7]], 0, %26, 0, 0, implicit $mode, implicit $exec
; GFX940_ITERATIVE-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]]
; GFX940_ITERATIVE-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_1]]
; GFX940_ITERATIVE-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_]].sub0
; GFX940_ITERATIVE-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_]].sub1
; GFX940_ITERATIVE-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY10]], 0, [[COPY8]], %35, implicit $exec
; GFX940_ITERATIVE-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY11]], 0, [[COPY9]], %35, implicit $exec
; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_CNDMASK_B32_e64_]], %subreg.sub0, [[V_CNDMASK_B32_e64_1]], %subreg.sub1
; GFX940_ITERATIVE-NEXT: S_BRANCH %bb.6
; GFX940_ITERATIVE-NEXT: {{ $}}
; GFX940_ITERATIVE-NEXT: bb.5 (%ir-block.14):
; GFX940_ITERATIVE-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY %43.sub0
; GFX940_ITERATIVE-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY %43.sub1
; GFX940_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
; GFX940_ITERATIVE-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]]
; GFX940_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
; GFX940_ITERATIVE-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_3]]
; GFX940_ITERATIVE-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
; GFX940_ITERATIVE-NEXT: {{ $}}
; GFX940_ITERATIVE-NEXT: bb.6.Flow:
; GFX940_ITERATIVE-NEXT: successors: %bb.5(0x80000000)
; GFX940_ITERATIVE-NEXT: {{ $}}
; GFX940_ITERATIVE-NEXT: [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI [[REG_SEQUENCE2]], %bb.4, [[DEF]], %bb.1
; GFX940_ITERATIVE-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX940_ITERATIVE-NEXT: S_BRANCH %bb.5
; GFX940_ITERATIVE-NEXT: {{ $}}
; GFX940_ITERATIVE-NEXT: bb.7.ComputeLoop:
; GFX940_ITERATIVE-NEXT: successors: %bb.8(0x04000000), %bb.7(0x7c000000)
; GFX940_ITERATIVE-NEXT: {{ $}}
; GFX940_ITERATIVE-NEXT: [[PHI2:%[0-9]+]]:vreg_64_align2 = PHI %18, %bb.7, [[S_MOV_B]], %bb.2
; GFX940_ITERATIVE-NEXT: [[PHI3:%[0-9]+]]:vreg_64_align2 = PHI %17, %bb.7, [[DEF]], %bb.2
; GFX940_ITERATIVE-NEXT: [[PHI4:%[0-9]+]]:vreg_64_align2 = PHI %23, %bb.7, [[COPY4]], %bb.2
; GFX940_ITERATIVE-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub0
; GFX940_ITERATIVE-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub1
; GFX940_ITERATIVE-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY14]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[V_FFBL_B32_e64_1:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY15]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
; GFX940_ITERATIVE-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FFBL_B32_e64_1]], [[V_MOV_B32_e32_1]], 0, implicit $exec
; GFX940_ITERATIVE-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_FFBL_B32_e64_]], [[V_ADD_U32_e64_]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY2]], [[V_READFIRSTLANE_B32_4]]
; GFX940_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY3]], [[V_READFIRSTLANE_B32_5]]
; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
; GFX940_ITERATIVE-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[PHI2]].sub0
; GFX940_ITERATIVE-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[PHI2]].sub1
; GFX940_ITERATIVE-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[PHI3]].sub0
; GFX940_ITERATIVE-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[PHI3]].sub1
; GFX940_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
; GFX940_ITERATIVE-NEXT: $m0 = COPY [[V_READFIRSTLANE_B32_7]]
; GFX940_ITERATIVE-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 [[V_READFIRSTLANE_B32_6]], $m0, [[COPY18]]
; GFX940_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
; GFX940_ITERATIVE-NEXT: $m0 = COPY [[V_READFIRSTLANE_B32_9]]
; GFX940_ITERATIVE-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 [[V_READFIRSTLANE_B32_8]], $m0, [[COPY19]]
; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_WRITELANE_B32_]], %subreg.sub0, [[V_WRITELANE_B32_1]], %subreg.sub1
; GFX940_ITERATIVE-NEXT: [[COPY20:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
; GFX940_ITERATIVE-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI2]], 0, [[COPY20]], 0, 0, implicit $mode, implicit $exec
; GFX940_ITERATIVE-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
; GFX940_ITERATIVE-NEXT: [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
; GFX940_ITERATIVE-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY21]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
; GFX940_ITERATIVE-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
; GFX940_ITERATIVE-NEXT: [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY22]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[V_NOT_B32_e32_1:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY23]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub0
; GFX940_ITERATIVE-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub1
; GFX940_ITERATIVE-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY24]], [[V_NOT_B32_e32_]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY25]], [[V_NOT_B32_e32_1]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
; GFX940_ITERATIVE-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
; GFX940_ITERATIVE-NEXT: [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
; GFX940_ITERATIVE-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE5]], [[COPY26]], implicit $exec
; GFX940_ITERATIVE-NEXT: $vcc = COPY [[V_CMP_NE_U64_e64_]]
; GFX940_ITERATIVE-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit $vcc
; GFX940_ITERATIVE-NEXT: S_BRANCH %bb.8
; GFX940_ITERATIVE-NEXT: {{ $}}
; GFX940_ITERATIVE-NEXT: bb.8.ComputeEnd:
; GFX940_ITERATIVE-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
; GFX940_ITERATIVE-NEXT: {{ $}}
; GFX940_ITERATIVE-NEXT: [[PHI5:%[0-9]+]]:vreg_64_align2 = PHI [[REG_SEQUENCE4]], %bb.7
; GFX940_ITERATIVE-NEXT: [[PHI6:%[0-9]+]]:vreg_64_align2 = PHI [[V_ADD_F64_e64_1]], %bb.7
; GFX940_ITERATIVE-NEXT: [[COPY27:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
; GFX940_ITERATIVE-NEXT: [[COPY28:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
; GFX940_ITERATIVE-NEXT: [[COPY29:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
; GFX940_ITERATIVE-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY29]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
; GFX940_ITERATIVE-NEXT: [[COPY30:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE6]].sub0
; GFX940_ITERATIVE-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX940_ITERATIVE-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY27]]
; GFX940_ITERATIVE-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; GFX940_ITERATIVE-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY31]], [[COPY32]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY30]]
; GFX940_ITERATIVE-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY33]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; GFX940_ITERATIVE-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY34]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX940_ITERATIVE-NEXT: S_BRANCH %bb.3
;
; GFX940_DPP-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw
; GFX940_DPP: bb.1 (%ir-block.0):
; GFX940_DPP-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; GFX940_DPP-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
; GFX940_DPP-NEXT: {{ $}}
; GFX940_DPP-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX940_DPP-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GFX940_DPP-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX940_DPP-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX940_DPP-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX940_DPP-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
; GFX940_DPP-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; GFX940_DPP-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
; GFX940_DPP-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX940_DPP-NEXT: S_BRANCH %bb.2
; GFX940_DPP-NEXT: {{ $}}
; GFX940_DPP-NEXT: bb.2 (%ir-block.5):
; GFX940_DPP-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
; GFX940_DPP-NEXT: {{ $}}
; GFX940_DPP-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
; GFX940_DPP-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
; GFX940_DPP-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
; GFX940_DPP-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
; GFX940_DPP-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX940_DPP-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
; GFX940_DPP-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE2]].sub0
; GFX940_DPP-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX940_DPP-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX940_DPP-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; GFX940_DPP-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY9]], [[COPY10]], implicit $exec
; GFX940_DPP-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
; GFX940_DPP-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY11]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
; GFX940_DPP-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
; GFX940_DPP-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
; GFX940_DPP-NEXT: [[V_SET_INACTIVE_B64_:%[0-9]+]]:vreg_64_align2 = V_SET_INACTIVE_B64 [[REG_SEQUENCE1]], [[COPY12]], implicit-def dead $scc, implicit $exec
; GFX940_DPP-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
; GFX940_DPP-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY13]], [[V_SET_INACTIVE_B64_]], 273, 15, 15, 0, implicit $exec
; GFX940_DPP-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_SET_INACTIVE_B64_]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
; GFX940_DPP-NEXT: [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
; GFX940_DPP-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY14]], [[V_ADD_F64_e64_]], 274, 15, 15, 0, implicit $exec
; GFX940_DPP-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 0, [[V_MOV_B1]], 0, 0, implicit $mode, implicit $exec
; GFX940_DPP-NEXT: [[COPY15:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
; GFX940_DPP-NEXT: [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY15]], [[V_ADD_F64_e64_1]], 276, 15, 15, 0, implicit $exec
; GFX940_DPP-NEXT: [[V_ADD_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_1]], 0, [[V_MOV_B2]], 0, 0, implicit $mode, implicit $exec
; GFX940_DPP-NEXT: [[COPY16:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
; GFX940_DPP-NEXT: [[V_MOV_B3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY16]], [[V_ADD_F64_e64_2]], 280, 15, 15, 0, implicit $exec
; GFX940_DPP-NEXT: [[V_ADD_F64_e64_3:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_2]], 0, [[V_MOV_B3]], 0, 0, implicit $mode, implicit $exec
; GFX940_DPP-NEXT: [[COPY17:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
; GFX940_DPP-NEXT: [[V_MOV_B4:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY17]], [[V_ADD_F64_e64_3]], 322, 10, 15, 0, implicit $exec
; GFX940_DPP-NEXT: [[V_ADD_F64_e64_4:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_3]], 0, [[V_MOV_B4]], 0, 0, implicit $mode, implicit $exec
; GFX940_DPP-NEXT: [[COPY18:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
; GFX940_DPP-NEXT: [[V_MOV_B5:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY18]], [[V_ADD_F64_e64_4]], 323, 12, 15, 0, implicit $exec
; GFX940_DPP-NEXT: [[V_ADD_F64_e64_5:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_4]], 0, [[V_MOV_B5]], 0, 0, implicit $mode, implicit $exec
; GFX940_DPP-NEXT: [[COPY19:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
; GFX940_DPP-NEXT: [[V_MOV_B6:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY19]], [[V_ADD_F64_e64_5]], 312, 15, 15, 0, implicit $exec
; GFX940_DPP-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
; GFX940_DPP-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub0
; GFX940_DPP-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub1
; GFX940_DPP-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY20]], [[S_MOV_B32_2]]
; GFX940_DPP-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY21]], [[S_MOV_B32_2]]
; GFX940_DPP-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
; GFX940_DPP-NEXT: [[COPY22:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
; GFX940_DPP-NEXT: [[STRICT_WWM:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[COPY22]], implicit $exec
; GFX940_DPP-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; GFX940_DPP-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY23]], implicit $exec
; GFX940_DPP-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX940_DPP-NEXT: S_BRANCH %bb.3
; GFX940_DPP-NEXT: {{ $}}
; GFX940_DPP-NEXT: bb.3 (%ir-block.32):
; GFX940_DPP-NEXT: successors: %bb.5(0x80000000)
; GFX940_DPP-NEXT: {{ $}}
; GFX940_DPP-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX940_DPP-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
; GFX940_DPP-NEXT: S_BRANCH %bb.5
; GFX940_DPP-NEXT: {{ $}}
; GFX940_DPP-NEXT: bb.4.Flow:
; GFX940_DPP-NEXT: successors: %bb.6(0x80000000)
; GFX940_DPP-NEXT: {{ $}}
; GFX940_DPP-NEXT: [[PHI:%[0-9]+]]:vreg_64_align2 = PHI %44, %bb.5, [[DEF]], %bb.1
; GFX940_DPP-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX940_DPP-NEXT: S_BRANCH %bb.6
; GFX940_DPP-NEXT: {{ $}}
; GFX940_DPP-NEXT: bb.5 (%ir-block.35):
; GFX940_DPP-NEXT: successors: %bb.4(0x80000000)
; GFX940_DPP-NEXT: {{ $}}
; GFX940_DPP-NEXT: [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]], %bb.3, [[DEF]], %bb.2
; GFX940_DPP-NEXT: SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX940_DPP-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub0
; GFX940_DPP-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub1
; GFX940_DPP-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
; GFX940_DPP-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
; GFX940_DPP-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; GFX940_DPP-NEXT: [[STRICT_WWM1:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[V_MOV_B6]], implicit $exec
; GFX940_DPP-NEXT: [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE4]]
; GFX940_DPP-NEXT: [[V_ADD_F64_e64_6:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[COPY26]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec
; GFX940_DPP-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]]
; GFX940_DPP-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_1]]
; GFX940_DPP-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_6]].sub0
; GFX940_DPP-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_6]].sub1
; GFX940_DPP-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY29]], 0, [[COPY27]], [[V_CMP_EQ_U32_e64_]], implicit $exec
; GFX940_DPP-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY30]], 0, [[COPY28]], [[V_CMP_EQ_U32_e64_]], implicit $exec
; GFX940_DPP-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_CNDMASK_B32_e64_]], %subreg.sub0, [[V_CNDMASK_B32_e64_1]], %subreg.sub1
; GFX940_DPP-NEXT: S_BRANCH %bb.4
; GFX940_DPP-NEXT: {{ $}}
; GFX940_DPP-NEXT: bb.6 (%ir-block.41):
; GFX940_DPP-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
; GFX940_DPP-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
; GFX940_DPP-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
; GFX940_DPP-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]]
; GFX940_DPP-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
; GFX940_DPP-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_3]]
; GFX940_DPP-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic
  ret double %ret
}

declare double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1), double)
declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1), double)
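
; Hand-written note: #0 carries "amdgpu-unsafe-fp-atomics"="true", which is
; what permits the atomicrmw fadd tests above to select the native f64 add
; atomics rather than falling back to a compare-and-swap expansion.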
attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }