; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefixes=GFX90A,GFX90A_ITERATIVE %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefixes=GFX90A,GFX90A_DPP %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefixes=GFX940,GFX940_ITERATIVE %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefixes=GFX940,GFX940_DPP %s
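
; No-return use of the global.atomic.fadd intrinsic: the result is unused, so
; instruction selection picks the non-returning GLOBAL_ATOMIC_ADD_F64.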
define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_intrinsic(ptr addrspace(1) %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_no_rtn_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX90A-NEXT: {{  $}}
  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT:   GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT:   S_ENDPGM 0
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_no_rtn_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT:   GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT:   S_ENDPGM 0
  %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret void
}
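
; Returning variant: the used result selects GLOBAL_ATOMIC_ADD_F64_RTN, and the
; f64 result is split and returned through $sgpr0/$sgpr1 via V_READFIRSTLANE_B32.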
define amdgpu_ps double @global_atomic_fadd_f64_rtn_intrinsic(ptr addrspace(1) %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_rtn_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX90A-NEXT: {{  $}}
  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
  ; GFX90A-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
  ; GFX90A-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX90A-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX90A-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX90A-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX90A-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_rtn_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
  ; GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX940-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX940-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret double %ret
}
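
; A uniform (inreg) pointer selects the SADDR form: the base address stays in
; SGPRs and a zero VGPR provides the voffset.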
define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_intrinsic(ptr addrspace(1) inreg %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A-NEXT: {{  $}}
  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A-NEXT:   GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT:   S_ENDPGM 0
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940-NEXT:   GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT:   S_ENDPGM 0
  %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret void
}
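
; SADDR form with a used result selects GLOBAL_ATOMIC_ADD_F64_SADDR_RTN.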
define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_intrinsic(ptr addrspace(1) inreg %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_saddr_rtn_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A-NEXT: {{  $}}
  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0
  ; GFX90A-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1
  ; GFX90A-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX90A-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX90A-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX90A-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX90A-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_saddr_rtn_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0
  ; GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1
  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX940-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX940-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret double %ret
}
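
; The flat.atomic.fadd intrinsic on an addrspace(1) pointer still selects the
; global (not flat) atomic instruction.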
define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_flat_intrinsic(ptr addrspace(1) %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_no_rtn_flat_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX90A-NEXT: {{  $}}
  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT:   GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT:   S_ENDPGM 0
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_no_rtn_flat_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT:   GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT:   S_ENDPGM 0
  %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret void
}
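
; Returning variant of the flat intrinsic on a global pointer.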
define amdgpu_ps double @global_atomic_fadd_f64_rtn_flat_intrinsic(ptr addrspace(1) %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_rtn_flat_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX90A-NEXT: {{  $}}
  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
  ; GFX90A-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
  ; GFX90A-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX90A-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX90A-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX90A-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX90A-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_rtn_flat_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
  ; GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX940-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX940-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret double %ret
}
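
; Flat intrinsic with a uniform pointer: SADDR form, no return.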
define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_flat_intrinsic(ptr addrspace(1) inreg %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_flat_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A-NEXT: {{  $}}
  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A-NEXT:   GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT:   S_ENDPGM 0
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_flat_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940-NEXT:   GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT:   S_ENDPGM 0
  %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret void
}
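
; Flat intrinsic with a uniform pointer and a used result: SADDR_RTN form.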
define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_flat_intrinsic(ptr addrspace(1) inreg %ptr, double %data) {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_saddr_rtn_flat_intrinsic
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A-NEXT: {{  $}}
  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0
  ; GFX90A-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1
  ; GFX90A-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX90A-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX90A-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX90A-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX90A-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_saddr_rtn_flat_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0
  ; GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1
  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX940-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX940-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
  ret double %ret
}
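
; atomicrmw fadd with a divergent pointer is not rewritten by the atomic
; optimizer, so both strategies select a plain GLOBAL_ATOMIC_ADD_F64 with a
; wavefront-scope monotonic memory operand.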
define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) #0 {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_no_rtn_atomicrmw
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX90A-NEXT: {{  $}}
  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT:   GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT:   S_ENDPGM 0
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_no_rtn_atomicrmw
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT:   GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT:   S_ENDPGM 0
  %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic
  ret void
}
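
; Returning atomicrmw fadd with a divergent pointer.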
define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) #0 {
  ; GFX90A-LABEL: name: global_atomic_fadd_f64_rtn_atomicrmw
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX90A-NEXT: {{  $}}
  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX90A-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
  ; GFX90A-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
  ; GFX90A-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX90A-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX90A-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX90A-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX90A-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ;
  ; GFX940-LABEL: name: global_atomic_fadd_f64_rtn_atomicrmw
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX940-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
  ; GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX940-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX940-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic
  ret double %ret
}
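
; With a uniform pointer the atomic optimizer rewrites the atomicrmw, so the
; checks diverge per strategy: Iterative builds a lane-by-lane ComputeLoop
; reduction, while DPP reduces across the wave with V_MOV_B64_DPP_PSEUDO steps
; before a single lane issues the atomic.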
define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) #0 {
  ; GFX90A_ITERATIVE-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw
  ; GFX90A_ITERATIVE: bb.1 (%ir-block.0):
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.2(0x40000000), %bb.6(0x40000000)
  ; GFX90A_ITERATIVE-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_ITERATIVE-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
  ; GFX90A_ITERATIVE-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.2
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.2 (%ir-block.5):
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.7(0x80000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
  ; GFX90A_ITERATIVE-NEXT:   [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.7
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.3 (%ir-block.7):
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.4(0x80000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], %25, [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.4.Flow:
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.6(0x80000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   SI_END_CF %35, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.6
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.5 (%ir-block.9):
  ; GFX90A_ITERATIVE-NEXT:   S_ENDPGM 0
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.6.Flow1:
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.5(0x80000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.5
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.7.ComputeLoop:
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.8(0x04000000), %bb.7(0x7c000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[PHI:%[0-9]+]]:vreg_64_align2 = PHI %17, %bb.7, [[S_MOV_B]], %bb.2
  ; GFX90A_ITERATIVE-NEXT:   [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI %22, %bb.7, [[COPY4]], %bb.2
  ; GFX90A_ITERATIVE-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY5]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_FFBL_B32_e64_1:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY6]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FFBL_B32_e64_1]], [[V_MOV_B32_e32_1]], 0, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_FFBL_B32_e64_]], [[V_ADD_U32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY2]], [[V_READFIRSTLANE_B32_]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY3]], [[V_READFIRSTLANE_B32_1]]
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI]], 0, [[COPY7]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY8]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY9]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_NOT_B32_e32_1:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY10]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY11]], [[V_NOT_B32_e32_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY12]], [[V_NOT_B32_e32_1]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE2]], [[COPY13]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   $vcc = COPY [[V_CMP_NE_U64_e64_]]
  ; GFX90A_ITERATIVE-NEXT:   S_CBRANCH_VCCNZ %bb.7, implicit $vcc
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.8
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.8.ComputeEnd:
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[PHI2:%[0-9]+]]:vreg_64_align2 = PHI [[V_ADD_F64_e64_]], %bb.7
  ; GFX90A_ITERATIVE-NEXT:   [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY16:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY16]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY17:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE3]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
  ; GFX90A_ITERATIVE-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY18]], [[COPY19]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY20]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY21]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.3
  ;
  ; GFX90A_DPP-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw
  ; GFX90A_DPP: bb.1 (%ir-block.0):
  ; GFX90A_DPP-NEXT:   successors: %bb.2(0x40000000), %bb.5(0x40000000)
  ; GFX90A_DPP-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_DPP-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_DPP-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A_DPP-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_DPP-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_DPP-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A_DPP-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
  ; GFX90A_DPP-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_DPP-NEXT:   S_BRANCH %bb.2
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT: bb.2 (%ir-block.5):
  ; GFX90A_DPP-NEXT:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
  ; GFX90A_DPP-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX90A_DPP-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX90A_DPP-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
  ; GFX90A_DPP-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX90A_DPP-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
  ; GFX90A_DPP-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE2]].sub0
  ; GFX90A_DPP-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX90A_DPP-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX90A_DPP-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX90A_DPP-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY9]], [[COPY10]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX90A_DPP-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY11]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
  ; GFX90A_DPP-NEXT:   [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_SET_INACTIVE_B64_:%[0-9]+]]:vreg_64_align2 = V_SET_INACTIVE_B64 [[REG_SEQUENCE1]], [[COPY12]], implicit-def dead $scc, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY13]], [[V_SET_INACTIVE_B64_]], 273, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_SET_INACTIVE_B64_]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY14]], [[V_ADD_F64_e64_]], 274, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 0, [[V_MOV_B1]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY15:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY15]], [[V_ADD_F64_e64_1]], 276, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_1]], 0, [[V_MOV_B2]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY16:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY16]], [[V_ADD_F64_e64_2]], 280, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_3:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_2]], 0, [[V_MOV_B3]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY17:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B4:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY17]], [[V_ADD_F64_e64_3]], 322, 10, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_4:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_3]], 0, [[V_MOV_B4]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY18:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B5:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY18]], [[V_ADD_F64_e64_4]], 323, 12, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_5:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_4]], 0, [[V_MOV_B5]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
  ; GFX90A_DPP-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub0
  ; GFX90A_DPP-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub1
  ; GFX90A_DPP-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY19]], [[S_MOV_B32_2]]
  ; GFX90A_DPP-NEXT:   [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY20]], [[S_MOV_B32_2]]
  ; GFX90A_DPP-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
  ; GFX90A_DPP-NEXT:   [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
  ; GFX90A_DPP-NEXT:   [[STRICT_WWM:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[COPY21]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX90A_DPP-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY22]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_DPP-NEXT:   S_BRANCH %bb.3
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT: bb.3 (%ir-block.31):
  ; GFX90A_DPP-NEXT:   successors: %bb.4(0x80000000)
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A_DPP-NEXT:   GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT: bb.4.Flow:
  ; GFX90A_DPP-NEXT:   successors: %bb.5(0x80000000)
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT:   SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT: bb.5 (%ir-block.33):
  ; GFX90A_DPP-NEXT:   SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_DPP-NEXT:   S_ENDPGM 0
  ;
  ; GFX940_ITERATIVE-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw
  ; GFX940_ITERATIVE: bb.1 (%ir-block.0):
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.2(0x40000000), %bb.6(0x40000000)
  ; GFX940_ITERATIVE-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940_ITERATIVE-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940_ITERATIVE-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940_ITERATIVE-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
  ; GFX940_ITERATIVE-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.2
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.2 (%ir-block.5):
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.7(0x80000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
  ; GFX940_ITERATIVE-NEXT:   [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.7
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.3 (%ir-block.7):
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.4(0x80000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], %24, [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.4.Flow:
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.6(0x80000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   SI_END_CF %34, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.6
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.5 (%ir-block.9):
  ; GFX940_ITERATIVE-NEXT:   S_ENDPGM 0
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.6.Flow1:
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.5(0x80000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.5
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.7.ComputeLoop:
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.8(0x04000000), %bb.7(0x7c000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[PHI:%[0-9]+]]:vreg_64_align2 = PHI %16, %bb.7, [[S_MOV_B]], %bb.2
  ; GFX940_ITERATIVE-NEXT:   [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI %21, %bb.7, [[COPY4]], %bb.2
  ; GFX940_ITERATIVE-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY5]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_FFBL_B32_e64_1:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY6]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FFBL_B32_e64_1]], [[V_MOV_B32_e32_1]], 0, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_FFBL_B32_e64_]], [[V_ADD_U32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY2]], [[V_READFIRSTLANE_B32_]]
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY3]], [[V_READFIRSTLANE_B32_1]]
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
  ; GFX940_ITERATIVE-NEXT:   [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI]], 0, [[COPY7]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
  ; GFX940_ITERATIVE-NEXT:   [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
  ; GFX940_ITERATIVE-NEXT:   [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY8]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY9]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_NOT_B32_e32_1:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY10]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY11]], [[V_NOT_B32_e32_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY12]], [[V_NOT_B32_e32_1]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
  ; GFX940_ITERATIVE-NEXT:   [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
  ; GFX940_ITERATIVE-NEXT:   [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE2]], [[COPY13]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   $vcc = COPY [[V_CMP_NE_U64_e64_]]
  ; GFX940_ITERATIVE-NEXT:   S_CBRANCH_VCCNZ %bb.7, implicit $vcc
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.8
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.8.ComputeEnd:
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[PHI2:%[0-9]+]]:vreg_64_align2 = PHI [[V_ADD_F64_e64_]], %bb.7
  ; GFX940_ITERATIVE-NEXT:   [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY16:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY16]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   [[COPY17:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE3]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX940_ITERATIVE-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
  ; GFX940_ITERATIVE-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX940_ITERATIVE-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY18]], [[COPY19]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
  ; GFX940_ITERATIVE-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY20]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX940_ITERATIVE-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY21]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.3
  ;
  ; GFX940_DPP-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw
  ; GFX940_DPP: bb.1 (%ir-block.0):
  ; GFX940_DPP-NEXT:   successors: %bb.2(0x40000000), %bb.5(0x40000000)
  ; GFX940_DPP-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940_DPP-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940_DPP-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940_DPP-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940_DPP-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940_DPP-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940_DPP-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
  ; GFX940_DPP-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_DPP-NEXT:   S_BRANCH %bb.2
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT: bb.2 (%ir-block.5):
  ; GFX940_DPP-NEXT:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
  ; GFX940_DPP-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX940_DPP-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX940_DPP-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
  ; GFX940_DPP-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX940_DPP-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
  ; GFX940_DPP-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE2]].sub0
  ; GFX940_DPP-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX940_DPP-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX940_DPP-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX940_DPP-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY9]], [[COPY10]], implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX940_DPP-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY11]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
  ; GFX940_DPP-NEXT:   [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
  ; GFX940_DPP-NEXT:   [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_SET_INACTIVE_B64_:%[0-9]+]]:vreg_64_align2 = V_SET_INACTIVE_B64 [[REG_SEQUENCE1]], [[COPY12]], implicit-def dead $scc, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY13]], [[V_SET_INACTIVE_B64_]], 273, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_SET_INACTIVE_B64_]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY14]], [[V_ADD_F64_e64_]], 274, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 0, [[V_MOV_B1]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY15:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY15]], [[V_ADD_F64_e64_1]], 276, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_1]], 0, [[V_MOV_B2]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY16:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY16]], [[V_ADD_F64_e64_2]], 280, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_3:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_2]], 0, [[V_MOV_B3]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY17:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B4:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY17]], [[V_ADD_F64_e64_3]], 322, 10, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_4:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_3]], 0, [[V_MOV_B4]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY18:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B5:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY18]], [[V_ADD_F64_e64_4]], 323, 12, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_5:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_4]], 0, [[V_MOV_B5]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
  ; GFX940_DPP-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub0
  ; GFX940_DPP-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub1
  ; GFX940_DPP-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY19]], [[S_MOV_B32_2]]
  ; GFX940_DPP-NEXT:   [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY20]], [[S_MOV_B32_2]]
  ; GFX940_DPP-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
  ; GFX940_DPP-NEXT:   [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
  ; GFX940_DPP-NEXT:   [[STRICT_WWM:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[COPY21]], implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX940_DPP-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY22]], implicit $exec
  ; GFX940_DPP-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_DPP-NEXT:   S_BRANCH %bb.3
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT: bb.3 (%ir-block.31):
  ; GFX940_DPP-NEXT:   successors: %bb.4(0x80000000)
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940_DPP-NEXT:   GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT: bb.4.Flow:
  ; GFX940_DPP-NEXT:   successors: %bb.5(0x80000000)
731   ; GFX940_DPP-NEXT: {{  $}}
732   ; GFX940_DPP-NEXT:   SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec
733   ; GFX940_DPP-NEXT: {{  $}}
734   ; GFX940_DPP-NEXT: bb.5 (%ir-block.33):
735   ; GFX940_DPP-NEXT:   SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
736   ; GFX940_DPP-NEXT:   S_ENDPGM 0
  %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic
  ret void
}
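
; NOTE (hand-written, not autogenerated): the _rtn variant below additionally
; checks that each lane is handed back its own result. As the CHECK lines show,
; the single atomic's return value is broadcast with V_READFIRSTLANE_B32, each
; lane re-adds its partial sum with V_ADD_F64, and V_CNDMASK_B32 keeps the raw
; broadcast value for the lane that issued the atomic before the result is
; returned in $sgpr0/$sgpr1.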
define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) #0 {
  ; GFX90A_ITERATIVE-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw
  ; GFX90A_ITERATIVE: bb.1 (%ir-block.0):
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.2(0x40000000), %bb.6(0x40000000)
  ; GFX90A_ITERATIVE-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_ITERATIVE-NEXT:   [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
  ; GFX90A_ITERATIVE-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
  ; GFX90A_ITERATIVE-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.2
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.2 (%ir-block.5):
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.7(0x80000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
  ; GFX90A_ITERATIVE-NEXT:   [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.7
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.3 (%ir-block.7):
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.4(0x80000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], %28, [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.4 (%ir-block.9):
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.6(0x80000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[PHI:%[0-9]+]]:vreg_64_align2 = PHI [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]], %bb.3, [[DEF]], %bb.8
  ; GFX90A_ITERATIVE-NEXT:   SI_END_CF %38, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[COPY7]], 0, %27, 0, 0, implicit $mode, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX90A_ITERATIVE-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX90A_ITERATIVE-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY10]], 0, [[COPY8]], %36, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY11]], 0, [[COPY9]], %36, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_CNDMASK_B32_e64_]], %subreg.sub0, [[V_CNDMASK_B32_e64_1]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.6
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.5 (%ir-block.14):
  ; GFX90A_ITERATIVE-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY %44.sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY %44.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_3]]
  ; GFX90A_ITERATIVE-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.6.Flow:
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.5(0x80000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI [[REG_SEQUENCE2]], %bb.4, [[DEF]], %bb.1
  ; GFX90A_ITERATIVE-NEXT:   SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.5
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.7.ComputeLoop:
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.8(0x04000000), %bb.7(0x7c000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[PHI2:%[0-9]+]]:vreg_64_align2 = PHI %19, %bb.7, [[S_MOV_B]], %bb.2
  ; GFX90A_ITERATIVE-NEXT:   [[PHI3:%[0-9]+]]:vreg_64_align2 = PHI %18, %bb.7, [[DEF]], %bb.2
  ; GFX90A_ITERATIVE-NEXT:   [[PHI4:%[0-9]+]]:vreg_64_align2 = PHI %24, %bb.7, [[COPY4]], %bb.2
  ; GFX90A_ITERATIVE-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY14]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_FFBL_B32_e64_1:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY15]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FFBL_B32_e64_1]], [[V_MOV_B32_e32_1]], 0, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_FFBL_B32_e64_]], [[V_ADD_U32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY2]], [[V_READFIRSTLANE_B32_4]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY3]], [[V_READFIRSTLANE_B32_5]]
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[PHI2]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[PHI2]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[PHI3]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[PHI3]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   $m0 = COPY [[V_READFIRSTLANE_B32_7]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 [[V_READFIRSTLANE_B32_6]], $m0, [[COPY18]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   $m0 = COPY [[V_READFIRSTLANE_B32_9]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 [[V_READFIRSTLANE_B32_8]], $m0, [[COPY19]]
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_WRITELANE_B32_]], %subreg.sub0, [[V_WRITELANE_B32_1]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY20:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI2]], 0, [[COPY20]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY21]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY22]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_NOT_B32_e32_1:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY23]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY24]], [[V_NOT_B32_e32_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY25]], [[V_NOT_B32_e32_1]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE5]], [[COPY26]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   $vcc = COPY [[V_CMP_NE_U64_e64_]]
  ; GFX90A_ITERATIVE-NEXT:   S_CBRANCH_VCCNZ %bb.7, implicit $vcc
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.8
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT: bb.8.ComputeEnd:
  ; GFX90A_ITERATIVE-NEXT:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
  ; GFX90A_ITERATIVE-NEXT: {{  $}}
  ; GFX90A_ITERATIVE-NEXT:   [[PHI5:%[0-9]+]]:vreg_64_align2 = PHI [[REG_SEQUENCE4]], %bb.7
  ; GFX90A_ITERATIVE-NEXT:   [[PHI6:%[0-9]+]]:vreg_64_align2 = PHI [[V_ADD_F64_e64_1]], %bb.7
  ; GFX90A_ITERATIVE-NEXT:   [[COPY27:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY28:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY29:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
  ; GFX90A_ITERATIVE-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX90A_ITERATIVE-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY29]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
  ; GFX90A_ITERATIVE-NEXT:   [[COPY30:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE6]].sub0
  ; GFX90A_ITERATIVE-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX90A_ITERATIVE-NEXT:   [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY27]]
  ; GFX90A_ITERATIVE-NEXT:   [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY31]], [[COPY32]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY30]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY33]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX90A_ITERATIVE-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY34]], implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_ITERATIVE-NEXT:   S_BRANCH %bb.3
  ;
  ; GFX90A_DPP-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw
  ; GFX90A_DPP: bb.1 (%ir-block.0):
  ; GFX90A_DPP-NEXT:   successors: %bb.2(0x40000000), %bb.4(0x40000000)
  ; GFX90A_DPP-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_DPP-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_DPP-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX90A_DPP-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_DPP-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_DPP-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX90A_DPP-NEXT:   [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
  ; GFX90A_DPP-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
  ; GFX90A_DPP-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_DPP-NEXT:   S_BRANCH %bb.2
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT: bb.2 (%ir-block.5):
  ; GFX90A_DPP-NEXT:   successors: %bb.3(0x40000000), %bb.5(0x40000000)
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
  ; GFX90A_DPP-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX90A_DPP-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX90A_DPP-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
  ; GFX90A_DPP-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX90A_DPP-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
  ; GFX90A_DPP-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE2]].sub0
  ; GFX90A_DPP-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX90A_DPP-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX90A_DPP-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX90A_DPP-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY9]], [[COPY10]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX90A_DPP-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY11]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
  ; GFX90A_DPP-NEXT:   [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_SET_INACTIVE_B64_:%[0-9]+]]:vreg_64_align2 = V_SET_INACTIVE_B64 [[REG_SEQUENCE1]], [[COPY12]], implicit-def dead $scc, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY13]], [[V_SET_INACTIVE_B64_]], 273, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_SET_INACTIVE_B64_]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY14]], [[V_ADD_F64_e64_]], 274, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 0, [[V_MOV_B1]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY15:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY15]], [[V_ADD_F64_e64_1]], 276, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_1]], 0, [[V_MOV_B2]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY16:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY16]], [[V_ADD_F64_e64_2]], 280, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_3:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_2]], 0, [[V_MOV_B3]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY17:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B4:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY17]], [[V_ADD_F64_e64_3]], 322, 10, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_4:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_3]], 0, [[V_MOV_B4]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY18:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B5:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY18]], [[V_ADD_F64_e64_4]], 323, 12, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_5:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_4]], 0, [[V_MOV_B5]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY19:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX90A_DPP-NEXT:   [[V_MOV_B6:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY19]], [[V_ADD_F64_e64_5]], 312, 15, 15, 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
  ; GFX90A_DPP-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub0
  ; GFX90A_DPP-NEXT:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub1
  ; GFX90A_DPP-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY20]], [[S_MOV_B32_2]]
  ; GFX90A_DPP-NEXT:   [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY21]], [[S_MOV_B32_2]]
  ; GFX90A_DPP-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
  ; GFX90A_DPP-NEXT:   [[COPY22:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
  ; GFX90A_DPP-NEXT:   [[STRICT_WWM:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[COPY22]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX90A_DPP-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY23]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_DPP-NEXT:   S_BRANCH %bb.3
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT: bb.3 (%ir-block.32):
  ; GFX90A_DPP-NEXT:   successors: %bb.5(0x80000000)
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX90A_DPP-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX90A_DPP-NEXT:   S_BRANCH %bb.5
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT: bb.4.Flow:
  ; GFX90A_DPP-NEXT:   successors: %bb.6(0x80000000)
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT:   [[PHI:%[0-9]+]]:vreg_64_align2 = PHI %45, %bb.5, [[DEF]], %bb.1
  ; GFX90A_DPP-NEXT:   SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_DPP-NEXT:   S_BRANCH %bb.6
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT: bb.5 (%ir-block.35):
  ; GFX90A_DPP-NEXT:   successors: %bb.4(0x80000000)
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT:   [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]], %bb.3, [[DEF]], %bb.2
  ; GFX90A_DPP-NEXT:   SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub0
  ; GFX90A_DPP-NEXT:   [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub1
  ; GFX90A_DPP-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX90A_DPP-NEXT:   [[STRICT_WWM1:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[V_MOV_B6]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE4]]
  ; GFX90A_DPP-NEXT:   [[V_ADD_F64_e64_6:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[COPY26]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec
  ; GFX90A_DPP-NEXT:   [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX90A_DPP-NEXT:   [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX90A_DPP-NEXT:   [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_6]].sub0
  ; GFX90A_DPP-NEXT:   [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_6]].sub1
  ; GFX90A_DPP-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY29]], 0, [[COPY27]], [[V_CMP_EQ_U32_e64_]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY30]], 0, [[COPY28]], [[V_CMP_EQ_U32_e64_]], implicit $exec
  ; GFX90A_DPP-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_CNDMASK_B32_e64_]], %subreg.sub0, [[V_CNDMASK_B32_e64_1]], %subreg.sub1
  ; GFX90A_DPP-NEXT:   S_BRANCH %bb.4
  ; GFX90A_DPP-NEXT: {{  $}}
  ; GFX90A_DPP-NEXT: bb.6 (%ir-block.41):
  ; GFX90A_DPP-NEXT:   [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
  ; GFX90A_DPP-NEXT:   [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
  ; GFX90A_DPP-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
  ; GFX90A_DPP-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX90A_DPP-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
  ; GFX90A_DPP-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_3]]
  ; GFX90A_DPP-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ;
  ; GFX940_ITERATIVE-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw
  ; GFX940_ITERATIVE: bb.1 (%ir-block.0):
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.2(0x40000000), %bb.6(0x40000000)
  ; GFX940_ITERATIVE-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940_ITERATIVE-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940_ITERATIVE-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940_ITERATIVE-NEXT:   [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
  ; GFX940_ITERATIVE-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
  ; GFX940_ITERATIVE-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.2
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.2 (%ir-block.5):
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.7(0x80000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
  ; GFX940_ITERATIVE-NEXT:   [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.7
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.3 (%ir-block.7):
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.4(0x80000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], %27, [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.4 (%ir-block.9):
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.6(0x80000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[PHI:%[0-9]+]]:vreg_64_align2 = PHI [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]], %bb.3, [[DEF]], %bb.8
  ; GFX940_ITERATIVE-NEXT:   SI_END_CF %37, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
  ; GFX940_ITERATIVE-NEXT:   [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[COPY7]], 0, %26, 0, 0, implicit $mode, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX940_ITERATIVE-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX940_ITERATIVE-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY10]], 0, [[COPY8]], %35, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY11]], 0, [[COPY9]], %35, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_CNDMASK_B32_e64_]], %subreg.sub0, [[V_CNDMASK_B32_e64_1]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.6
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.5 (%ir-block.14):
  ; GFX940_ITERATIVE-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY %43.sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY %43.sub1
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_3]]
  ; GFX940_ITERATIVE-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.6.Flow:
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.5(0x80000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI [[REG_SEQUENCE2]], %bb.4, [[DEF]], %bb.1
  ; GFX940_ITERATIVE-NEXT:   SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.5
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.7.ComputeLoop:
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.8(0x04000000), %bb.7(0x7c000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[PHI2:%[0-9]+]]:vreg_64_align2 = PHI %18, %bb.7, [[S_MOV_B]], %bb.2
  ; GFX940_ITERATIVE-NEXT:   [[PHI3:%[0-9]+]]:vreg_64_align2 = PHI %17, %bb.7, [[DEF]], %bb.2
  ; GFX940_ITERATIVE-NEXT:   [[PHI4:%[0-9]+]]:vreg_64_align2 = PHI %23, %bb.7, [[COPY4]], %bb.2
  ; GFX940_ITERATIVE-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY14]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_FFBL_B32_e64_1:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY15]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FFBL_B32_e64_1]], [[V_MOV_B32_e32_1]], 0, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_FFBL_B32_e64_]], [[V_ADD_U32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY2]], [[V_READFIRSTLANE_B32_4]]
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY3]], [[V_READFIRSTLANE_B32_5]]
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[PHI2]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[PHI2]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[PHI3]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[PHI3]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   $m0 = COPY [[V_READFIRSTLANE_B32_7]]
  ; GFX940_ITERATIVE-NEXT:   [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 [[V_READFIRSTLANE_B32_6]], $m0, [[COPY18]]
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   $m0 = COPY [[V_READFIRSTLANE_B32_9]]
  ; GFX940_ITERATIVE-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 [[V_READFIRSTLANE_B32_8]], $m0, [[COPY19]]
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_WRITELANE_B32_]], %subreg.sub0, [[V_WRITELANE_B32_1]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   [[COPY20:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
  ; GFX940_ITERATIVE-NEXT:   [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI2]], 0, [[COPY20]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
  ; GFX940_ITERATIVE-NEXT:   [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
  ; GFX940_ITERATIVE-NEXT:   [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY21]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY22]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_NOT_B32_e32_1:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[COPY23]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[PHI4]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY24]], [[V_NOT_B32_e32_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY25]], [[V_NOT_B32_e32_1]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
  ; GFX940_ITERATIVE-NEXT:   [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
  ; GFX940_ITERATIVE-NEXT:   [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE5]], [[COPY26]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   $vcc = COPY [[V_CMP_NE_U64_e64_]]
  ; GFX940_ITERATIVE-NEXT:   S_CBRANCH_VCCNZ %bb.7, implicit $vcc
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.8
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT: bb.8.ComputeEnd:
  ; GFX940_ITERATIVE-NEXT:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
  ; GFX940_ITERATIVE-NEXT: {{  $}}
  ; GFX940_ITERATIVE-NEXT:   [[PHI5:%[0-9]+]]:vreg_64_align2 = PHI [[REG_SEQUENCE4]], %bb.7
  ; GFX940_ITERATIVE-NEXT:   [[PHI6:%[0-9]+]]:vreg_64_align2 = PHI [[V_ADD_F64_e64_1]], %bb.7
  ; GFX940_ITERATIVE-NEXT:   [[COPY27:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY28:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[COPY29:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
  ; GFX940_ITERATIVE-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX940_ITERATIVE-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY29]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
  ; GFX940_ITERATIVE-NEXT:   [[COPY30:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE6]].sub0
  ; GFX940_ITERATIVE-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX940_ITERATIVE-NEXT:   [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY27]]
  ; GFX940_ITERATIVE-NEXT:   [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX940_ITERATIVE-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY31]], [[COPY32]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY30]]
  ; GFX940_ITERATIVE-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY33]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX940_ITERATIVE-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY34]], implicit $exec
  ; GFX940_ITERATIVE-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_ITERATIVE-NEXT:   S_BRANCH %bb.3
  ;
  ; GFX940_DPP-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw
  ; GFX940_DPP: bb.1 (%ir-block.0):
  ; GFX940_DPP-NEXT:   successors: %bb.2(0x40000000), %bb.4(0x40000000)
  ; GFX940_DPP-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX940_DPP-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX940_DPP-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940_DPP-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940_DPP-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940_DPP-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX940_DPP-NEXT:   [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
  ; GFX940_DPP-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
  ; GFX940_DPP-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_DPP-NEXT:   S_BRANCH %bb.2
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT: bb.2 (%ir-block.5):
  ; GFX940_DPP-NEXT:   successors: %bb.3(0x40000000), %bb.5(0x40000000)
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
  ; GFX940_DPP-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX940_DPP-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
  ; GFX940_DPP-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
  ; GFX940_DPP-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX940_DPP-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
  ; GFX940_DPP-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE2]].sub0
  ; GFX940_DPP-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX940_DPP-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX940_DPP-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX940_DPP-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY9]], [[COPY10]], implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX940_DPP-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY11]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
  ; GFX940_DPP-NEXT:   [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
  ; GFX940_DPP-NEXT:   [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_SET_INACTIVE_B64_:%[0-9]+]]:vreg_64_align2 = V_SET_INACTIVE_B64 [[REG_SEQUENCE1]], [[COPY12]], implicit-def dead $scc, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY13]], [[V_SET_INACTIVE_B64_]], 273, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_SET_INACTIVE_B64_]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY14]], [[V_ADD_F64_e64_]], 274, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 0, [[V_MOV_B1]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY15:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY15]], [[V_ADD_F64_e64_1]], 276, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_1]], 0, [[V_MOV_B2]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY16:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY16]], [[V_ADD_F64_e64_2]], 280, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_3:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_2]], 0, [[V_MOV_B3]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY17:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B4:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY17]], [[V_ADD_F64_e64_3]], 322, 10, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_4:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_3]], 0, [[V_MOV_B4]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY18:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B5:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY18]], [[V_ADD_F64_e64_4]], 323, 12, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_5:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_4]], 0, [[V_MOV_B5]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY19:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B]]
  ; GFX940_DPP-NEXT:   [[V_MOV_B6:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_DPP_PSEUDO [[COPY19]], [[V_ADD_F64_e64_5]], 312, 15, 15, 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
  ; GFX940_DPP-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub0
  ; GFX940_DPP-NEXT:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub1
  ; GFX940_DPP-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY20]], [[S_MOV_B32_2]]
  ; GFX940_DPP-NEXT:   [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY21]], [[S_MOV_B32_2]]
  ; GFX940_DPP-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
  ; GFX940_DPP-NEXT:   [[COPY22:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
  ; GFX940_DPP-NEXT:   [[STRICT_WWM:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[COPY22]], implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GFX940_DPP-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY23]], implicit $exec
  ; GFX940_DPP-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_DPP-NEXT:   S_BRANCH %bb.3
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT: bb.3 (%ir-block.32):
  ; GFX940_DPP-NEXT:   successors: %bb.5(0x80000000)
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GFX940_DPP-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
  ; GFX940_DPP-NEXT:   S_BRANCH %bb.5
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT: bb.4.Flow:
  ; GFX940_DPP-NEXT:   successors: %bb.6(0x80000000)
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT:   [[PHI:%[0-9]+]]:vreg_64_align2 = PHI %44, %bb.5, [[DEF]], %bb.1
  ; GFX940_DPP-NEXT:   SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_DPP-NEXT:   S_BRANCH %bb.6
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT: bb.5 (%ir-block.35):
  ; GFX940_DPP-NEXT:   successors: %bb.4(0x80000000)
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT:   [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]], %bb.3, [[DEF]], %bb.2
  ; GFX940_DPP-NEXT:   SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub0
  ; GFX940_DPP-NEXT:   [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub1
  ; GFX940_DPP-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
  ; GFX940_DPP-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
  ; GFX940_DPP-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX940_DPP-NEXT:   [[STRICT_WWM1:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[V_MOV_B6]], implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE4]]
  ; GFX940_DPP-NEXT:   [[V_ADD_F64_e64_6:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[COPY26]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec
  ; GFX940_DPP-NEXT:   [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX940_DPP-NEXT:   [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX940_DPP-NEXT:   [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_6]].sub0
  ; GFX940_DPP-NEXT:   [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_6]].sub1
  ; GFX940_DPP-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY29]], 0, [[COPY27]], [[V_CMP_EQ_U32_e64_]], implicit $exec
  ; GFX940_DPP-NEXT:   [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY30]], 0, [[COPY28]], [[V_CMP_EQ_U32_e64_]], implicit $exec
  ; GFX940_DPP-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_CNDMASK_B32_e64_]], %subreg.sub0, [[V_CNDMASK_B32_e64_1]], %subreg.sub1
  ; GFX940_DPP-NEXT:   S_BRANCH %bb.4
  ; GFX940_DPP-NEXT: {{  $}}
  ; GFX940_DPP-NEXT: bb.6 (%ir-block.41):
  ; GFX940_DPP-NEXT:   [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
  ; GFX940_DPP-NEXT:   [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
  ; GFX940_DPP-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
  ; GFX940_DPP-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX940_DPP-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
  ; GFX940_DPP-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_3]]
  ; GFX940_DPP-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic
  ret double %ret
}

declare double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1), double)
declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1), double)
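
; NOTE (hand-written, not autogenerated): "amdgpu-unsafe-fp-atomics"="true" is
; what allows the backend to lower the f64 atomicrmw fadd operations above to
; the native GLOBAL_ATOMIC_ADD_F64 / GLOBAL_ATOMIC_ADD_F64_SADDR instructions
; instead of a compare-and-swap loop.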
attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }