1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -verify-machineinstrs -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX12 %s
7 define amdgpu_ps float @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
8 ; GFX9-LABEL: name: atomic_swap_1d
9 ; GFX9: bb.1.main_body:
10 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
11 ; GFX9-NEXT: {{ $}}
12 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
13 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
14 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
15 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
16 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
17 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
18 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
19 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
20 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
21 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
22 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
23 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
24 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
25 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
26 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
27 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
28 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
30 ; GFX10NSA-LABEL: name: atomic_swap_1d
31 ; GFX10NSA: bb.1.main_body:
32 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
33 ; GFX10NSA-NEXT: {{ $}}
34 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
35 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
36 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
37 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
38 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
39 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
40 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
41 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
42 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
43 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
44 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
45 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
46 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
47 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
48 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
49 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
50 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
52 ; GFX12-LABEL: name: atomic_swap_1d
53 ; GFX12: bb.1.main_body:
54 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
55 ; GFX12-NEXT: {{ $}}
56 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
57 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
58 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
59 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
60 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
61 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
62 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
63 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
64 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
65 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
66 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
67 ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
68 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
69 ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
70 ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
71 ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
72 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
73 main_body:
74 %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
75 %out = bitcast i32 %v to float
76 ret float %out
77 }
79 define amdgpu_ps float @atomic_add_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
80 ; GFX9-LABEL: name: atomic_add_1d
81 ; GFX9: bb.1.main_body:
82 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
83 ; GFX9-NEXT: {{ $}}
84 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
85 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
86 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
87 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
88 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
89 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
90 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
91 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
92 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
93 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
94 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
95 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
96 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
97 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
98 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
99 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
100 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
102 ; GFX10NSA-LABEL: name: atomic_add_1d
103 ; GFX10NSA: bb.1.main_body:
104 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
105 ; GFX10NSA-NEXT: {{ $}}
106 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
107 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
108 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
109 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
110 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
111 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
112 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
113 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
114 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
115 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
116 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
117 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
118 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
119 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
120 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
121 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
122 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
124 ; GFX12-LABEL: name: atomic_add_1d
125 ; GFX12: bb.1.main_body:
126 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
127 ; GFX12-NEXT: {{ $}}
128 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
129 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
130 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
131 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
132 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
133 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
134 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
135 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
136 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
137 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
138 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
139 ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
140 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
141 ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
142 ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
143 ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
144 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
145 main_body:
146 %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
147 %out = bitcast i32 %v to float
148 ret float %out
149 }
151 define amdgpu_ps float @atomic_sub_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
152 ; GFX9-LABEL: name: atomic_sub_1d
153 ; GFX9: bb.1.main_body:
154 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
155 ; GFX9-NEXT: {{ $}}
156 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
157 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
158 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
159 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
160 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
161 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
162 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
163 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
164 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
165 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
166 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
167 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
168 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
169 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
170 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
171 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
172 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
174 ; GFX10NSA-LABEL: name: atomic_sub_1d
175 ; GFX10NSA: bb.1.main_body:
176 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
177 ; GFX10NSA-NEXT: {{ $}}
178 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
179 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
180 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
181 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
182 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
183 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
184 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
185 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
186 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
187 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
188 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
189 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
190 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
191 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
192 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
193 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
194 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
196 ; GFX12-LABEL: name: atomic_sub_1d
197 ; GFX12: bb.1.main_body:
198 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
199 ; GFX12-NEXT: {{ $}}
200 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
201 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
202 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
203 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
204 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
205 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
206 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
207 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
208 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
209 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
210 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
211 ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
212 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
213 ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
214 ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
215 ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
216 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
217 main_body:
218 %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
219 %out = bitcast i32 %v to float
220 ret float %out
221 }
223 define amdgpu_ps float @atomic_smin_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
224 ; GFX9-LABEL: name: atomic_smin_1d
225 ; GFX9: bb.1.main_body:
226 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
227 ; GFX9-NEXT: {{ $}}
228 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
229 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
230 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
231 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
232 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
233 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
234 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
235 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
236 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
237 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
238 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
239 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
240 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
241 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
242 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
243 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
244 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
246 ; GFX10NSA-LABEL: name: atomic_smin_1d
247 ; GFX10NSA: bb.1.main_body:
248 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
249 ; GFX10NSA-NEXT: {{ $}}
250 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
251 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
252 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
253 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
254 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
255 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
256 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
257 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
258 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
259 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
260 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
261 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
262 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
263 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
264 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
265 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
266 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
268 ; GFX12-LABEL: name: atomic_smin_1d
269 ; GFX12: bb.1.main_body:
270 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
271 ; GFX12-NEXT: {{ $}}
272 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
273 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
274 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
275 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
276 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
277 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
278 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
279 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
280 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
281 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
282 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
283 ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
284 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
285 ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
286 ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
287 ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
288 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
289 main_body:
290 %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
291 %out = bitcast i32 %v to float
292 ret float %out
293 }
296 define amdgpu_ps float @atomic_umin_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
297 ; GFX9-LABEL: name: atomic_umin_1d
298 ; GFX9: bb.1.main_body:
299 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
300 ; GFX9-NEXT: {{ $}}
301 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
302 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
303 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
304 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
305 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
306 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
307 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
308 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
309 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
310 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
311 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
312 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
313 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
314 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
315 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
316 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
317 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
319 ; GFX10NSA-LABEL: name: atomic_umin_1d
320 ; GFX10NSA: bb.1.main_body:
321 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
322 ; GFX10NSA-NEXT: {{ $}}
323 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
324 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
325 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
326 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
327 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
328 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
329 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
330 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
331 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
332 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
333 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
334 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
335 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
336 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
337 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
338 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
339 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
341 ; GFX12-LABEL: name: atomic_umin_1d
342 ; GFX12: bb.1.main_body:
343 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
344 ; GFX12-NEXT: {{ $}}
345 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
346 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
347 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
348 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
349 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
350 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
351 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
352 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
353 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
354 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
355 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
356 ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
357 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
358 ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
359 ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
360 ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
361 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
362 main_body:
363 %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
364 %out = bitcast i32 %v to float
365 ret float %out
366 }
368 define amdgpu_ps float @atomic_smax_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
369 ; GFX9-LABEL: name: atomic_smax_1d
370 ; GFX9: bb.1.main_body:
371 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
372 ; GFX9-NEXT: {{ $}}
373 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
374 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
375 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
376 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
377 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
378 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
379 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
380 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
381 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
382 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
383 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
384 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
385 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
386 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
387 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
388 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
389 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
391 ; GFX10NSA-LABEL: name: atomic_smax_1d
392 ; GFX10NSA: bb.1.main_body:
393 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
394 ; GFX10NSA-NEXT: {{ $}}
395 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
396 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
397 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
398 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
399 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
400 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
401 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
402 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
403 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
404 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
405 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
406 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
407 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
408 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
409 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
410 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
411 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
413 ; GFX12-LABEL: name: atomic_smax_1d
414 ; GFX12: bb.1.main_body:
415 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
416 ; GFX12-NEXT: {{ $}}
417 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
418 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
419 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
420 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
421 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
422 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
423 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
424 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
425 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
426 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
427 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
428 ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
429 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
430 ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
431 ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
432 ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
433 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
434 main_body:
435 %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
436 %out = bitcast i32 %v to float
437 ret float %out
438 }
440 define amdgpu_ps float @atomic_umax_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
441 ; GFX9-LABEL: name: atomic_umax_1d
442 ; GFX9: bb.1.main_body:
443 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
444 ; GFX9-NEXT: {{ $}}
445 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
446 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
447 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
448 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
449 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
450 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
451 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
452 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
453 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
454 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
455 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
456 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
457 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
458 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
459 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
460 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
461 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
463 ; GFX10NSA-LABEL: name: atomic_umax_1d
464 ; GFX10NSA: bb.1.main_body:
465 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
466 ; GFX10NSA-NEXT: {{ $}}
467 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
468 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
469 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
470 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
471 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
472 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
473 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
474 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
475 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
476 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
477 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
478 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
479 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
480 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
481 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
482 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
483 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
485 ; GFX12-LABEL: name: atomic_umax_1d
486 ; GFX12: bb.1.main_body:
487 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
488 ; GFX12-NEXT: {{ $}}
489 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
490 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
491 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
492 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
493 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
494 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
495 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
496 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
497 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
498 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
499 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
500 ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
501 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
502 ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
503 ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
504 ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
505 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
506 main_body:
507 %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
508 %out = bitcast i32 %v to float
509 ret float %out
510 }
512 define amdgpu_ps float @atomic_and_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
513 ; GFX9-LABEL: name: atomic_and_1d
514 ; GFX9: bb.1.main_body:
515 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
516 ; GFX9-NEXT: {{ $}}
517 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
518 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
519 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
520 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
521 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
522 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
523 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
524 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
525 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
526 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
527 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
528 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
529 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
530 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
531 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
532 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
533 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
535 ; GFX10NSA-LABEL: name: atomic_and_1d
536 ; GFX10NSA: bb.1.main_body:
537 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
538 ; GFX10NSA-NEXT: {{ $}}
539 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
540 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
541 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
542 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
543 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
544 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
545 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
546 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
547 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
548 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
549 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
550 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
551 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
552 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
553 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
554 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
555 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
557 ; GFX12-LABEL: name: atomic_and_1d
558 ; GFX12: bb.1.main_body:
559 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
560 ; GFX12-NEXT: {{ $}}
561 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
562 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
563 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
564 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
565 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
566 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
567 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
568 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
569 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
570 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
571 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
572 ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
573 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
574 ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
575 ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
576 ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
577 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
578 main_body:
579 %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
580 %out = bitcast i32 %v to float
581 ret float %out
582 }
584 define amdgpu_ps float @atomic_or_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
585 ; GFX9-LABEL: name: atomic_or_1d
586 ; GFX9: bb.1.main_body:
587 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
588 ; GFX9-NEXT: {{ $}}
589 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
590 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
591 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
592 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
593 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
594 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
595 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
596 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
597 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
598 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
599 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
600 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
601 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
602 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
603 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
604 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
605 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
607 ; GFX10NSA-LABEL: name: atomic_or_1d
608 ; GFX10NSA: bb.1.main_body:
609 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
610 ; GFX10NSA-NEXT: {{ $}}
611 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
612 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
613 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
614 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
615 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
616 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
617 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
618 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
619 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
620 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
621 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
622 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
623 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
624 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
625 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
626 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
627 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
629 ; GFX12-LABEL: name: atomic_or_1d
630 ; GFX12: bb.1.main_body:
631 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
632 ; GFX12-NEXT: {{ $}}
633 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
634 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
635 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
636 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
637 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
638 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
639 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
640 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
641 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
642 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
643 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
644 ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
645 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
646 ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
647 ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
648 ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
649 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
650 main_body:
651 %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
652 %out = bitcast i32 %v to float
653 ret float %out
654 }
656 define amdgpu_ps float @atomic_xor_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
657 ; GFX9-LABEL: name: atomic_xor_1d
658 ; GFX9: bb.1.main_body:
659 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
660 ; GFX9-NEXT: {{ $}}
661 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
662 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
663 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
664 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
665 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
666 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
667 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
668 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
669 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
670 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
671 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
672 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
673 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
674 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
675 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
676 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
677 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
679 ; GFX10NSA-LABEL: name: atomic_xor_1d
680 ; GFX10NSA: bb.1.main_body:
681 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
682 ; GFX10NSA-NEXT: {{ $}}
683 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
684 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
685 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
686 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
687 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
688 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
689 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
690 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
691 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
692 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
693 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
694 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
695 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
696 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
697 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
698 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
699 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
701 ; GFX12-LABEL: name: atomic_xor_1d
702 ; GFX12: bb.1.main_body:
703 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
704 ; GFX12-NEXT: {{ $}}
705 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
706 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
707 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
708 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
709 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
710 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
711 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
712 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
713 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
714 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
715 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
716 ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
717 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
718 ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
719 ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
720 ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
721 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
722 main_body:
723 %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
724 %out = bitcast i32 %v to float
725 ret float %out
726 }
728 define amdgpu_ps float @atomic_inc_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
729 ; GFX9-LABEL: name: atomic_inc_1d
730 ; GFX9: bb.1.main_body:
731 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
732 ; GFX9-NEXT: {{ $}}
733 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
734 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
735 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
736 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
737 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
738 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
739 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
740 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
741 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
742 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
743 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
744 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
745 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
746 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
747 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
748 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
749 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
751 ; GFX10NSA-LABEL: name: atomic_inc_1d
752 ; GFX10NSA: bb.1.main_body:
753 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
754 ; GFX10NSA-NEXT: {{ $}}
755 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
756 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
757 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
758 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
759 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
760 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
761 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
762 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
763 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
764 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
765 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
766 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
767 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
768 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
769 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
770 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
771 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
773 ; GFX12-LABEL: name: atomic_inc_1d
774 ; GFX12: bb.1.main_body:
775 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
776 ; GFX12-NEXT: {{ $}}
777 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
778 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
779 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
780 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
781 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
782 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
783 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
784 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
785 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
786 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
787 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
788 ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
789 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
790 ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
791 ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
792 ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
793 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
794 main_body:
795 %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
796 %out = bitcast i32 %v to float
797 ret float %out
798 }
800 define amdgpu_ps float @atomic_dec_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
801 ; GFX9-LABEL: name: atomic_dec_1d
802 ; GFX9: bb.1.main_body:
803 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
804 ; GFX9-NEXT: {{ $}}
805 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
806 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
807 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
808 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
809 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
810 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
811 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
812 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
813 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
814 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
815 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
816 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
817 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
818 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
819 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
820 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
821 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
823 ; GFX10NSA-LABEL: name: atomic_dec_1d
824 ; GFX10NSA: bb.1.main_body:
825 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
826 ; GFX10NSA-NEXT: {{ $}}
827 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
828 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
829 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
830 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
831 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
832 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
833 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
834 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
835 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
836 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
837 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
838 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
839 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
840 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
841 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
842 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
843 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
845 ; GFX12-LABEL: name: atomic_dec_1d
846 ; GFX12: bb.1.main_body:
847 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
848 ; GFX12-NEXT: {{ $}}
849 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
850 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
851 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
852 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
853 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
854 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
855 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
856 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
857 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
858 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
859 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
860 ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
861 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
862 ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
863 ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
864 ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
865 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
866 main_body:
867 %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
868 %out = bitcast i32 %v to float
869 ret float %out
870 }
872 define amdgpu_ps float @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s) {
873 ; GFX9-LABEL: name: atomic_cmpswap_1d
874 ; GFX9: bb.1.main_body:
875 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
876 ; GFX9-NEXT: {{ $}}
877 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
878 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
879 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
880 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
881 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
882 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
883 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
884 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
885 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
886 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
887 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
888 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
889 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
890 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
891 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
892 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
893 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
894 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
895 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
897 ; GFX10NSA-LABEL: name: atomic_cmpswap_1d
898 ; GFX10NSA: bb.1.main_body:
899 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
900 ; GFX10NSA-NEXT: {{ $}}
901 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
902 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
903 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
904 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
905 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
906 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
907 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
908 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
909 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
910 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
911 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
912 ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
913 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
914 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
915 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
916 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
917 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
918 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
919 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
921 ; GFX12-LABEL: name: atomic_cmpswap_1d
922 ; GFX12: bb.1.main_body:
923 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
924 ; GFX12-NEXT: {{ $}}
925 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
926 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
927 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
928 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
929 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
930 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
931 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
932 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
933 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
934 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
935 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
936 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
937 ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
938 ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
939 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
940 ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
941 ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
942 ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
943 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
944 main_body:
945 %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32 %cmp, i32 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
946 %out = bitcast i32 %v to float
947 ret float %out
948 }
950 define amdgpu_ps float @atomic_add_2d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t) {
951 ; GFX9-LABEL: name: atomic_add_2d
952 ; GFX9: bb.1.main_body:
953 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
954 ; GFX9-NEXT: {{ $}}
955 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
956 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
957 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
958 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
959 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
960 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
961 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
962 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
963 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
964 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
965 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
966 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
967 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
968 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
969 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
970 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
971 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
972 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
974 ; GFX10NSA-LABEL: name: atomic_add_2d
975 ; GFX10NSA: bb.1.main_body:
976 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
977 ; GFX10NSA-NEXT: {{ $}}
978 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
979 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
980 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
981 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
982 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
983 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
984 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
985 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
986 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
987 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
988 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
989 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
990 ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
991 ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
992 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
993 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
994 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
995 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
997 ; GFX12-LABEL: name: atomic_add_2d
998 ; GFX12: bb.1.main_body:
999 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
1000 ; GFX12-NEXT: {{ $}}
1001 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1002 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1003 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1004 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1005 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1006 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1007 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1008 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1009 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1010 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1011 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1012 ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1013 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1014 ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1015 ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1016 ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
1017 ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1018 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1019 main_body:
1020 %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
1021 %out = bitcast i32 %v to float
1022 ret float %out
1023 }
1025 define amdgpu_ps float @atomic_add_3d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %r) {
1026 ; GFX9-LABEL: name: atomic_add_3d
1027 ; GFX9: bb.1.main_body:
1028 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1029 ; GFX9-NEXT: {{ $}}
1030 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1031 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1032 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1033 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1034 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1035 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1036 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1037 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1038 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1039 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1040 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1041 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1042 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1043 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1044 ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1045 ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1046 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1047 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
1048 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
1049 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
1050 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
1051 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1052 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1054 ; GFX10NSA-LABEL: name: atomic_add_3d
1055 ; GFX10NSA: bb.1.main_body:
1056 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1057 ; GFX10NSA-NEXT: {{ $}}
1058 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1059 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1060 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1061 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1062 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1063 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1064 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1065 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1066 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1067 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1068 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1069 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1070 ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1071 ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1072 ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1073 ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1074 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1075 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
1076 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
1077 ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
1078 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
1079 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1080 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1082 ; GFX12-LABEL: name: atomic_add_3d
1083 ; GFX12: bb.1.main_body:
1084 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1085 ; GFX12-NEXT: {{ $}}
1086 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1087 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1088 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1089 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1090 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1091 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1092 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1093 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1094 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1095 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1096 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1097 ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1098 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1099 ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1100 ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1101 ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1102 ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1103 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
1104 ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
1105 ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
1106 ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1107 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1108 main_body:
1109 %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
1110 %out = bitcast i32 %v to float
1111 ret float %out
1112 }
1114 define amdgpu_ps float @atomic_add_cube(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %face) {
1115 ; GFX9-LABEL: name: atomic_add_cube
1116 ; GFX9: bb.1.main_body:
1117 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1118 ; GFX9-NEXT: {{ $}}
1119 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1120 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1121 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1122 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1123 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1124 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1125 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1126 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1127 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1128 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1129 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1130 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1131 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1132 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1133 ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1134 ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1135 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1136 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
1137 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
1138 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
1139 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
1140 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1141 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1143 ; GFX10NSA-LABEL: name: atomic_add_cube
1144 ; GFX10NSA: bb.1.main_body:
1145 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1146 ; GFX10NSA-NEXT: {{ $}}
1147 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1148 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1149 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1150 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1151 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1152 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1153 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1154 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1155 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1156 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1157 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1158 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1159 ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1160 ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1161 ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1162 ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1163 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1164 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
1165 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
1166 ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
1167 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
1168 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1169 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1171 ; GFX12-LABEL: name: atomic_add_cube
1172 ; GFX12: bb.1.main_body:
1173 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1174 ; GFX12-NEXT: {{ $}}
1175 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1176 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1177 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1178 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1179 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1180 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1181 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1182 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1183 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1184 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1185 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1186 ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1187 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1188 ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1189 ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1190 ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1191 ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1192 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
1193 ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
1194 ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
1195 ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1196 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1197 main_body:
1198 %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32 %data, i16 %s, i16 %t, i16 %face, <8 x i32> %rsrc, i32 0, i32 0)
1199 %out = bitcast i32 %v to float
1200 ret float %out
1201 }
1203 define amdgpu_ps float @atomic_add_1darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %slice) {
1204 ; GFX9-LABEL: name: atomic_add_1darray
1205 ; GFX9: bb.1.main_body:
1206 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
1207 ; GFX9-NEXT: {{ $}}
1208 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1209 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1210 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1211 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1212 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1213 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1214 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1215 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1216 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1217 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1218 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1219 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1220 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1221 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1222 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1223 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
1224 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1225 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1227 ; GFX10NSA-LABEL: name: atomic_add_1darray
1228 ; GFX10NSA: bb.1.main_body:
1229 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
1230 ; GFX10NSA-NEXT: {{ $}}
1231 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1232 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1233 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1234 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1235 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1236 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1237 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1238 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1239 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1240 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1241 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1242 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1243 ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1244 ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1245 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1246 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
1247 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1248 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1250 ; GFX12-LABEL: name: atomic_add_1darray
1251 ; GFX12: bb.1.main_body:
1252 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
1253 ; GFX12-NEXT: {{ $}}
1254 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1255 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1256 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1257 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1258 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1259 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1260 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1261 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1262 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1263 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1264 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1265 ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1266 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1267 ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1268 ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1269 ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
1270 ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1271 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1272 main_body:
1273 %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
1274 %out = bitcast i32 %v to float
1275 ret float %out
1276 }
1278 define amdgpu_ps float @atomic_add_2darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice) {
1279 ; GFX9-LABEL: name: atomic_add_2darray
1280 ; GFX9: bb.1.main_body:
1281 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1282 ; GFX9-NEXT: {{ $}}
1283 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1284 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1285 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1286 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1287 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1288 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1289 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1290 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1291 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1292 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1293 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1294 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1295 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1296 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1297 ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1298 ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1299 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1300 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
1301 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
1302 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
1303 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
1304 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1305 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1307 ; GFX10NSA-LABEL: name: atomic_add_2darray
1308 ; GFX10NSA: bb.1.main_body:
1309 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1310 ; GFX10NSA-NEXT: {{ $}}
1311 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1312 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1313 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1314 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1315 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1316 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1317 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1318 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1319 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1320 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1321 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1322 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1323 ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1324 ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1325 ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1326 ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1327 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1328 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
1329 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
1330 ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
1331 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
1332 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1333 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1335 ; GFX12-LABEL: name: atomic_add_2darray
1336 ; GFX12: bb.1.main_body:
1337 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1338 ; GFX12-NEXT: {{ $}}
1339 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1340 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1341 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1342 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1343 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1344 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1345 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1346 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1347 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1348 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1349 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1350 ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1351 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1352 ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1353 ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1354 ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1355 ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1356 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
1357 ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
1358 ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
1359 ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1360 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1361 main_body:
1362 %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
1363 %out = bitcast i32 %v to float
1364 ret float %out
1365 }
1367 define amdgpu_ps float @atomic_add_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %fragid) {
1368 ; GFX9-LABEL: name: atomic_add_2dmsaa
1369 ; GFX9: bb.1.main_body:
1370 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1371 ; GFX9-NEXT: {{ $}}
1372 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1373 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1374 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1375 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1376 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1377 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1378 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1379 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1380 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1381 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1382 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1383 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1384 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1385 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1386 ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1387 ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1388 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1389 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
1390 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
1391 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
1392 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
1393 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1394 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1396 ; GFX10NSA-LABEL: name: atomic_add_2dmsaa
1397 ; GFX10NSA: bb.1.main_body:
1398 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1399 ; GFX10NSA-NEXT: {{ $}}
1400 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1401 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1402 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1403 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1404 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1405 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1406 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1407 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1408 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1409 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1410 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1411 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1412 ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1413 ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1414 ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1415 ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1416 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1417 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
1418 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
1419 ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
1420 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
1421 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1422 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1424 ; GFX12-LABEL: name: atomic_add_2dmsaa
1425 ; GFX12: bb.1.main_body:
1426 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1427 ; GFX12-NEXT: {{ $}}
1428 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1429 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1430 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1431 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1432 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1433 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1434 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1435 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1436 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1437 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1438 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1439 ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1440 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1441 ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1442 ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1443 ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1444 ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1445 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
1446 ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
1447 ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
1448 ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1449 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1450 main_body:
1451 %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1452 %out = bitcast i32 %v to float
1453 ret float %out
1454 }
1456 define amdgpu_ps float @atomic_add_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
1457 ; GFX9-LABEL: name: atomic_add_2darraymsaa
1458 ; GFX9: bb.1.main_body:
1459 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1460 ; GFX9-NEXT: {{ $}}
1461 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1462 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1463 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1464 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1465 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1466 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1467 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1468 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1469 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1470 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1471 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1472 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1473 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1474 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1475 ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1476 ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1477 ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
1478 ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
1479 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1480 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
1481 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
1482 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
1483 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1484 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1486 ; GFX10NSA-LABEL: name: atomic_add_2darraymsaa
1487 ; GFX10NSA: bb.1.main_body:
1488 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1489 ; GFX10NSA-NEXT: {{ $}}
1490 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1491 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1492 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1493 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1494 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1495 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1496 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1497 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1498 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1499 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1500 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1501 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1502 ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1503 ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1504 ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1505 ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1506 ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
1507 ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
1508 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1509 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
1510 ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
1511 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
1512 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1513 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1515 ; GFX12-LABEL: name: atomic_add_2darraymsaa
1516 ; GFX12: bb.1.main_body:
1517 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1518 ; GFX12-NEXT: {{ $}}
1519 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1520 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1521 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1522 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1523 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1524 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1525 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1526 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1527 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1528 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1529 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1530 ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1531 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1532 ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1533 ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1534 ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1535 ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
1536 ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
1537 ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1538 ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
1539 ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
1540 ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1541 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1542 main_body:
1543 %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1544 %out = bitcast i32 %v to float
1545 ret float %out
1546 }
1548 define amdgpu_ps float @atomic_add_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
1549 ; GFX9-LABEL: name: atomic_add_1d_slc
1550 ; GFX9: bb.1.main_body:
1551 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1552 ; GFX9-NEXT: {{ $}}
1553 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1554 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1555 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1556 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1557 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1558 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1559 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1560 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1561 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1562 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1563 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1564 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1565 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
1566 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
1567 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
1568 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1569 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1571 ; GFX10NSA-LABEL: name: atomic_add_1d_slc
1572 ; GFX10NSA: bb.1.main_body:
1573 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1574 ; GFX10NSA-NEXT: {{ $}}
1575 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1576 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1577 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1578 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1579 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1580 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1581 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1582 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1583 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1584 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1585 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1586 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1587 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
1588 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
1589 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
1590 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1591 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1593 ; GFX12-LABEL: name: atomic_add_1d_slc
1594 ; GFX12: bb.1.main_body:
1595 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1596 ; GFX12-NEXT: {{ $}}
1597 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1598 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1599 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1600 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1601 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1602 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1603 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1604 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1605 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1606 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1607 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1608 ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1609 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
1610 ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
1611 ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
1612 ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1613 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1615 %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
1616 %out = bitcast i32 %v to float
define amdgpu_ps float @atomic_cmpswap_2d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s, i16 %t) {
; GFX9-LABEL: name: atomic_cmpswap_2d
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
;
; GFX10NSA-LABEL: name: atomic_cmpswap_2d
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
;
; GFX12-LABEL: name: atomic_cmpswap_2d
; GFX12: bb.1.main_body:
; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.2d.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_cmpswap_3d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %r) {
; GFX9-LABEL: name: atomic_cmpswap_3d
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
;
; GFX10NSA-LABEL: name: atomic_cmpswap_3d
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
;
; GFX12-LABEL: name: atomic_cmpswap_3d
; GFX12: bb.1.main_body:
; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.3d.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_cmpswap_2darraymsaa(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
; GFX9-LABEL: name: atomic_cmpswap_2darraymsaa
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
;
; GFX10NSA-LABEL: name: atomic_cmpswap_2darraymsaa
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
;
; GFX12-LABEL: name: atomic_cmpswap_2darraymsaa
; GFX12: bb.1.main_body:
; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.2darraymsaa.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

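; Declarations for the 16-bit-coordinate (.i16) image atomic intrinsic variants exercised by the tests above.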
declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32, i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.2d.i32.i16(i32, i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.3d.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.cube.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.1darray.i32.i16(i32, i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.2darray.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.2dmsaa.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.2darraymsaa.i32.i16(i32, i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind }