1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s
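; The functions below exercise GlobalISel legalization of the a16 (16-bit
; coordinate) forms of the llvm.amdgcn.image.atomic.* intrinsics on gfx900
; and gfx1010, checking the legalized G_AMDGPU_INTRIN_IMAGE_LOAD output.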
5 define amdgpu_ps float @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
6 ; GFX9-LABEL: name: atomic_swap_1d
7 ; GFX9: bb.1.main_body:
8 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
9 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
10 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
11 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
12 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
13 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
14 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
15 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
16 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
17 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
18 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
19 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
20 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
21 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
22 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
23 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
24 ; GFX10NSA-LABEL: name: atomic_swap_1d
25 ; GFX10NSA: bb.1.main_body:
26 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
27 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
28 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
29 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
30 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
31 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
32 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
33 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
34 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
35 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
36 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
37 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
38 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
39 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
40 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
41 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
42 main_body:
43 %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
44 %out = bitcast i32 %v to float
45 ret float %out
46 }
48 define amdgpu_ps float @atomic_add_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
49 ; GFX9-LABEL: name: atomic_add_1d
50 ; GFX9: bb.1.main_body:
51 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
52 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
53 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
54 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
55 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
56 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
57 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
58 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
59 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
60 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
61 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
62 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
63 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
64 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
65 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
66 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
67 ; GFX10NSA-LABEL: name: atomic_add_1d
68 ; GFX10NSA: bb.1.main_body:
69 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
70 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
71 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
72 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
73 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
74 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
75 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
76 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
77 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
78 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
79 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
80 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
81 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
82 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
83 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
84 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
85 main_body:
86 %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
87 %out = bitcast i32 %v to float
88 ret float %out
89 }
91 define amdgpu_ps float @atomic_sub_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
92 ; GFX9-LABEL: name: atomic_sub_1d
93 ; GFX9: bb.1.main_body:
94 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
95 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
96 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
97 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
98 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
99 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
100 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
101 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
102 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
103 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
104 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
105 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
106 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
107 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
108 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
109 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
110 ; GFX10NSA-LABEL: name: atomic_sub_1d
111 ; GFX10NSA: bb.1.main_body:
112 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
113 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
114 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
115 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
116 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
117 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
118 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
119 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
120 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
121 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
122 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
123 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
124 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
125 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
126 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
127 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
128 main_body:
129 %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
130 %out = bitcast i32 %v to float
131 ret float %out
132 }
134 define amdgpu_ps float @atomic_smin_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
135 ; GFX9-LABEL: name: atomic_smin_1d
136 ; GFX9: bb.1.main_body:
137 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
138 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
139 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
140 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
141 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
142 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
143 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
144 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
145 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
146 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
147 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
148 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
149 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
150 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
151 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
152 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
153 ; GFX10NSA-LABEL: name: atomic_smin_1d
154 ; GFX10NSA: bb.1.main_body:
155 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
156 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
157 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
158 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
159 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
160 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
161 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
162 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
163 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
164 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
165 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
166 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
167 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
168 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
169 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
170 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
171 main_body:
172 %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
173 %out = bitcast i32 %v to float
174 ret float %out
175 }
178 define amdgpu_ps float @atomic_umin_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
179 ; GFX9-LABEL: name: atomic_umin_1d
180 ; GFX9: bb.1.main_body:
181 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
182 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
183 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
184 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
185 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
186 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
187 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
188 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
189 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
190 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
191 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
192 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
193 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
194 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
195 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
196 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
197 ; GFX10NSA-LABEL: name: atomic_umin_1d
198 ; GFX10NSA: bb.1.main_body:
199 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
200 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
201 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
202 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
203 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
204 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
205 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
206 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
207 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
208 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
209 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
210 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
211 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
212 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
213 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
214 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
215 main_body:
216 %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
217 %out = bitcast i32 %v to float
218 ret float %out
219 }
221 define amdgpu_ps float @atomic_smax_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
222 ; GFX9-LABEL: name: atomic_smax_1d
223 ; GFX9: bb.1.main_body:
224 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
225 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
226 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
227 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
228 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
229 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
230 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
231 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
232 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
233 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
234 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
235 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
236 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
237 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
238 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
239 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
240 ; GFX10NSA-LABEL: name: atomic_smax_1d
241 ; GFX10NSA: bb.1.main_body:
242 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
243 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
244 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
245 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
246 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
247 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
248 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
249 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
250 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
251 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
252 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
253 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
254 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
255 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
256 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
257 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
258 main_body:
259 %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
260 %out = bitcast i32 %v to float
261 ret float %out
262 }
264 define amdgpu_ps float @atomic_umax_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
265 ; GFX9-LABEL: name: atomic_umax_1d
266 ; GFX9: bb.1.main_body:
267 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
268 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
269 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
270 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
271 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
272 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
273 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
274 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
275 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
276 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
277 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
278 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
279 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
280 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
281 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
282 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
283 ; GFX10NSA-LABEL: name: atomic_umax_1d
284 ; GFX10NSA: bb.1.main_body:
285 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
286 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
287 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
288 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
289 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
290 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
291 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
292 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
293 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
294 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
295 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
296 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
297 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
298 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
299 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
300 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
301 main_body:
302 %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
303 %out = bitcast i32 %v to float
304 ret float %out
305 }
307 define amdgpu_ps float @atomic_and_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
308 ; GFX9-LABEL: name: atomic_and_1d
309 ; GFX9: bb.1.main_body:
310 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
311 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
312 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
313 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
314 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
315 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
316 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
317 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
318 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
319 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
320 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
321 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
322 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
323 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
324 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
325 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
326 ; GFX10NSA-LABEL: name: atomic_and_1d
327 ; GFX10NSA: bb.1.main_body:
328 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
329 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
330 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
331 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
332 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
333 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
334 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
335 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
336 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
337 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
338 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
339 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
340 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
341 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
342 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
343 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
344 main_body:
345 %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
346 %out = bitcast i32 %v to float
347 ret float %out
348 }
350 define amdgpu_ps float @atomic_or_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
351 ; GFX9-LABEL: name: atomic_or_1d
352 ; GFX9: bb.1.main_body:
353 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
354 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
355 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
356 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
357 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
358 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
359 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
360 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
361 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
362 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
363 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
364 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
365 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
366 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
367 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
368 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
369 ; GFX10NSA-LABEL: name: atomic_or_1d
370 ; GFX10NSA: bb.1.main_body:
371 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
372 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
373 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
374 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
375 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
376 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
377 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
378 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
379 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
380 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
381 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
382 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
383 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
384 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
385 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
386 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
387 main_body:
388 %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
389 %out = bitcast i32 %v to float
390 ret float %out
391 }
393 define amdgpu_ps float @atomic_xor_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
394 ; GFX9-LABEL: name: atomic_xor_1d
395 ; GFX9: bb.1.main_body:
396 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
397 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
398 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
399 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
400 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
401 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
402 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
403 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
404 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
405 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
406 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
407 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
408 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
409 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
410 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
411 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
412 ; GFX10NSA-LABEL: name: atomic_xor_1d
413 ; GFX10NSA: bb.1.main_body:
414 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
415 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
416 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
417 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
418 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
419 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
420 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
421 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
422 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
423 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
424 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
425 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
426 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
427 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
428 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
429 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
430 main_body:
431 %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
432 %out = bitcast i32 %v to float
433 ret float %out
434 }
436 define amdgpu_ps float @atomic_inc_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
437 ; GFX9-LABEL: name: atomic_inc_1d
438 ; GFX9: bb.1.main_body:
439 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
440 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
441 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
442 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
443 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
444 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
445 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
446 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
447 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
448 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
449 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
450 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
451 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
452 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
453 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
454 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
455 ; GFX10NSA-LABEL: name: atomic_inc_1d
456 ; GFX10NSA: bb.1.main_body:
457 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
458 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
459 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
460 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
461 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
462 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
463 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
464 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
465 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
466 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
467 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
468 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
469 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
470 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
471 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
472 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
473 main_body:
474 %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
475 %out = bitcast i32 %v to float
476 ret float %out
477 }
479 define amdgpu_ps float @atomic_dec_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
480 ; GFX9-LABEL: name: atomic_dec_1d
481 ; GFX9: bb.1.main_body:
482 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
483 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
484 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
485 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
486 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
487 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
488 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
489 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
490 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
491 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
492 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
493 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
494 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
495 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
496 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
497 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
498 ; GFX10NSA-LABEL: name: atomic_dec_1d
499 ; GFX10NSA: bb.1.main_body:
500 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
501 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
502 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
503 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
504 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
505 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
506 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
507 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
508 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
509 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
510 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
511 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
512 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
513 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
514 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
515 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
516 main_body:
517 %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
518 %out = bitcast i32 %v to float
519 ret float %out
520 }
522 define amdgpu_ps float @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s) {
523 ; GFX9-LABEL: name: atomic_cmpswap_1d
524 ; GFX9: bb.1.main_body:
525 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
526 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
527 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
528 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
529 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
530 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
531 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
532 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
533 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
534 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
535 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
536 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
537 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
538 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
539 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
540 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
541 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
542 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
543 ; GFX10NSA-LABEL: name: atomic_cmpswap_1d
544 ; GFX10NSA: bb.1.main_body:
545 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
546 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
547 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
548 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
549 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
550 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
551 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
552 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
553 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
554 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
555 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
556 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
557 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
558 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
559 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
560 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
561 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
562 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
563 main_body:
564 %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32 %cmp, i32 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
565 %out = bitcast i32 %v to float
566 ret float %out
567 }
569 define amdgpu_ps float @atomic_add_2d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t) {
570 ; GFX9-LABEL: name: atomic_add_2d
571 ; GFX9: bb.1.main_body:
572 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
573 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
574 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
575 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
576 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
577 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
578 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
579 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
580 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
581 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
582 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
583 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
584 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
585 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
586 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
587 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
588 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
589 ; GFX10NSA-LABEL: name: atomic_add_2d
590 ; GFX10NSA: bb.1.main_body:
591 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
592 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
593 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
594 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
595 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
596 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
597 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
598 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
599 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
600 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
601 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
602 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
603 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
604 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
605 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
606 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
607 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
608 main_body:
609 %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
610 %out = bitcast i32 %v to float
611 ret float %out
612 }
614 define amdgpu_ps float @atomic_add_3d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %r) {
615 ; GFX9-LABEL: name: atomic_add_3d
616 ; GFX9: bb.1.main_body:
617 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
618 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
619 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
620 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
621 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
622 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
623 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
624 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
625 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
626 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
627 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
628 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
629 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
630 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
631 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
632 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
633 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[DEF]](s32)
634 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
635 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
636 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
637 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
638 ; GFX10NSA-LABEL: name: atomic_add_3d
639 ; GFX10NSA: bb.1.main_body:
640 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
641 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
642 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
643 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
644 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
645 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
646 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
647 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
648 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
649 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
650 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
651 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
652 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
653 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
654 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
655 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
656 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[DEF]](s32)
657 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
658 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
659 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
660 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
661 main_body:
662 %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
663 %out = bitcast i32 %v to float
664 ret float %out
665 }
667 define amdgpu_ps float @atomic_add_cube(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %face) {
668 ; GFX9-LABEL: name: atomic_add_cube
669 ; GFX9: bb.1.main_body:
670 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
671 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
672 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
673 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
674 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
675 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
676 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
677 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
678 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
679 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
680 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
681 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
682 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
683 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
684 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
685 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
686 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[DEF]](s32)
687 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
688 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
689 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
690 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
691 ; GFX10NSA-LABEL: name: atomic_add_cube
692 ; GFX10NSA: bb.1.main_body:
693 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
694 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
695 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
696 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
697 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
698 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
699 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
700 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
701 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
702 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
703 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
704 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
705 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
706 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
707 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
708 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
709 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[DEF]](s32)
710 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
711 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
712 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
713 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
714 main_body:
715 %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32 %data, i16 %s, i16 %t, i16 %face, <8 x i32> %rsrc, i32 0, i32 0)
716 %out = bitcast i32 %v to float
717 ret float %out
718 }
720 define amdgpu_ps float @atomic_add_1darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %slice) {
721 ; GFX9-LABEL: name: atomic_add_1darray
722 ; GFX9: bb.1.main_body:
723 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
724 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
725 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
726 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
727 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
728 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
729 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
730 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
731 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
732 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
733 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
734 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
735 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
736 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
737 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
738 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
739 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
740 ; GFX10NSA-LABEL: name: atomic_add_1darray
741 ; GFX10NSA: bb.1.main_body:
742 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
743 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
744 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
745 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
746 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
747 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
748 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
749 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
750 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
751 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
752 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
753 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
754 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
755 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
756 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
757 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
758 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
759 main_body:
760 %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
761 %out = bitcast i32 %v to float
762 ret float %out
763 }
765 define amdgpu_ps float @atomic_add_2darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice) {
766 ; GFX9-LABEL: name: atomic_add_2darray
; GFX9: bb.1.main_body:
; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_add_2darray
; GFX10NSA: bb.1.main_body:
; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[DEF]](s32)
; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

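; atomic_add_2dmsaa: three 16-bit coordinates (s, t, fragid); as in the 2darray case above, the checks expect them packed into two <2 x s16> halves, with G_IMPLICIT_DEF filling the unused fourth lane.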
define amdgpu_ps float @atomic_add_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %fragid) {
; GFX9-LABEL: name: atomic_add_2dmsaa
; GFX9: bb.1.main_body:
; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_add_2dmsaa
; GFX10NSA: bb.1.main_body:
; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[DEF]](s32)
; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

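; atomic_add_2darraymsaa: four 16-bit coordinates (s, t, slice, fragid), so the checks expect both <2 x s16> halves to be built directly from the inputs with no G_IMPLICIT_DEF padding.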
define amdgpu_ps float @atomic_add_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
; GFX9-LABEL: name: atomic_add_2darraymsaa
; GFX9: bb.1.main_body:
; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_add_2darraymsaa
; GFX10NSA: bb.1.main_body:
; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

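; atomic_add_1d_slc: same as the plain 1d add but with cachepolicy = 2 (slc); the checks expect this to show up as the trailing "0, 2, 3" immediates on G_AMDGPU_INTRIN_IMAGE_LOAD instead of "0, 0, 3".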
define amdgpu_ps float @atomic_add_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: name: atomic_add_1d_slc
; GFX9: bb.1.main_body:
; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_add_1d_slc
; GFX10NSA: bb.1.main_body:
; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
  %out = bitcast i32 %v to float
  ret float %out
}

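; atomic_cmpswap_2d: cmpswap carries two 32-bit data operands (cmp, swap); the checks expect them grouped into a <2 x s32> G_BUILD_VECTOR ahead of the packed 16-bit coordinates.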
define amdgpu_ps float @atomic_cmpswap_2d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s, i16 %t) {
; GFX9-LABEL: name: atomic_cmpswap_2d
; GFX9: bb.1.main_body:
; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_cmpswap_2d
; GFX10NSA: bb.1.main_body:
; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.2d.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

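; atomic_cmpswap_3d: three 16-bit coordinates again, so the checks expect the second <2 x s16> half to be padded with G_IMPLICIT_DEF.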
define amdgpu_ps float @atomic_cmpswap_3d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %r) {
; GFX9-LABEL: name: atomic_cmpswap_3d
; GFX9: bb.1.main_body:
; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_cmpswap_3d
; GFX10NSA: bb.1.main_body:
; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.3d.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

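; atomic_cmpswap_2darraymsaa: the <2 x s32> cmp/swap pair plus four 16-bit coordinates; no G_IMPLICIT_DEF padding is expected.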
define amdgpu_ps float @atomic_cmpswap_2darraymsaa(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
; GFX9-LABEL: name: atomic_cmpswap_2darraymsaa
; GFX9: bb.1.main_body:
; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_cmpswap_2darraymsaa
; GFX10NSA: bb.1.main_body:
; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.2darraymsaa.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32, i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.2d.i32.i16(i32, i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.3d.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.cube.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.1darray.i32.i16(i32, i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.2darray.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.2dmsaa.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.2darraymsaa.i32.i16(i32, i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind }