1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s
6 define amdgpu_ps float @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
7 ; GFX9-LABEL: name: atomic_swap_1d
8 ; GFX9: bb.1.main_body:
9 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
11 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
12 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
13 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
14 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
15 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
16 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
17 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
18 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
19 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
20 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
21 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
22 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
23 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
24 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
25 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
26 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
27 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
29 ; GFX10NSA-LABEL: name: atomic_swap_1d
30 ; GFX10NSA: bb.1.main_body:
31 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
32 ; GFX10NSA-NEXT: {{ $}}
33 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
34 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
35 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
36 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
37 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
38 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
39 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
40 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
41 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
42 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
43 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
44 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
45 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
46 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
47 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
48 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
49 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
51 %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
52 %out = bitcast i32 %v to float
56 define amdgpu_ps float @atomic_add_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
57 ; GFX9-LABEL: name: atomic_add_1d
58 ; GFX9: bb.1.main_body:
59 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
61 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
62 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
63 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
64 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
65 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
66 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
67 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
68 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
69 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
70 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
71 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
72 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
73 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
74 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
75 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
76 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
77 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
79 ; GFX10NSA-LABEL: name: atomic_add_1d
80 ; GFX10NSA: bb.1.main_body:
81 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
82 ; GFX10NSA-NEXT: {{ $}}
83 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
84 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
85 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
86 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
87 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
88 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
89 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
90 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
91 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
92 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
93 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
94 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
95 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
96 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
97 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
98 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
99 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
101 %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
102 %out = bitcast i32 %v to float
106 define amdgpu_ps float @atomic_sub_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
107 ; GFX9-LABEL: name: atomic_sub_1d
108 ; GFX9: bb.1.main_body:
109 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
111 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
112 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
113 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
114 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
115 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
116 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
117 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
118 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
119 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
120 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
121 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
122 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
123 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
124 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
125 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
126 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
127 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
129 ; GFX10NSA-LABEL: name: atomic_sub_1d
130 ; GFX10NSA: bb.1.main_body:
131 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
132 ; GFX10NSA-NEXT: {{ $}}
133 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
134 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
135 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
136 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
137 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
138 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
139 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
140 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
141 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
142 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
143 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
144 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
145 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
146 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
147 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
148 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
149 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
151 %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
152 %out = bitcast i32 %v to float
156 define amdgpu_ps float @atomic_smin_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
157 ; GFX9-LABEL: name: atomic_smin_1d
158 ; GFX9: bb.1.main_body:
159 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
161 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
162 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
163 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
164 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
165 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
166 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
167 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
168 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
169 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
170 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
171 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
172 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
173 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
174 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
175 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
176 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
177 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
179 ; GFX10NSA-LABEL: name: atomic_smin_1d
180 ; GFX10NSA: bb.1.main_body:
181 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
182 ; GFX10NSA-NEXT: {{ $}}
183 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
184 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
185 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
186 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
187 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
188 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
189 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
190 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
191 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
192 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
193 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
194 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
195 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
196 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
197 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
198 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
199 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
201 %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
202 %out = bitcast i32 %v to float
207 define amdgpu_ps float @atomic_umin_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
208 ; GFX9-LABEL: name: atomic_umin_1d
209 ; GFX9: bb.1.main_body:
210 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
212 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
213 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
214 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
215 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
216 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
217 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
218 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
219 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
220 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
221 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
222 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
223 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
224 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
225 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
226 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
227 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
228 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
230 ; GFX10NSA-LABEL: name: atomic_umin_1d
231 ; GFX10NSA: bb.1.main_body:
232 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
233 ; GFX10NSA-NEXT: {{ $}}
234 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
235 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
236 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
237 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
238 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
239 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
240 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
241 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
242 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
243 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
244 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
245 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
246 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
247 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
248 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
249 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
250 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
252 %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
253 %out = bitcast i32 %v to float
257 define amdgpu_ps float @atomic_smax_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
258 ; GFX9-LABEL: name: atomic_smax_1d
259 ; GFX9: bb.1.main_body:
260 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
262 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
263 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
264 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
265 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
266 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
267 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
268 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
269 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
270 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
271 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
272 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
273 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
274 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
275 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
276 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
277 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
278 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
280 ; GFX10NSA-LABEL: name: atomic_smax_1d
281 ; GFX10NSA: bb.1.main_body:
282 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
283 ; GFX10NSA-NEXT: {{ $}}
284 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
285 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
286 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
287 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
288 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
289 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
290 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
291 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
292 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
293 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
294 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
295 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
296 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
297 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
298 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
299 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
300 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
302 %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
303 %out = bitcast i32 %v to float
307 define amdgpu_ps float @atomic_umax_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
308 ; GFX9-LABEL: name: atomic_umax_1d
309 ; GFX9: bb.1.main_body:
310 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
312 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
313 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
314 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
315 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
316 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
317 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
318 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
319 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
320 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
321 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
322 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
323 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
324 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
325 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
326 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
327 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
328 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
330 ; GFX10NSA-LABEL: name: atomic_umax_1d
331 ; GFX10NSA: bb.1.main_body:
332 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
333 ; GFX10NSA-NEXT: {{ $}}
334 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
335 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
336 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
337 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
338 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
339 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
340 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
341 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
342 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
343 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
344 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
345 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
346 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
347 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
348 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
349 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
350 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
352 %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
353 %out = bitcast i32 %v to float
357 define amdgpu_ps float @atomic_and_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
358 ; GFX9-LABEL: name: atomic_and_1d
359 ; GFX9: bb.1.main_body:
360 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
362 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
363 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
364 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
365 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
366 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
367 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
368 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
369 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
370 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
371 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
372 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
373 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
374 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
375 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
376 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
377 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
378 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
380 ; GFX10NSA-LABEL: name: atomic_and_1d
381 ; GFX10NSA: bb.1.main_body:
382 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
383 ; GFX10NSA-NEXT: {{ $}}
384 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
385 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
386 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
387 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
388 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
389 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
390 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
391 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
392 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
393 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
394 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
395 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
396 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
397 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
398 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
399 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
400 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
402 %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
403 %out = bitcast i32 %v to float
407 define amdgpu_ps float @atomic_or_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
408 ; GFX9-LABEL: name: atomic_or_1d
409 ; GFX9: bb.1.main_body:
410 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
412 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
413 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
414 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
415 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
416 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
417 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
418 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
419 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
420 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
421 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
422 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
423 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
424 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
425 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
426 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
427 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
428 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
430 ; GFX10NSA-LABEL: name: atomic_or_1d
431 ; GFX10NSA: bb.1.main_body:
432 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
433 ; GFX10NSA-NEXT: {{ $}}
434 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
435 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
436 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
437 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
438 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
439 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
440 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
441 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
442 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
443 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
444 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
445 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
446 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
447 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
448 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
449 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
450 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
452 %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
453 %out = bitcast i32 %v to float
457 define amdgpu_ps float @atomic_xor_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
458 ; GFX9-LABEL: name: atomic_xor_1d
459 ; GFX9: bb.1.main_body:
460 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
462 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
463 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
464 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
465 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
466 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
467 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
468 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
469 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
470 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
471 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
472 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
473 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
474 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
475 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
476 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
477 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
478 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
480 ; GFX10NSA-LABEL: name: atomic_xor_1d
481 ; GFX10NSA: bb.1.main_body:
482 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
483 ; GFX10NSA-NEXT: {{ $}}
484 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
485 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
486 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
487 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
488 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
489 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
490 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
491 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
492 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
493 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
494 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
495 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
496 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
497 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
498 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
499 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
500 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
502 %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
503 %out = bitcast i32 %v to float
507 define amdgpu_ps float @atomic_inc_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
508 ; GFX9-LABEL: name: atomic_inc_1d
509 ; GFX9: bb.1.main_body:
510 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
512 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
513 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
514 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
515 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
516 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
517 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
518 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
519 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
520 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
521 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
522 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
523 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
524 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
525 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
526 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
527 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
528 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
530 ; GFX10NSA-LABEL: name: atomic_inc_1d
531 ; GFX10NSA: bb.1.main_body:
532 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
533 ; GFX10NSA-NEXT: {{ $}}
534 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
535 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
536 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
537 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
538 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
539 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
540 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
541 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
542 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
543 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
544 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
545 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
546 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
547 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
548 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
549 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
550 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
552 %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
553 %out = bitcast i32 %v to float
557 define amdgpu_ps float @atomic_dec_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
558 ; GFX9-LABEL: name: atomic_dec_1d
559 ; GFX9: bb.1.main_body:
560 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
562 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
563 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
564 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
565 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
566 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
567 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
568 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
569 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
570 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
571 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
572 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
573 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
574 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
575 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
576 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
577 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
578 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
580 ; GFX10NSA-LABEL: name: atomic_dec_1d
581 ; GFX10NSA: bb.1.main_body:
582 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
583 ; GFX10NSA-NEXT: {{ $}}
584 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
585 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
586 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
587 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
588 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
589 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
590 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
591 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
592 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
593 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
594 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
595 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
596 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
597 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
598 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
599 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
600 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
602 %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
603 %out = bitcast i32 %v to float
607 define amdgpu_ps float @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s) {
608 ; GFX9-LABEL: name: atomic_cmpswap_1d
609 ; GFX9: bb.1.main_body:
610 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
612 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
613 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
614 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
615 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
616 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
617 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
618 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
619 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
620 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
621 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
622 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
623 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
624 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
625 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
626 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
627 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
628 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
629 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
630 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
632 ; GFX10NSA-LABEL: name: atomic_cmpswap_1d
633 ; GFX10NSA: bb.1.main_body:
634 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
635 ; GFX10NSA-NEXT: {{ $}}
636 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
637 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
638 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
639 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
640 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
641 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
642 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
643 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
644 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
645 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
646 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
647 ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
648 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
649 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
650 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
651 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
652 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
653 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
654 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
656 %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32 %cmp, i32 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
657 %out = bitcast i32 %v to float
661 define amdgpu_ps float @atomic_add_2d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t) {
662 ; GFX9-LABEL: name: atomic_add_2d
663 ; GFX9: bb.1.main_body:
664 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
666 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
667 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
668 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
669 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
670 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
671 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
672 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
673 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
674 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
675 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
676 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
677 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
678 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
679 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
680 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
681 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
682 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
683 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
685 ; GFX10NSA-LABEL: name: atomic_add_2d
686 ; GFX10NSA: bb.1.main_body:
687 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
688 ; GFX10NSA-NEXT: {{ $}}
689 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
690 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
691 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
692 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
693 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
694 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
695 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
696 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
697 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
698 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
699 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
700 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
701 ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
702 ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
703 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
704 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
705 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
706 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
708 %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
709 %out = bitcast i32 %v to float
713 define amdgpu_ps float @atomic_add_3d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %r) {
714 ; GFX9-LABEL: name: atomic_add_3d
715 ; GFX9: bb.1.main_body:
716 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
718 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
719 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
720 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
721 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
722 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
723 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
724 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
725 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
726 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
727 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
728 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
729 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
730 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
731 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
732 ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
733 ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
734 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
735 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
736 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
737 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
738 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
739 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
740 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
742 ; GFX10NSA-LABEL: name: atomic_add_3d
743 ; GFX10NSA: bb.1.main_body:
744 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
745 ; GFX10NSA-NEXT: {{ $}}
746 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
747 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
748 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
749 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
750 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
751 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
752 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
753 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
754 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
755 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
756 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
757 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
758 ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
759 ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
760 ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
761 ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
762 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
763 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
764 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
765 ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
766 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
767 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
768 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
770 %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
771 %out = bitcast i32 %v to float
775 define amdgpu_ps float @atomic_add_cube(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %face) {
776 ; GFX9-LABEL: name: atomic_add_cube
777 ; GFX9: bb.1.main_body:
778 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
780 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
781 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
782 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
783 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
784 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
785 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
786 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
787 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
788 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
789 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
790 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
791 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
792 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
793 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
794 ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
795 ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
796 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
797 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
798 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
799 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
800 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
801 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
802 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
804 ; GFX10NSA-LABEL: name: atomic_add_cube
805 ; GFX10NSA: bb.1.main_body:
806 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
807 ; GFX10NSA-NEXT: {{ $}}
808 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
809 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
810 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
811 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
812 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
813 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
814 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
815 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
816 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
817 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
818 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
819 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
820 ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
821 ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
822 ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
823 ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
824 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
825 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
826 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
827 ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
828 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
829 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
830 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
832 %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32 %data, i16 %s, i16 %t, i16 %face, <8 x i32> %rsrc, i32 0, i32 0)
833 %out = bitcast i32 %v to float
837 define amdgpu_ps float @atomic_add_1darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %slice) {
838 ; GFX9-LABEL: name: atomic_add_1darray
839 ; GFX9: bb.1.main_body:
840 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
842 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
843 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
844 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
845 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
846 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
847 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
848 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
849 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
850 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
851 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
852 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
853 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
854 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
855 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
856 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
857 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
858 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
859 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
861 ; GFX10NSA-LABEL: name: atomic_add_1darray
862 ; GFX10NSA: bb.1.main_body:
863 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
864 ; GFX10NSA-NEXT: {{ $}}
865 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
866 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
867 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
868 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
869 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
870 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
871 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
872 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
873 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
874 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
875 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
876 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
877 ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
878 ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
879 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
880 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
881 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
882 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
884 %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
885 %out = bitcast i32 %v to float
889 define amdgpu_ps float @atomic_add_2darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice) {
890 ; GFX9-LABEL: name: atomic_add_2darray
891 ; GFX9: bb.1.main_body:
892 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
894 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
895 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
896 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
897 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
898 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
899 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
900 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
901 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
902 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
903 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
904 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
905 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
906 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
907 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
908 ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
909 ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
910 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
911 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
912 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
913 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
914 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
915 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
916 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
918 ; GFX10NSA-LABEL: name: atomic_add_2darray
919 ; GFX10NSA: bb.1.main_body:
920 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
921 ; GFX10NSA-NEXT: {{ $}}
922 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
923 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
924 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
925 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
926 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
927 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
928 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
929 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
930 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
931 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
932 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
933 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
934 ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
935 ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
936 ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
937 ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
938 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
939 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
940 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
941 ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
942 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
943 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
944 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
946 %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
947 %out = bitcast i32 %v to float
951 define amdgpu_ps float @atomic_add_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %fragid) {
952 ; GFX9-LABEL: name: atomic_add_2dmsaa
953 ; GFX9: bb.1.main_body:
954 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
956 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
957 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
958 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
959 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
960 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
961 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
962 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
963 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
964 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
965 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
966 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
967 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
968 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
969 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
970 ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
971 ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
972 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
973 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
974 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
975 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
976 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
977 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
978 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
980 ; GFX10NSA-LABEL: name: atomic_add_2dmsaa
981 ; GFX10NSA: bb.1.main_body:
982 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
983 ; GFX10NSA-NEXT: {{ $}}
984 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
985 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
986 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
987 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
988 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
989 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
990 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
991 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
992 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
993 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
994 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
995 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
996 ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
997 ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
998 ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
999 ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1000 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1001 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
1002 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
1003 ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
1004 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
1005 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1006 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1008 %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1009 %out = bitcast i32 %v to float
1013 define amdgpu_ps float @atomic_add_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
1014 ; GFX9-LABEL: name: atomic_add_2darraymsaa
1015 ; GFX9: bb.1.main_body:
1016 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1018 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1019 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1020 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1021 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1022 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1023 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1024 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1025 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1026 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1027 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1028 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1029 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1030 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1031 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1032 ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1033 ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1034 ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
1035 ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
1036 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1037 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
1038 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
1039 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
1040 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1041 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1043 ; GFX10NSA-LABEL: name: atomic_add_2darraymsaa
1044 ; GFX10NSA: bb.1.main_body:
1045 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1046 ; GFX10NSA-NEXT: {{ $}}
1047 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1048 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1049 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1050 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1051 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1052 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1053 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1054 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1055 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1056 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1057 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1058 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1059 ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1060 ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1061 ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1062 ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1063 ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
1064 ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
1065 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1066 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
1067 ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
1068 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
1069 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1070 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1072 %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1073 %out = bitcast i32 %v to float
1077 define amdgpu_ps float @atomic_add_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
1078 ; GFX9-LABEL: name: atomic_add_1d_slc
1079 ; GFX9: bb.1.main_body:
1080 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1082 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1083 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1084 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1085 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1086 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1087 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1088 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1089 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1090 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1091 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1092 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1093 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1094 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
1095 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
1096 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
1097 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1098 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1100 ; GFX10NSA-LABEL: name: atomic_add_1d_slc
1101 ; GFX10NSA: bb.1.main_body:
1102 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1103 ; GFX10NSA-NEXT: {{ $}}
1104 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1105 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1106 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1107 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1108 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1109 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1110 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1111 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1112 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1113 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1114 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1115 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
1116 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
1117 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
1118 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
1119 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1120 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1122 %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
1123 %out = bitcast i32 %v to float
1127 define amdgpu_ps float @atomic_cmpswap_2d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s, i16 %t) {
1128 ; GFX9-LABEL: name: atomic_cmpswap_2d
1129 ; GFX9: bb.1.main_body:
1130 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1132 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1133 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1134 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1135 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1136 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1137 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1138 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1139 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1140 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1141 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1142 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1143 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1144 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1145 ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1146 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1147 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1148 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1149 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
1150 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1151 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1153 ; GFX10NSA-LABEL: name: atomic_cmpswap_2d
1154 ; GFX10NSA: bb.1.main_body:
1155 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1156 ; GFX10NSA-NEXT: {{ $}}
1157 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1158 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1159 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1160 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1161 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1162 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1163 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1164 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1165 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1166 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1167 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1168 ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1169 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1170 ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1171 ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1172 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1173 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1174 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
1175 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1176 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1178 %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.2d.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
1179 %out = bitcast i32 %v to float
1183 define amdgpu_ps float @atomic_cmpswap_3d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %r) {
1184 ; GFX9-LABEL: name: atomic_cmpswap_3d
1185 ; GFX9: bb.1.main_body:
1186 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1188 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1189 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1190 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1191 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1192 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1193 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1194 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1195 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1196 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1197 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1198 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1199 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1200 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1201 ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1202 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1203 ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
1204 ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
1205 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1206 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1207 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
1208 ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
1209 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
1210 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
1211 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1212 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1214 ; GFX10NSA-LABEL: name: atomic_cmpswap_3d
1215 ; GFX10NSA: bb.1.main_body:
1216 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1217 ; GFX10NSA-NEXT: {{ $}}
1218 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1219 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1220 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1221 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1222 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1223 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1224 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1225 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1226 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1227 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1228 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1229 ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1230 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1231 ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1232 ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1233 ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
1234 ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
1235 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1236 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1237 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
1238 ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
1239 ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
1240 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
1241 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1242 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1244 %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.3d.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
1245 %out = bitcast i32 %v to float
1249 define amdgpu_ps float @atomic_cmpswap_2darraymsaa(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
1250 ; GFX9-LABEL: name: atomic_cmpswap_2darraymsaa
1251 ; GFX9: bb.1.main_body:
1252 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1254 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1255 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1256 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1257 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1258 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1259 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1260 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1261 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1262 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1263 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1264 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1265 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1266 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1267 ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1268 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1269 ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
1270 ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
1271 ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
1272 ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
1273 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1274 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1275 ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
1276 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
1277 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8)
1278 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1279 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1281 ; GFX10NSA-LABEL: name: atomic_cmpswap_2darraymsaa
1282 ; GFX10NSA: bb.1.main_body:
1283 ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1284 ; GFX10NSA-NEXT: {{ $}}
1285 ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1286 ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1287 ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1288 ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1289 ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1290 ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1291 ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1292 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1293 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1294 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1295 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1296 ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1297 ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
1298 ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1299 ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
1300 ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
1301 ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
1302 ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
1303 ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
1304 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1305 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
1306 ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
1307 ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
1308 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8)
1309 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
1310 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1312 %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.2darraymsaa.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1313 %out = bitcast i32 %v to float
1317 declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1318 declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1319 declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1320 declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1321 declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1322 declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1323 declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1324 declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1325 declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1326 declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1327 declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1328 declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1329 declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32, i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1330 declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1331 declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1332 declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1333 declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1334 declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1335 declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1336 declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1337 declare i32 @llvm.amdgcn.image.atomic.cmpswap.2d.i32.i16(i32, i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1338 declare i32 @llvm.amdgcn.image.atomic.cmpswap.3d.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1339 declare i32 @llvm.amdgcn.image.atomic.cmpswap.cube.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1340 declare i32 @llvm.amdgcn.image.atomic.cmpswap.1darray.i32.i16(i32, i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1341 declare i32 @llvm.amdgcn.image.atomic.cmpswap.2darray.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1342 declare i32 @llvm.amdgcn.image.atomic.cmpswap.2dmsaa.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1343 declare i32 @llvm.amdgcn.image.atomic.cmpswap.2darraymsaa.i32.i16(i32, i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1345 attributes #0 = { nounwind }