1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10PLUS,GFX10 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10PLUS,GFX11 %s
; Returning ds.fadd on an addrspace(3) pointer where both the pointer and the
; value are inreg arguments of an amdgpu_ps function. The expected output first
; copies s2/s3 into v0/v1 and then issues ds_add_rtn_f32. Only the GFX8
; sequence contains "s_mov_b32 m0, -1"; the GFX11 sequence performs both copies
; with a single v_dual_mov_b32.
7 define amdgpu_ps float @ds_fadd_f32_ss(ptr addrspace(3) inreg %ptr, float inreg %val) {
8 ; GFX8-LABEL: ds_fadd_f32_ss:
10 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
11 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
12 ; GFX8-NEXT: s_mov_b32 m0, -1
13 ; GFX8-NEXT: ds_add_rtn_f32 v0, v0, v1
14 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
15 ; GFX8-NEXT: ; return to shader part epilog
17 ; GFX9-LABEL: ds_fadd_f32_ss:
19 ; GFX9-NEXT: v_mov_b32_e32 v0, s2
20 ; GFX9-NEXT: v_mov_b32_e32 v1, s3
21 ; GFX9-NEXT: ds_add_rtn_f32 v0, v0, v1
22 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
23 ; GFX9-NEXT: ; return to shader part epilog
25 ; GFX10-LABEL: ds_fadd_f32_ss:
27 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
28 ; GFX10-NEXT: v_mov_b32_e32 v1, s3
29 ; GFX10-NEXT: ds_add_rtn_f32 v0, v0, v1
30 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
31 ; GFX10-NEXT: ; return to shader part epilog
33 ; GFX11-LABEL: ds_fadd_f32_ss:
35 ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
36 ; GFX11-NEXT: ds_add_rtn_f32 v0, v0, v1
37 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
38 ; GFX11-NEXT: ; return to shader part epilog
; The three trailing constants are the intrinsic's immarg operands.
39 %ret = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)
; Same as the inreg returning case, but the address is %ptr advanced by 128
; float elements. The expected output carries that displacement as the
; instruction immediate "offset:512" (128 * 4 bytes) instead of a separate add.
; Note that in these sequences v0 receives the value (s3) and v1 the
; pointer (s2).
43 define amdgpu_ps float @ds_fadd_f32_ss_offset(ptr addrspace(3) inreg %ptr, float inreg %val) {
44 ; GFX8-LABEL: ds_fadd_f32_ss_offset:
46 ; GFX8-NEXT: v_mov_b32_e32 v0, s3
47 ; GFX8-NEXT: v_mov_b32_e32 v1, s2
48 ; GFX8-NEXT: s_mov_b32 m0, -1
49 ; GFX8-NEXT: ds_add_rtn_f32 v0, v1, v0 offset:512
50 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
51 ; GFX8-NEXT: ; return to shader part epilog
53 ; GFX9-LABEL: ds_fadd_f32_ss_offset:
55 ; GFX9-NEXT: v_mov_b32_e32 v0, s3
56 ; GFX9-NEXT: v_mov_b32_e32 v1, s2
57 ; GFX9-NEXT: ds_add_rtn_f32 v0, v1, v0 offset:512
58 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
59 ; GFX9-NEXT: ; return to shader part epilog
61 ; GFX10-LABEL: ds_fadd_f32_ss_offset:
63 ; GFX10-NEXT: v_mov_b32_e32 v0, s3
64 ; GFX10-NEXT: v_mov_b32_e32 v1, s2
65 ; GFX10-NEXT: ds_add_rtn_f32 v0, v1, v0 offset:512
66 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
67 ; GFX10-NEXT: ; return to shader part epilog
69 ; GFX11-LABEL: ds_fadd_f32_ss_offset:
71 ; GFX11-NEXT: v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v1, s2
72 ; GFX11-NEXT: ds_add_rtn_f32 v0, v1, v0 offset:512
73 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
74 ; GFX11-NEXT: ; return to shader part epilog
; 128 float elements past %ptr, i.e. a 512-byte displacement.
75 %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128
76 %ret = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false)
; Void amdgpu_ps variant with inreg operands. The intrinsic result is bound to
; %unused, and the expected output uses the non-returning ds_add_f32 form
; (two operands, no destination register) rather than ds_add_rtn_f32.
; NOTE(review): the GFX8 and GFX9 sequences below end at ds_add_f32, while the
; GFX10 and GFX11 sequences also expect s_endpgm. Confirm whether that is
; intended; if not, regenerate with utils/update_llc_test_checks.py rather
; than editing the assertions by hand.
80 define amdgpu_ps void @ds_fadd_f32_ss_nortn(ptr addrspace(3) inreg %ptr, float inreg %val) {
81 ; GFX8-LABEL: ds_fadd_f32_ss_nortn:
83 ; GFX8-NEXT: v_mov_b32_e32 v0, s2
84 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
85 ; GFX8-NEXT: s_mov_b32 m0, -1
86 ; GFX8-NEXT: ds_add_f32 v0, v1
89 ; GFX9-LABEL: ds_fadd_f32_ss_nortn:
91 ; GFX9-NEXT: v_mov_b32_e32 v0, s2
92 ; GFX9-NEXT: v_mov_b32_e32 v1, s3
93 ; GFX9-NEXT: ds_add_f32 v0, v1
96 ; GFX10-LABEL: ds_fadd_f32_ss_nortn:
98 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
99 ; GFX10-NEXT: v_mov_b32_e32 v1, s3
100 ; GFX10-NEXT: ds_add_f32 v0, v1
101 ; GFX10-NEXT: s_endpgm
103 ; GFX11-LABEL: ds_fadd_f32_ss_nortn:
105 ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
106 ; GFX11-NEXT: ds_add_f32 v0, v1
107 ; GFX11-NEXT: s_endpgm
108 %unused = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)
; Void amdgpu_ps variant with inreg operands and an address 128 float elements
; past %ptr. The expected output combines the two previous properties: the
; non-returning ds_add_f32 form plus the immediate "offset:512", followed by
; s_endpgm on every checked target.
112 define amdgpu_ps void @ds_fadd_f32_ss_offset_nortn(ptr addrspace(3) inreg %ptr, float inreg %val) {
113 ; GFX8-LABEL: ds_fadd_f32_ss_offset_nortn:
115 ; GFX8-NEXT: v_mov_b32_e32 v0, s3
116 ; GFX8-NEXT: v_mov_b32_e32 v1, s2
117 ; GFX8-NEXT: s_mov_b32 m0, -1
118 ; GFX8-NEXT: ds_add_f32 v1, v0 offset:512
119 ; GFX8-NEXT: s_endpgm
121 ; GFX9-LABEL: ds_fadd_f32_ss_offset_nortn:
123 ; GFX9-NEXT: v_mov_b32_e32 v0, s3
124 ; GFX9-NEXT: v_mov_b32_e32 v1, s2
125 ; GFX9-NEXT: ds_add_f32 v1, v0 offset:512
126 ; GFX9-NEXT: s_endpgm
128 ; GFX10-LABEL: ds_fadd_f32_ss_offset_nortn:
130 ; GFX10-NEXT: v_mov_b32_e32 v0, s3
131 ; GFX10-NEXT: v_mov_b32_e32 v1, s2
132 ; GFX10-NEXT: ds_add_f32 v1, v0 offset:512
133 ; GFX10-NEXT: s_endpgm
135 ; GFX11-LABEL: ds_fadd_f32_ss_offset_nortn:
137 ; GFX11-NEXT: v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v1, s2
138 ; GFX11-NEXT: ds_add_f32 v1, v0 offset:512
139 ; GFX11-NEXT: s_endpgm
; 128 float elements past %ptr, i.e. a 512-byte displacement.
140 %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128
141 %unused = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false)
; Returning ds.fadd in a function with the default calling convention and
; non-inreg arguments. The expected output uses v0/v1 directly (no v_mov
; copies), opens with a full s_waitcnt and returns through
; s_setpc_b64 s[30:31]. The gfx1010 and gfx1100 runs share one set of
; assertions here through their common check prefix.
145 define float @ds_fadd_f32_vv(ptr addrspace(3) %ptr, float %val) {
146 ; GFX8-LABEL: ds_fadd_f32_vv:
148 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
149 ; GFX8-NEXT: s_mov_b32 m0, -1
150 ; GFX8-NEXT: ds_add_rtn_f32 v0, v0, v1
151 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
152 ; GFX8-NEXT: s_setpc_b64 s[30:31]
154 ; GFX9-LABEL: ds_fadd_f32_vv:
156 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157 ; GFX9-NEXT: ds_add_rtn_f32 v0, v0, v1
158 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
159 ; GFX9-NEXT: s_setpc_b64 s[30:31]
161 ; GFX10PLUS-LABEL: ds_fadd_f32_vv:
162 ; GFX10PLUS: ; %bb.0:
163 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164 ; GFX10PLUS-NEXT: ds_add_rtn_f32 v0, v0, v1
165 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0)
166 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
167 %ret = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)
; Returning ds.fadd with non-inreg arguments and an address 128 float elements
; past %ptr. The expected output keeps the operands in v0/v1 and encodes the
; displacement as the immediate "offset:512" (128 * 4 bytes) on
; ds_add_rtn_f32.
171 define float @ds_fadd_f32_vv_offset(ptr addrspace(3) %ptr, float %val) {
172 ; GFX8-LABEL: ds_fadd_f32_vv_offset:
174 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
175 ; GFX8-NEXT: s_mov_b32 m0, -1
176 ; GFX8-NEXT: ds_add_rtn_f32 v0, v0, v1 offset:512
177 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
178 ; GFX8-NEXT: s_setpc_b64 s[30:31]
180 ; GFX9-LABEL: ds_fadd_f32_vv_offset:
182 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
183 ; GFX9-NEXT: ds_add_rtn_f32 v0, v0, v1 offset:512
184 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
185 ; GFX9-NEXT: s_setpc_b64 s[30:31]
187 ; GFX10PLUS-LABEL: ds_fadd_f32_vv_offset:
188 ; GFX10PLUS: ; %bb.0:
189 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190 ; GFX10PLUS-NEXT: ds_add_rtn_f32 v0, v0, v1 offset:512
191 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0)
192 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; 128 float elements past %ptr, i.e. a 512-byte displacement.
193 %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128
194 %ret = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false)
; Void function with non-inreg arguments. The expected output uses the
; non-returning ds_add_f32 form, and still has an s_waitcnt lgkmcnt(0) between
; the DS instruction and the s_setpc_b64 return.
198 define void @ds_fadd_f32_vv_nortn(ptr addrspace(3) %ptr, float %val) {
199 ; GFX8-LABEL: ds_fadd_f32_vv_nortn:
201 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
202 ; GFX8-NEXT: s_mov_b32 m0, -1
203 ; GFX8-NEXT: ds_add_f32 v0, v1
204 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
205 ; GFX8-NEXT: s_setpc_b64 s[30:31]
207 ; GFX9-LABEL: ds_fadd_f32_vv_nortn:
209 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210 ; GFX9-NEXT: ds_add_f32 v0, v1
211 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
212 ; GFX9-NEXT: s_setpc_b64 s[30:31]
214 ; GFX10PLUS-LABEL: ds_fadd_f32_vv_nortn:
215 ; GFX10PLUS: ; %bb.0:
216 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217 ; GFX10PLUS-NEXT: ds_add_f32 v0, v1
218 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0)
219 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; The result is named %ret here even though the enclosing function is void.
220 %ret = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)
; Void function with non-inreg arguments and an address 128 float elements past
; %ptr. The expected output is the non-returning ds_add_f32 form with the
; immediate "offset:512" (128 * 4 bytes).
224 define void @ds_fadd_f32_vv_offset_nortn(ptr addrspace(3) %ptr, float %val) {
225 ; GFX8-LABEL: ds_fadd_f32_vv_offset_nortn:
227 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
228 ; GFX8-NEXT: s_mov_b32 m0, -1
229 ; GFX8-NEXT: ds_add_f32 v0, v1 offset:512
230 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
231 ; GFX8-NEXT: s_setpc_b64 s[30:31]
233 ; GFX9-LABEL: ds_fadd_f32_vv_offset_nortn:
235 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
236 ; GFX9-NEXT: ds_add_f32 v0, v1 offset:512
237 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
238 ; GFX9-NEXT: s_setpc_b64 s[30:31]
240 ; GFX10PLUS-LABEL: ds_fadd_f32_vv_offset_nortn:
241 ; GFX10PLUS: ; %bb.0:
242 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
243 ; GFX10PLUS-NEXT: ds_add_f32 v0, v1 offset:512
244 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0)
245 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; 128 float elements past %ptr, i.e. a 512-byte displacement.
246 %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128
247 %ret = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false)
; Same shape as ds_fadd_f32_vv, except the intrinsic's final i1 immarg is true
; instead of false (presumably the volatile flag, going by the function name;
; confirm against the intrinsic definition). The expected instruction sequences
; are the same as those of ds_fadd_f32_vv.
251 define float @ds_fadd_f32_vv_volatile(ptr addrspace(3) %ptr, float %val) {
252 ; GFX8-LABEL: ds_fadd_f32_vv_volatile:
254 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
255 ; GFX8-NEXT: s_mov_b32 m0, -1
256 ; GFX8-NEXT: ds_add_rtn_f32 v0, v0, v1
257 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
258 ; GFX8-NEXT: s_setpc_b64 s[30:31]
260 ; GFX9-LABEL: ds_fadd_f32_vv_volatile:
262 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
263 ; GFX9-NEXT: ds_add_rtn_f32 v0, v0, v1
264 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
265 ; GFX9-NEXT: s_setpc_b64 s[30:31]
267 ; GFX10PLUS-LABEL: ds_fadd_f32_vv_volatile:
268 ; GFX10PLUS: ; %bb.0:
269 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
270 ; GFX10PLUS-NEXT: ds_add_rtn_f32 v0, v0, v1
271 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0)
272 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; Only difference from the ds_fadd_f32_vv call: last operand is i1 true.
273 %ret = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 true)
; Declaration of the intrinsic exercised by every function in this file: it
; takes a nocapture addrspace(3) pointer, a float operand and three immarg
; operands (i32, i32, i1), and returns a float.
; NOTE(review): the meaning of the immarg operands is not visible in this file;
; presumably ordering, scope and volatile. Confirm against the intrinsic
; definition.
277 declare float @llvm.amdgcn.ds.fadd(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0
; Attribute group applied to the declaration above.
279 attributes #0 = { argmemonly nounwind willreturn }