1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=gfx900 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9 %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx1030 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10 %s
4 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -amdgpu-enable-delay-alu=0 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s
5 ; RUN: llc -march=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize64 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10 %s
6 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize64 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s
; mad_i32_vvv: i32 multiply-add test in which %a, %b and %c are all plain
; (non-inreg) arguments; the check lines show them arriving in v0, v1 and v2.
; The visible IR adds %c to %mul and bitcasts the sum to the float return type.
; The definition of %mul is not shown in this view (presumably
; mul i32 %a, %b -- TODO confirm).
; The GFX9 and GFX10 checks expect a single v_mad_u64_u32. The GFX11 checks
; expect v1/v0 to be copied to v3/v4 first.
; NOTE(review): presumably a register-overlap restriction on v_mad_u64_u32 for
; gfx1100 -- confirm.
8 define amdgpu_ps float @mad_i32_vvv(i32 %a, i32 %b, i32 %c) {
9 ; GFX9-LABEL: mad_i32_vvv:
11 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, v1, v[2:3]
12 ; GFX9-NEXT: ; return to shader part epilog
14 ; GFX10-LABEL: mad_i32_vvv:
16 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[2:3]
17 ; GFX10-NEXT: ; return to shader part epilog
19 ; GFX11-LABEL: mad_i32_vvv:
21 ; GFX11-NEXT: v_mov_b32_e32 v3, v1
22 ; GFX11-NEXT: v_mov_b32_e32 v4, v0
23 ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v4, v3, v[2:3]
24 ; GFX11-NEXT: ; return to shader part epilog
26 %add = add i32 %mul, %c
27 %cast = bitcast i32 %add to float
; mad_i32_sss: i32 multiply-add test in which %a, %b and %c are all inreg
; arguments; the check lines show them in s0, s1 and s2.
; All run lines share the GCN checks here: a scalar s_mul_i32 followed by
; s_add_i32, with the result then copied from s0 into v0. No v_mad_u64_u32 is
; expected.
; The definition of %mul is not shown in this view (presumably
; mul i32 %a, %b -- TODO confirm).
31 define amdgpu_ps float @mad_i32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c) {
32 ; GCN-LABEL: mad_i32_sss:
34 ; GCN-NEXT: s_mul_i32 s0, s0, s1
35 ; GCN-NEXT: s_add_i32 s0, s0, s2
36 ; GCN-NEXT: v_mov_b32_e32 v0, s0
37 ; GCN-NEXT: ; return to shader part epilog
39 %add = add i32 %mul, %c
40 %cast = bitcast i32 %add to float
; mad_i32_vvc: i32 multiply-add test with two plain (non-inreg) arguments and
; the constant addend 42.
; In all three sets of checks the 42 appears directly as the last operand of
; v_mad_u64_u32. The GFX11 checks additionally copy v1/v0 to v2/v3 before the
; multiply-add.
; The definition of %mul is not shown in this view (presumably
; mul i32 %a, %b -- TODO confirm).
44 define amdgpu_ps float @mad_i32_vvc(i32 %a, i32 %b) {
45 ; GFX9-LABEL: mad_i32_vvc:
47 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, v1, 42
48 ; GFX9-NEXT: ; return to shader part epilog
50 ; GFX10-LABEL: mad_i32_vvc:
52 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, 42
53 ; GFX10-NEXT: ; return to shader part epilog
55 ; GFX11-LABEL: mad_i32_vvc:
57 ; GFX11-NEXT: v_mov_b32_e32 v2, v1
58 ; GFX11-NEXT: v_mov_b32_e32 v3, v0
59 ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, 42
60 ; GFX11-NEXT: ; return to shader part epilog
62 %add = add i32 %mul, 42
63 %cast = bitcast i32 %add to float
; mad_i32_vvi: i32 multiply-add test with two plain (non-inreg) arguments and
; the constant addend 1234567 (0x12d687).
; The GFX9 checks expect the constant to be built in v[2:3] first (low dword
; 0x12d687, high dword 0). The GFX10 and GFX11 checks expect it as a literal
; operand of v_mad_u64_u32.
; The definition of %mul is not shown in this view (presumably
; mul i32 %a, %b -- TODO confirm).
67 define amdgpu_ps float @mad_i32_vvi(i32 %a, i32 %b) {
68 ; GFX9-LABEL: mad_i32_vvi:
70 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x12d687
71 ; GFX9-NEXT: v_mov_b32_e32 v3, 0
72 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, v1, v[2:3]
73 ; GFX9-NEXT: ; return to shader part epilog
75 ; GFX10-LABEL: mad_i32_vvi:
77 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, 0x12d687
78 ; GFX10-NEXT: ; return to shader part epilog
80 ; GFX11-LABEL: mad_i32_vvi:
82 ; GFX11-NEXT: v_mov_b32_e32 v2, v1
83 ; GFX11-NEXT: v_mov_b32_e32 v3, v0
84 ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, 0x12d687
85 ; GFX11-NEXT: ; return to shader part epilog
87 %add = add i32 %mul, 1234567
88 %cast = bitcast i32 %add to float
; mad_i32_vvi_neg: i32 multiply-add test computing %a * %b + (-1234567) with
; two plain (non-inreg) arguments; the sum is bitcast to the float return type.
; -1234567 is 0xffed2979 as a 32-bit value. The GFX9 checks expect it to be
; built in v[2:3] with the high dword set to -1, i.e. the sign-extended 64-bit
; form. The GFX10 and GFX11 checks print it as the 64-bit literal
; 0xffffffffffed2979.
92 define amdgpu_ps float @mad_i32_vvi_neg(i32 %a, i32 %b) {
93 ; GFX9-LABEL: mad_i32_vvi_neg:
95 ; GFX9-NEXT: v_mov_b32_e32 v2, 0xffed2979
96 ; GFX9-NEXT: v_mov_b32_e32 v3, -1
97 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, v1, v[2:3]
98 ; GFX9-NEXT: ; return to shader part epilog
100 ; GFX10-LABEL: mad_i32_vvi_neg:
102 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, 0xffffffffffed2979
103 ; GFX10-NEXT: ; return to shader part epilog
105 ; GFX11-LABEL: mad_i32_vvi_neg:
107 ; GFX11-NEXT: v_mov_b32_e32 v2, v1
108 ; GFX11-NEXT: v_mov_b32_e32 v3, v0
109 ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, 0xffffffffffed2979
110 ; GFX11-NEXT: ; return to shader part epilog
111 %mul = mul i32 %a, %b
112 %add = add i32 %mul, -1234567
113 %cast = bitcast i32 %add to float
; mad_i32_vcv: i32 multiply-add test computing %a * 42 + %c with two plain
; (non-inreg) arguments; the sum is bitcast to the float return type.
; In all checks the 42 is the second multiplicand of v_mad_u64_u32, and %c
; (in v1) is the low dword of the v[1:2] addend pair.
; The GFX11 checks expect the result in v[2:3], followed by a copy of v2 into
; v0.
; NOTE(review): presumably a register-overlap restriction on gfx1100 --
; confirm.
117 define amdgpu_ps float @mad_i32_vcv(i32 %a, i32 %c) {
118 ; GFX9-LABEL: mad_i32_vcv:
120 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 42, v[1:2]
121 ; GFX9-NEXT: ; return to shader part epilog
123 ; GFX10-LABEL: mad_i32_vcv:
125 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 42, v[1:2]
126 ; GFX10-NEXT: ; return to shader part epilog
128 ; GFX11-LABEL: mad_i32_vcv:
130 ; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v0, 42, v[1:2]
131 ; GFX11-NEXT: v_mov_b32_e32 v0, v2
132 ; GFX11-NEXT: ; return to shader part epilog
133 %mul = mul i32 %a, 42
134 %add = add i32 %mul, %c
135 %cast = bitcast i32 %add to float
; mad_i32_vcc: i32 multiply-add test computing %a * 42 + 43 with one plain
; (non-inreg) argument; the sum is bitcast to the float return type.
; In all checks both constants appear directly as operands of v_mad_u64_u32.
; The GFX11 checks first copy v0 to v2 and multiply from the copy.
139 define amdgpu_ps float @mad_i32_vcc(i32 %a) {
140 ; GFX9-LABEL: mad_i32_vcc:
142 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 42, 43
143 ; GFX9-NEXT: ; return to shader part epilog
145 ; GFX10-LABEL: mad_i32_vcc:
147 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 42, 43
148 ; GFX10-NEXT: ; return to shader part epilog
150 ; GFX11-LABEL: mad_i32_vcc:
152 ; GFX11-NEXT: v_mov_b32_e32 v2, v0
153 ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v2, 42, 43
154 ; GFX11-NEXT: ; return to shader part epilog
155 %mul = mul i32 %a, 42
156 %add = add i32 %mul, 43
157 %cast = bitcast i32 %add to float
; mad_i32_vvs: i32 multiply-add test computing %a * %b + %c, where %a and %b
; are plain arguments (v0, v1 in the checks) and %c is inreg.
; In all checks the addend is read straight from the SGPR pair s[0:1] by
; v_mad_u64_u32. The GFX11 checks additionally copy v1/v0 to v2/v3 before the
; multiply-add.
161 define amdgpu_ps float @mad_i32_vvs(i32 %a, i32 %b, i32 inreg %c) {
162 ; GFX9-LABEL: mad_i32_vvs:
164 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, v1, s[0:1]
165 ; GFX9-NEXT: ; return to shader part epilog
167 ; GFX10-LABEL: mad_i32_vvs:
169 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, s[0:1]
170 ; GFX10-NEXT: ; return to shader part epilog
172 ; GFX11-LABEL: mad_i32_vvs:
174 ; GFX11-NEXT: v_mov_b32_e32 v2, v1
175 ; GFX11-NEXT: v_mov_b32_e32 v3, v0
176 ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, s[0:1]
177 ; GFX11-NEXT: ; return to shader part epilog
178 %mul = mul i32 %a, %b
179 %add = add i32 %mul, %c
180 %cast = bitcast i32 %add to float
; mad_i32_vsv: i32 multiply-add test computing %a * %b + %c, where %b is
; inreg (s0 in the checks) and %a and %c are plain arguments (v0, v1).
; In all checks s0 is the second multiplicand of v_mad_u64_u32 and v[1:2] is
; the addend pair.
; The GFX11 checks expect the result in v[2:3], followed by a copy of v2 into
; v0.
184 define amdgpu_ps float @mad_i32_vsv(i32 %a, i32 inreg %b, i32 %c) {
185 ; GFX9-LABEL: mad_i32_vsv:
187 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, s0, v[1:2]
188 ; GFX9-NEXT: ; return to shader part epilog
190 ; GFX10-LABEL: mad_i32_vsv:
192 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s0, v[1:2]
193 ; GFX10-NEXT: ; return to shader part epilog
195 ; GFX11-LABEL: mad_i32_vsv:
197 ; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v0, s0, v[1:2]
198 ; GFX11-NEXT: v_mov_b32_e32 v0, v2
199 ; GFX11-NEXT: ; return to shader part epilog
200 %mul = mul i32 %a, %b
201 %add = add i32 %mul, %c
202 %cast = bitcast i32 %add to float
; mad_i32_svv: i32 multiply-add test computing %a * %b + %c, where %a is
; inreg (s0 in the checks) and %b and %c are plain arguments (v0, v1).
; In all checks s0 is the first multiplicand of v_mad_u64_u32 and v[1:2] is
; the addend pair.
; The GFX11 checks expect the result in v[2:3], followed by a copy of v2 into
; v0.
206 define amdgpu_ps float @mad_i32_svv(i32 inreg %a, i32 %b, i32 %c) {
207 ; GFX9-LABEL: mad_i32_svv:
209 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s0, v0, v[1:2]
210 ; GFX9-NEXT: ; return to shader part epilog
212 ; GFX10-LABEL: mad_i32_svv:
214 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s0, v0, v[1:2]
215 ; GFX10-NEXT: ; return to shader part epilog
217 ; GFX11-LABEL: mad_i32_svv:
219 ; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, s0, v0, v[1:2]
220 ; GFX11-NEXT: v_mov_b32_e32 v0, v2
221 ; GFX11-NEXT: ; return to shader part epilog
222 %mul = mul i32 %a, %b
223 %add = add i32 %mul, %c
224 %cast = bitcast i32 %add to float
; mad_i32_vss: i32 multiply-add test computing %a * %b + %c, where %a is a
; plain argument (v0) and %b and %c are inreg (s0, s1 in the checks).
; The GFX9 checks expect no v_mad_u64_u32: a v_mul_lo_u32 by s0, then a
; v_add_u32 of s1.
; The GFX10 and GFX11 checks expect s1 to be copied to s2 and then a
; v_mad_u64_u32 that reads both s0 and the s[2:3] pair.
; NOTE(review): presumably gfx900 permits fewer SGPR operands per VALU
; instruction than the newer targets -- confirm.
228 define amdgpu_ps float @mad_i32_vss(i32 %a, i32 inreg %b, i32 inreg %c) {
229 ; GFX9-LABEL: mad_i32_vss:
231 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, s0
232 ; GFX9-NEXT: v_add_u32_e32 v0, s1, v0
233 ; GFX9-NEXT: ; return to shader part epilog
235 ; GFX10-LABEL: mad_i32_vss:
237 ; GFX10-NEXT: s_mov_b32 s2, s1
238 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s0, s[2:3]
239 ; GFX10-NEXT: ; return to shader part epilog
241 ; GFX11-LABEL: mad_i32_vss:
243 ; GFX11-NEXT: v_mov_b32_e32 v2, v0
244 ; GFX11-NEXT: s_mov_b32 s2, s1
245 ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v2, s0, s[2:3]
246 ; GFX11-NEXT: ; return to shader part epilog
247 %mul = mul i32 %a, %b
248 %add = add i32 %mul, %c
249 %cast = bitcast i32 %add to float
; mad_i32_svs: i32 multiply-add test computing %a * %b + %c, where %b is a
; plain argument (v0) and %a and %c are inreg (s0, s1 in the checks).
; The GFX9 checks expect no v_mad_u64_u32: a v_mul_lo_u32 of s0 and v0, then
; a v_add_u32 of s1.
; The GFX10 and GFX11 checks expect s1 to be copied to s2 and then a
; v_mad_u64_u32 with s0 as the first multiplicand and s[2:3] as the addend.
; NOTE(review): presumably gfx900 permits fewer SGPR operands per VALU
; instruction than the newer targets -- confirm.
253 define amdgpu_ps float @mad_i32_svs(i32 inreg %a, i32 %b, i32 inreg %c) {
254 ; GFX9-LABEL: mad_i32_svs:
256 ; GFX9-NEXT: v_mul_lo_u32 v0, s0, v0
257 ; GFX9-NEXT: v_add_u32_e32 v0, s1, v0
258 ; GFX9-NEXT: ; return to shader part epilog
260 ; GFX10-LABEL: mad_i32_svs:
262 ; GFX10-NEXT: s_mov_b32 s2, s1
263 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s0, v0, s[2:3]
264 ; GFX10-NEXT: ; return to shader part epilog
266 ; GFX11-LABEL: mad_i32_svs:
268 ; GFX11-NEXT: v_mov_b32_e32 v2, v0
269 ; GFX11-NEXT: s_mov_b32 s2, s1
270 ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, s0, v2, s[2:3]
271 ; GFX11-NEXT: ; return to shader part epilog
272 %mul = mul i32 %a, %b
273 %add = add i32 %mul, %c
274 %cast = bitcast i32 %add to float
; mad_i32_ssv: i32 multiply-add test computing %a * %b + %c, where %a and %b
; are inreg (s0, s1 in the checks) and %c is a plain argument (v0).
; The GFX9 checks expect a scalar s_mul_i32 followed by a v_add_u32 of the
; product and v0.
; The GFX10 checks expect one v_mad_u64_u32 with both multiplicands in SGPRs.
; The GFX11 checks expect the same instruction writing v[1:2], followed by a
; copy of v1 into v0.
278 define amdgpu_ps float @mad_i32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) {
279 ; GFX9-LABEL: mad_i32_ssv:
281 ; GFX9-NEXT: s_mul_i32 s0, s0, s1
282 ; GFX9-NEXT: v_add_u32_e32 v0, s0, v0
283 ; GFX9-NEXT: ; return to shader part epilog
285 ; GFX10-LABEL: mad_i32_ssv:
287 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s0, s1, v[0:1]
288 ; GFX10-NEXT: ; return to shader part epilog
290 ; GFX11-LABEL: mad_i32_ssv:
292 ; GFX11-NEXT: v_mad_u64_u32 v[1:2], null, s0, s1, v[0:1]
293 ; GFX11-NEXT: v_mov_b32_e32 v0, v1
294 ; GFX11-NEXT: ; return to shader part epilog
295 %mul = mul i32 %a, %b
296 %add = add i32 %mul, %c
297 %cast = bitcast i32 %add to float
; mad_i32_vvv_multiuse: i32 multiply-add test with three plain (non-inreg)
; arguments in which the product %mul has a second use: besides feeding the
; add, it is stored through an undef pointer.
; All three sets of checks expect the multiply and the add to stay separate
; (v_mul_lo_u32, then an add), with no v_mad_u64_u32. The product in v1 is
; then written by a flat store and followed by an s_waitcnt.
301 define amdgpu_ps float @mad_i32_vvv_multiuse(i32 %a, i32 %b, i32 %c) {
302 ; GFX9-LABEL: mad_i32_vvv_multiuse:
304 ; GFX9-NEXT: v_mul_lo_u32 v1, v0, v1
305 ; GFX9-NEXT: v_add_u32_e32 v0, v1, v2
306 ; GFX9-NEXT: flat_store_dword v[0:1], v1
307 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
308 ; GFX9-NEXT: ; return to shader part epilog
310 ; GFX10-LABEL: mad_i32_vvv_multiuse:
312 ; GFX10-NEXT: v_mul_lo_u32 v1, v0, v1
313 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v1, v2
314 ; GFX10-NEXT: flat_store_dword v[0:1], v1
315 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
316 ; GFX10-NEXT: ; return to shader part epilog
318 ; GFX11-LABEL: mad_i32_vvv_multiuse:
320 ; GFX11-NEXT: v_mul_lo_u32 v1, v0, v1
321 ; GFX11-NEXT: v_add_nc_u32_e32 v0, v1, v2
322 ; GFX11-NEXT: flat_store_b32 v[0:1], v1
323 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
324 ; GFX11-NEXT: ; return to shader part epilog
325 %mul = mul i32 %a, %b
326 %add = add i32 %mul, %c
; The extra store below is what gives %mul its second use.
327 store i32 %mul, ptr undef
328 %cast = bitcast i32 %add to float