1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=gfx900 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9 %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx1030 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10 %s
4 ; RUN: llc -march=amdgcn -mcpu=gfx1100 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s
5 ; RUN: llc -march=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize64 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10 %s
6 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s
; All three i32 arguments are plain (non-inreg) and appear as v registers in
; the checks. Every target is expected to emit a single v_mad_u64_u32; the
; GFX11 checks additionally copy v1/v0 into v3/v4 before it.
; %add sums %mul and %c (%mul is presumably %a * %b -- TODO confirm), and the
; sum is bitcast to the float return type.
8 define amdgpu_ps float @mad_i32_vvv(i32 %a, i32 %b, i32 %c) {
9 ; GFX9-LABEL: mad_i32_vvv:
11 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, v1, v[2:3]
12 ; GFX9-NEXT: ; return to shader part epilog
14 ; GFX10-LABEL: mad_i32_vvv:
16 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[2:3]
17 ; GFX10-NEXT: ; return to shader part epilog
19 ; GFX11-LABEL: mad_i32_vvv:
21 ; GFX11-NEXT: v_mov_b32_e32 v3, v1
22 ; GFX11-NEXT: v_mov_b32_e32 v4, v0
23 ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v4, v3, v[2:3]
24 ; GFX11-NEXT: ; return to shader part epilog
26 %add = add i32 %mul, %c
27 %cast = bitcast i32 %add to float
; All three i32 arguments are inreg and appear as s registers in the checks.
; The shared GCN prefix (used by every RUN line) expects no mad instruction
; here, just s_mul_i32 followed by s_add_i32, with the result then copied
; from s0 into v0.
; %add sums %mul and %c (%mul is presumably %a * %b -- TODO confirm), and the
; sum is bitcast to the float return type.
31 define amdgpu_ps float @mad_i32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c) {
32 ; GCN-LABEL: mad_i32_sss:
34 ; GCN-NEXT: s_mul_i32 s0, s0, s1
35 ; GCN-NEXT: s_add_i32 s0, s0, s2
36 ; GCN-NEXT: v_mov_b32_e32 v0, s0
37 ; GCN-NEXT: ; return to shader part epilog
39 %add = add i32 %mul, %c
40 %cast = bitcast i32 %add to float
; Two plain (non-inreg) i32 arguments with the constant 42 as the addend.
; The checks expect 42 to be used directly as the last source operand of
; v_mad_u64_u32 on every target; the GFX11 checks additionally copy v1/v0
; into v2/v3 first.
; %add sums %mul and 42 (%mul is presumably %a * %b -- TODO confirm), and the
; sum is bitcast to the float return type.
44 define amdgpu_ps float @mad_i32_vvc(i32 %a, i32 %b) {
45 ; GFX9-LABEL: mad_i32_vvc:
47 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, v1, 42
48 ; GFX9-NEXT: ; return to shader part epilog
50 ; GFX10-LABEL: mad_i32_vvc:
52 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, 42
53 ; GFX10-NEXT: ; return to shader part epilog
55 ; GFX11-LABEL: mad_i32_vvc:
57 ; GFX11-NEXT: v_mov_b32_e32 v2, v1
58 ; GFX11-NEXT: v_mov_b32_e32 v3, v0
59 ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, 42
60 ; GFX11-NEXT: ; return to shader part epilog
62 %add = add i32 %mul, 42
63 %cast = bitcast i32 %add to float
; Two plain (non-inreg) i32 arguments with the constant 1234567 (0x12d687) as
; the addend. The GFX9 checks first materialize the constant into v[2:3]
; (v2 = 0x12d687, v3 = 0) and pass that pair to v_mad_u64_u32, whereas the
; GFX10 and GFX11 checks use 0x12d687 directly as the last source operand.
; %add sums %mul and 1234567 (%mul is presumably %a * %b -- TODO confirm), and
; the sum is bitcast to the float return type.
67 define amdgpu_ps float @mad_i32_vvi(i32 %a, i32 %b) {
68 ; GFX9-LABEL: mad_i32_vvi:
70 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x12d687
71 ; GFX9-NEXT: v_mov_b32_e32 v3, 0
72 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, v1, v[2:3]
73 ; GFX9-NEXT: ; return to shader part epilog
75 ; GFX10-LABEL: mad_i32_vvi:
77 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, 0x12d687
78 ; GFX10-NEXT: ; return to shader part epilog
80 ; GFX11-LABEL: mad_i32_vvi:
82 ; GFX11-NEXT: v_mov_b32_e32 v2, v1
83 ; GFX11-NEXT: v_mov_b32_e32 v3, v0
84 ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, 0x12d687
85 ; GFX11-NEXT: ; return to shader part epilog
87 %add = add i32 %mul, 1234567
88 %cast = bitcast i32 %add to float
; Computes %a * 42 + %c with both arguments plain (non-inreg). The checks
; expect 42 as the second source operand of v_mad_u64_u32 and the addend read
; as the register pair v[1:2]. The GFX11 checks write the result to v[2:3]
; and then copy v2 into v0.
; The sum is bitcast to the float return type.
92 define amdgpu_ps float @mad_i32_vcv(i32 %a, i32 %c) {
93 ; GFX9-LABEL: mad_i32_vcv:
95 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 42, v[1:2]
96 ; GFX9-NEXT: ; return to shader part epilog
98 ; GFX10-LABEL: mad_i32_vcv:
100 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 42, v[1:2]
101 ; GFX10-NEXT: ; return to shader part epilog
103 ; GFX11-LABEL: mad_i32_vcv:
105 ; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v0, 42, v[1:2]
106 ; GFX11-NEXT: v_mov_b32_e32 v0, v2
107 ; GFX11-NEXT: ; return to shader part epilog
108 %mul = mul i32 %a, 42
109 %add = add i32 %mul, %c
110 %cast = bitcast i32 %add to float
; Computes %a * 42 + 43 with a single plain (non-inreg) argument. The checks
; expect both constants to appear directly as source operands of
; v_mad_u64_u32 on every target; the GFX11 checks additionally copy v0 into
; v2 first.
; The sum is bitcast to the float return type.
114 define amdgpu_ps float @mad_i32_vcc(i32 %a) {
115 ; GFX9-LABEL: mad_i32_vcc:
117 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 42, 43
118 ; GFX9-NEXT: ; return to shader part epilog
120 ; GFX10-LABEL: mad_i32_vcc:
122 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 42, 43
123 ; GFX10-NEXT: ; return to shader part epilog
125 ; GFX11-LABEL: mad_i32_vcc:
127 ; GFX11-NEXT: v_mov_b32_e32 v2, v0
128 ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v2, 42, 43
129 ; GFX11-NEXT: ; return to shader part epilog
130 %mul = mul i32 %a, 42
131 %add = add i32 %mul, 43
132 %cast = bitcast i32 %add to float
; Computes %a * %b + %c where only the addend %c is inreg. The checks expect
; the addend to be passed to v_mad_u64_u32 as the scalar pair s[0:1] on every
; target; the GFX11 checks additionally copy v1/v0 into v2/v3 first.
; The sum is bitcast to the float return type.
136 define amdgpu_ps float @mad_i32_vvs(i32 %a, i32 %b, i32 inreg %c) {
137 ; GFX9-LABEL: mad_i32_vvs:
139 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, v1, s[0:1]
140 ; GFX9-NEXT: ; return to shader part epilog
142 ; GFX10-LABEL: mad_i32_vvs:
144 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, s[0:1]
145 ; GFX10-NEXT: ; return to shader part epilog
147 ; GFX11-LABEL: mad_i32_vvs:
149 ; GFX11-NEXT: v_mov_b32_e32 v2, v1
150 ; GFX11-NEXT: v_mov_b32_e32 v3, v0
151 ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, s[0:1]
152 ; GFX11-NEXT: ; return to shader part epilog
153 %mul = mul i32 %a, %b
154 %add = add i32 %mul, %c
155 %cast = bitcast i32 %add to float
; Computes %a * %b + %c where only the second multiplicand %b is inreg. The
; checks expect s0 as the second source operand of v_mad_u64_u32 and the
; addend read as v[1:2]. The GFX11 checks write the result to v[2:3] and then
; copy v2 into v0.
; The sum is bitcast to the float return type.
159 define amdgpu_ps float @mad_i32_vsv(i32 %a, i32 inreg %b, i32 %c) {
160 ; GFX9-LABEL: mad_i32_vsv:
162 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, s0, v[1:2]
163 ; GFX9-NEXT: ; return to shader part epilog
165 ; GFX10-LABEL: mad_i32_vsv:
167 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s0, v[1:2]
168 ; GFX10-NEXT: ; return to shader part epilog
170 ; GFX11-LABEL: mad_i32_vsv:
172 ; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v0, s0, v[1:2]
173 ; GFX11-NEXT: v_mov_b32_e32 v0, v2
174 ; GFX11-NEXT: ; return to shader part epilog
175 %mul = mul i32 %a, %b
176 %add = add i32 %mul, %c
177 %cast = bitcast i32 %add to float
; Computes %a * %b + %c where only the first multiplicand %a is inreg. The
; checks expect s0 as the first source operand of v_mad_u64_u32 and the
; addend read as v[1:2]. The GFX11 checks write the result to v[2:3] and then
; copy v2 into v0.
; The sum is bitcast to the float return type.
181 define amdgpu_ps float @mad_i32_svv(i32 inreg %a, i32 %b, i32 %c) {
182 ; GFX9-LABEL: mad_i32_svv:
184 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s0, v0, v[1:2]
185 ; GFX9-NEXT: ; return to shader part epilog
187 ; GFX10-LABEL: mad_i32_svv:
189 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s0, v0, v[1:2]
190 ; GFX10-NEXT: ; return to shader part epilog
192 ; GFX11-LABEL: mad_i32_svv:
194 ; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, s0, v0, v[1:2]
195 ; GFX11-NEXT: v_mov_b32_e32 v0, v2
196 ; GFX11-NEXT: ; return to shader part epilog
197 %mul = mul i32 %a, %b
198 %add = add i32 %mul, %c
199 %cast = bitcast i32 %add to float
; Computes %a * %b + %c where %b and %c are both inreg. The GFX9 checks
; expect no mad instruction here, only v_mul_lo_u32 followed by v_add_u32.
; The GFX10 and GFX11 checks expect s1 to be copied into s2 and then a
; v_mad_u64_u32 taking s0 as multiplicand and s[2:3] as addend; GFX11 also
; copies v0 into v2 first.
; The sum is bitcast to the float return type.
203 define amdgpu_ps float @mad_i32_vss(i32 %a, i32 inreg %b, i32 inreg %c) {
204 ; GFX9-LABEL: mad_i32_vss:
206 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, s0
207 ; GFX9-NEXT: v_add_u32_e32 v0, s1, v0
208 ; GFX9-NEXT: ; return to shader part epilog
210 ; GFX10-LABEL: mad_i32_vss:
212 ; GFX10-NEXT: s_mov_b32 s2, s1
213 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s0, s[2:3]
214 ; GFX10-NEXT: ; return to shader part epilog
216 ; GFX11-LABEL: mad_i32_vss:
218 ; GFX11-NEXT: v_mov_b32_e32 v2, v0
219 ; GFX11-NEXT: s_mov_b32 s2, s1
220 ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v2, s0, s[2:3]
221 ; GFX11-NEXT: ; return to shader part epilog
222 %mul = mul i32 %a, %b
223 %add = add i32 %mul, %c
224 %cast = bitcast i32 %add to float
; Computes %a * %b + %c where %a and %c are both inreg. The GFX9 checks
; expect no mad instruction here, only v_mul_lo_u32 followed by v_add_u32.
; The GFX10 and GFX11 checks expect s1 to be copied into s2 and then a
; v_mad_u64_u32 taking s0 as the first multiplicand and s[2:3] as addend;
; GFX11 also copies v0 into v2 first.
; The sum is bitcast to the float return type.
228 define amdgpu_ps float @mad_i32_svs(i32 inreg %a, i32 %b, i32 inreg %c) {
229 ; GFX9-LABEL: mad_i32_svs:
231 ; GFX9-NEXT: v_mul_lo_u32 v0, s0, v0
232 ; GFX9-NEXT: v_add_u32_e32 v0, s1, v0
233 ; GFX9-NEXT: ; return to shader part epilog
235 ; GFX10-LABEL: mad_i32_svs:
237 ; GFX10-NEXT: s_mov_b32 s2, s1
238 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s0, v0, s[2:3]
239 ; GFX10-NEXT: ; return to shader part epilog
241 ; GFX11-LABEL: mad_i32_svs:
243 ; GFX11-NEXT: v_mov_b32_e32 v2, v0
244 ; GFX11-NEXT: s_mov_b32 s2, s1
245 ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, s0, v2, s[2:3]
246 ; GFX11-NEXT: ; return to shader part epilog
247 %mul = mul i32 %a, %b
248 %add = add i32 %mul, %c
249 %cast = bitcast i32 %add to float
; Computes %a * %b + %c where both multiplicands are inreg and only the
; addend %c is a plain argument. The GFX9 checks expect s_mul_i32 followed by
; v_add_u32 rather than a mad instruction. The GFX10 and GFX11 checks expect
; a v_mad_u64_u32 taking s0 and s1 with v[0:1] as addend; GFX11 writes the
; result to v[1:2] and then copies v1 into v0.
; The sum is bitcast to the float return type.
253 define amdgpu_ps float @mad_i32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) {
254 ; GFX9-LABEL: mad_i32_ssv:
256 ; GFX9-NEXT: s_mul_i32 s0, s0, s1
257 ; GFX9-NEXT: v_add_u32_e32 v0, s0, v0
258 ; GFX9-NEXT: ; return to shader part epilog
260 ; GFX10-LABEL: mad_i32_ssv:
262 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s0, s1, v[0:1]
263 ; GFX10-NEXT: ; return to shader part epilog
265 ; GFX11-LABEL: mad_i32_ssv:
267 ; GFX11-NEXT: v_mad_u64_u32 v[1:2], null, s0, s1, v[0:1]
268 ; GFX11-NEXT: v_mov_b32_e32 v0, v1
269 ; GFX11-NEXT: ; return to shader part epilog
270 %mul = mul i32 %a, %b
271 %add = add i32 %mul, %c
272 %cast = bitcast i32 %add to float
; Same %a * %b + %c shape with three plain (non-inreg) arguments, except that
; the product %mul has a second use: it is also stored through an undef
; pointer. The checks on every target expect no v_mad_u64_u32 here; instead
; there is a separate v_mul_lo_u32 and add, a flat store of the product (v1),
; and s_waitcnt instruction(s) after the store.
; The sum is bitcast to the float return type.
276 define amdgpu_ps float @mad_i32_vvv_multiuse(i32 %a, i32 %b, i32 %c) {
277 ; GFX9-LABEL: mad_i32_vvv_multiuse:
279 ; GFX9-NEXT: v_mul_lo_u32 v1, v0, v1
280 ; GFX9-NEXT: v_add_u32_e32 v0, v1, v2
281 ; GFX9-NEXT: flat_store_dword v[0:1], v1
282 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
283 ; GFX9-NEXT: ; return to shader part epilog
285 ; GFX10-LABEL: mad_i32_vvv_multiuse:
287 ; GFX10-NEXT: v_mul_lo_u32 v1, v0, v1
288 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v1, v2
289 ; GFX10-NEXT: flat_store_dword v[0:1], v1
290 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
291 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
292 ; GFX10-NEXT: ; return to shader part epilog
294 ; GFX11-LABEL: mad_i32_vvv_multiuse:
296 ; GFX11-NEXT: v_mul_lo_u32 v1, v0, v1
297 ; GFX11-NEXT: v_add_nc_u32_e32 v0, v1, v2
298 ; GFX11-NEXT: flat_store_b32 v[0:1], v1
299 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
300 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
301 ; GFX11-NEXT: ; return to shader part epilog
302 %mul = mul i32 %a, %b
303 %add = add i32 %mul, %c
304 store i32 %mul, i32* undef
305 %cast = bitcast i32 %add to float