Bump version to 19.1.0-rc3
[llvm-project.git] / llvm / test / CodeGen / AMDGPU / code-size-estimate.ll
blobac03d2dae8fa87742aef37fe45c7f7a142d72288
1 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX9,NOT-GFX12 %s
2 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10,NOT-GFX12 %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX11,GFX1100,NOT-GFX12 %s
4 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX11,GFX1150,NOT-GFX12 %s
5 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX1200 %s
7 declare float @llvm.fabs.f32(float)
8 declare float @llvm.fma.f32(float, float, float)
10 define float @v_mul_f32_vop2(float %x, float %y) {
11 ; GFX9-LABEL: v_mul_f32_vop2:
12 ; GFX9:       ; %bb.0:
13 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
14 ; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x0a]
15 ; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
17 ; GFX10-LABEL: v_mul_f32_vop2:
18 ; GFX10:       ; %bb.0:
19 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
20 ; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10]
21 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
23 ; GFX11-LABEL: v_mul_f32_vop2:
24 ; GFX11:       ; %bb.0:
25 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
26 ; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10]
27 ; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
29 ; GFX1200-LABEL: v_mul_f32_vop2:
30 ; GFX1200:       ; %bb.0:
31 ; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
32 ; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
33 ; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
34 ; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
35 ; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
36 ; GFX1200-NEXT:    v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10]
37 ; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
38   %mul = fmul float %x, %y
39   ret float %mul
41 ; NOT-GFX12: codeLenInByte = 12
42 ; GFX1200: codeLenInByte = 28
44 define float @v_mul_f32_vop2_inline_imm(float %x) {
45 ; GFX9-LABEL: v_mul_f32_vop2_inline_imm:
46 ; GFX9:       ; %bb.0:
47 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
48 ; GFX9-NEXT:    v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x0a]
49 ; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
51 ; GFX10-LABEL: v_mul_f32_vop2_inline_imm:
52 ; GFX10:       ; %bb.0:
53 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
54 ; GFX10-NEXT:    v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10]
55 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
57 ; GFX11-LABEL: v_mul_f32_vop2_inline_imm:
58 ; GFX11:       ; %bb.0:
59 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
60 ; GFX11-NEXT:    v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10]
61 ; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
63 ; GFX1200-LABEL: v_mul_f32_vop2_inline_imm:
64 ; GFX1200:       ; %bb.0:
65 ; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
66 ; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
67 ; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
68 ; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
69 ; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
70 ; GFX1200-NEXT:    v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10]
71 ; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
72   %mul = fmul float %x, 4.0
73   ret float %mul
75 ; NOT-GFX12: codeLenInByte = 12
76 ; GFX1200: codeLenInByte = 28
78 define float @v_mul_f32_vop2_literal(float %x) {
79 ; GFX9-LABEL: v_mul_f32_vop2_literal:
80 ; GFX9:       ; %bb.0:
81 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
82 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x0a,0x00,0x00,0xf6,0x42]
83 ; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
85 ; GFX10-LABEL: v_mul_f32_vop2_literal:
86 ; GFX10:       ; %bb.0:
87 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
88 ; GFX10-NEXT:    v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42]
89 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
91 ; GFX11-LABEL: v_mul_f32_vop2_literal:
92 ; GFX11:       ; %bb.0:
93 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
94 ; GFX11-NEXT:    v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42]
95 ; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
97 ; GFX1200-LABEL: v_mul_f32_vop2_literal:
98 ; GFX1200:       ; %bb.0:
99 ; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
100 ; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
101 ; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
102 ; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
103 ; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
104 ; GFX1200-NEXT:    v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42]
105 ; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
106   %mul = fmul float %x, 123.0
107   ret float %mul
109 ; NOT-GFX12: codeLenInByte = 16
110 ; GFX1200: codeLenInByte = 32
112 define float @v_mul_f32_vop3_src_mods(float %x, float %y) {
113 ; GFX9-LABEL: v_mul_f32_vop3_src_mods:
114 ; GFX9:       ; %bb.0:
115 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
116 ; GFX9-NEXT:    v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x05,0xd1,0x00,0x03,0x02,0x00]
117 ; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
119 ; GFX10-LABEL: v_mul_f32_vop3_src_mods:
120 ; GFX10:       ; %bb.0:
121 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
122 ; GFX10-NEXT:    v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00]
123 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
125 ; GFX11-LABEL: v_mul_f32_vop3_src_mods:
126 ; GFX11:       ; %bb.0:
127 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
128 ; GFX11-NEXT:    v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00]
129 ; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
131 ; GFX1200-LABEL: v_mul_f32_vop3_src_mods:
132 ; GFX1200:       ; %bb.0:
133 ; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
134 ; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
135 ; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
136 ; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
137 ; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
138 ; GFX1200-NEXT:    v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00]
139 ; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
140   %fabs.x = call float @llvm.fabs.f32(float %x)
141   %mul = fmul float %fabs.x, %y
142   ret float %mul
144 ; NOT-GFX12: codeLenInByte = 16
145 ; GFX1200: codeLenInByte = 32
147 define float @v_mul_f32_vop3_src_mods_inline_imm(float %x, float %y) {
148 ; GFX9-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
149 ; GFX9:       ; %bb.0:
150 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
151 ; GFX9-NEXT:    v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x05,0xd1,0x00,0xed,0x01,0x00]
152 ; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
154 ; GFX10-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
155 ; GFX10:       ; %bb.0:
156 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
157 ; GFX10-NEXT:    v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00]
158 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
160 ; GFX11-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
161 ; GFX11:       ; %bb.0:
162 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
163 ; GFX11-NEXT:    v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00]
164 ; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
166 ; GFX1200-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
167 ; GFX1200:       ; %bb.0:
168 ; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
169 ; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
170 ; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
171 ; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
172 ; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
173 ; GFX1200-NEXT:    v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00]
174 ; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
175   %fabs.x = call float @llvm.fabs.f32(float %x)
176   %mul = fmul float %fabs.x, 4.0
177   ret float %mul
180 ; NOT-GFX12: codeLenInByte = 16
181 ; GFX1200: codeLenInByte = 32
183 define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) {
184 ; GFX9-LABEL: v_mul_f32_vop3_src_mods_literal:
185 ; GFX9:       ; %bb.0:
186 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
187 ; GFX9-NEXT:    s_mov_b32 s4, 0x42f60000 ; encoding: [0xff,0x00,0x84,0xbe,0x00,0x00,0xf6,0x42]
188 ; GFX9-NEXT:    v_mul_f32_e64 v0, |v0|, s4 ; encoding: [0x00,0x01,0x05,0xd1,0x00,0x09,0x00,0x00]
189 ; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
191 ; GFX10-LABEL: v_mul_f32_vop3_src_mods_literal:
192 ; GFX10:       ; %bb.0:
193 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
194 ; GFX10-NEXT:    v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42]
195 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
197 ; GFX11-LABEL: v_mul_f32_vop3_src_mods_literal:
198 ; GFX11:       ; %bb.0:
199 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
200 ; GFX11-NEXT:    v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42]
201 ; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
203 ; GFX1200-LABEL: v_mul_f32_vop3_src_mods_literal:
204 ; GFX1200:       ; %bb.0:
205 ; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
206 ; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
207 ; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
208 ; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
209 ; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
210 ; GFX1200-NEXT:    v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42]
211 ; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
212   %fabs.x = call float @llvm.fabs.f32(float %x)
213   %mul = fmul float %fabs.x, 123.0
214   ret float %mul
217 ; GFX9: codeLenInByte = 24
218 ; GFX10: codeLenInByte = 20
219 ; GFX11: codeLenInByte = 20
220 ; GFX1200: codeLenInByte = 36
222 define float @v_mul_f32_vop2_frame_index(float %x) {
223 ; GFX9-LABEL: v_mul_f32_vop2_frame_index:
224 ; GFX9:       ; %bb.0:
225 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
226 ; GFX9-NEXT:    v_lshrrev_b32_e64 v1, 6, s32 ; encoding: [0x01,0x00,0x10,0xd1,0x86,0x40,0x00,0x00]
227 ; GFX9-NEXT:    v_mul_f32_e32 v0, v1, v0 ; encoding: [0x01,0x01,0x00,0x0a]
228 ; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
230 ; GFX10-LABEL: v_mul_f32_vop2_frame_index:
231 ; GFX10:       ; %bb.0:
232 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
233 ; GFX10-NEXT:    v_lshrrev_b32_e64 v1, 5, s32 ; encoding: [0x01,0x00,0x16,0xd5,0x85,0x40,0x00,0x00]
234 ; GFX10-NEXT:    v_mul_f32_e32 v0, v1, v0 ; encoding: [0x01,0x01,0x00,0x10]
235 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
237 ; GFX11-LABEL: v_mul_f32_vop2_frame_index:
238 ; GFX11:       ; %bb.0:
239 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
240 ; GFX11-NEXT:    v_mul_f32_e32 v0, s32, v0 ; encoding: [0x20,0x00,0x00,0x10]
241 ; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
243 ; GFX1200-LABEL: v_mul_f32_vop2_frame_index:
244 ; GFX1200:       ; %bb.0:
245 ; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
246 ; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
247 ; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
248 ; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
249 ; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
250 ; GFX1200-NEXT:    v_mul_f32_e32 v0, s32, v0 ; encoding: [0x20,0x00,0x00,0x10]
251 ; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
252   %alloca = alloca i32, addrspace(5)
253   %ptrtoint = ptrtoint ptr addrspace(5) %alloca to i32
254   %cast = bitcast i32 %ptrtoint to float
255   %mul = fmul float %x, %cast
256   ret float %mul
259 ; GFX9: codeLenInByte = 20
260 ; GFX10: codeLenInByte = 20
261 ; GFX11: codeLenInByte = 12
262 ; GFX1200: codeLenInByte = 28
264 define float @v_fma_f32(float %x, float %y, float %z) {
265 ; GFX9-LABEL: v_fma_f32:
266 ; GFX9:       ; %bb.0:
267 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
268 ; GFX9-NEXT:    v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0xcb,0xd1,0x00,0x03,0x0a,0x04]
269 ; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
271 ; GFX10-LABEL: v_fma_f32:
272 ; GFX10:       ; %bb.0:
273 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
274 ; GFX10-NEXT:    v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x4b,0xd5,0x00,0x03,0x0a,0x04]
275 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
277 ; GFX11-LABEL: v_fma_f32:
278 ; GFX11:       ; %bb.0:
279 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
280 ; GFX11-NEXT:    v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x13,0xd6,0x00,0x03,0x0a,0x04]
281 ; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
283 ; GFX1200-LABEL: v_fma_f32:
284 ; GFX1200:       ; %bb.0:
285 ; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
286 ; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
287 ; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
288 ; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
289 ; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
290 ; GFX1200-NEXT:    v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x13,0xd6,0x00,0x03,0x0a,0x04]
291 ; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
292   %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
293   ret float %fma
296 ; NOT-GFX12: codeLenInByte = 16
297 ; GFX1200: codeLenInByte = 32
299 define float @v_fma_f32_src_mods(float %x, float %y, float %z) {
300 ; GFX9-LABEL: v_fma_f32_src_mods:
301 ; GFX9:       ; %bb.0:
302 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
303 ; GFX9-NEXT:    v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0xcb,0xd1,0x00,0x03,0x0a,0x04]
304 ; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
306 ; GFX10-LABEL: v_fma_f32_src_mods:
307 ; GFX10:       ; %bb.0:
308 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
309 ; GFX10-NEXT:    v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x4b,0xd5,0x00,0x03,0x0a,0x04]
310 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
312 ; GFX11-LABEL: v_fma_f32_src_mods:
313 ; GFX11:       ; %bb.0:
314 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
315 ; GFX11-NEXT:    v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0x0a,0x04]
316 ; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
318 ; GFX1200-LABEL: v_fma_f32_src_mods:
319 ; GFX1200:       ; %bb.0:
320 ; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
321 ; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
322 ; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
323 ; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
324 ; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
325 ; GFX1200-NEXT:    v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0x0a,0x04]
326 ; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
327   %fabs.x = call float @llvm.fabs.f32(float %x)
328   %fma = call float @llvm.fma.f32(float %fabs.x, float %y, float %z)
329   ret float %fma
332 ; NOT-GFX12: codeLenInByte = 16
333 ; GFX1200: codeLenInByte = 32
335 define float @v_fmac_f32(float %x, float %y) {
336 ; GFX9-LABEL: v_fmac_f32:
337 ; GFX9:       ; %bb.0:
338 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
339 ; GFX9-NEXT:    v_fma_f32 v0, v0, v1, v0 ; encoding: [0x00,0x00,0xcb,0xd1,0x00,0x03,0x02,0x04]
340 ; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
342 ; GFX10-LABEL: v_fmac_f32:
343 ; GFX10:       ; %bb.0:
344 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
345 ; GFX10-NEXT:    v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56]
346 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
348 ; GFX11-LABEL: v_fmac_f32:
349 ; GFX11:       ; %bb.0:
350 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
351 ; GFX11-NEXT:    v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56]
352 ; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
354 ; GFX1200-LABEL: v_fmac_f32:
355 ; GFX1200:       ; %bb.0:
356 ; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
357 ; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
358 ; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
359 ; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
360 ; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
361 ; GFX1200-NEXT:    v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56]
362 ; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
363   %fma = call float @llvm.fma.f32(float %x, float %y, float %x)
364   ret float %fma
367 ; GFX9: codeLenInByte = 16
368 ; GFX10: codeLenInByte = 12
369 ; GFX11: codeLenInByte = 12
370 ; GFX1200: codeLenInByte = 28
372 define float @v_fmaak_f32(float %x, float %y) {
373 ; GFX9-LABEL: v_fmaak_f32:
374 ; GFX9:       ; %bb.0:
375 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
376 ; GFX9-NEXT:    s_mov_b32 s4, 0x43800000 ; encoding: [0xff,0x00,0x84,0xbe,0x00,0x00,0x80,0x43]
377 ; GFX9-NEXT:    v_fma_f32 v0, v0, v1, s4 ; encoding: [0x00,0x00,0xcb,0xd1,0x00,0x03,0x12,0x00]
378 ; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
380 ; GFX10-LABEL: v_fmaak_f32:
381 ; GFX10:       ; %bb.0:
382 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
383 ; GFX10-NEXT:    v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43]
384 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
386 ; GFX11-LABEL: v_fmaak_f32:
387 ; GFX11:       ; %bb.0:
388 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
389 ; GFX11-NEXT:    v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43]
390 ; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
392 ; GFX1200-LABEL: v_fmaak_f32:
393 ; GFX1200:       ; %bb.0:
394 ; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
395 ; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
396 ; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
397 ; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
398 ; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
399 ; GFX1200-NEXT:    v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43]
400 ; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
401   %fma = call float @llvm.fma.f32(float %x, float %y, float 256.0)
402   ret float %fma
405 ; GFX9: codeLenInByte = 24
406 ; GFX10: codeLenInByte = 16
407 ; GFX11: codeLenInByte = 16
408 ; GFX1200: codeLenInByte = 32
410 define float @v_fma_k_f32_src_mods(float %x, float %y) {
411 ; GFX9-LABEL: v_fma_k_f32_src_mods:
412 ; GFX9:       ; %bb.0:
413 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
414 ; GFX9-NEXT:    s_mov_b32 s4, 0x43800000 ; encoding: [0xff,0x00,0x84,0xbe,0x00,0x00,0x80,0x43]
415 ; GFX9-NEXT:    v_fma_f32 v0, |v0|, v1, s4 ; encoding: [0x00,0x01,0xcb,0xd1,0x00,0x03,0x12,0x00]
416 ; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
418 ; GFX10-LABEL: v_fma_k_f32_src_mods:
419 ; GFX10:       ; %bb.0:
420 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
421 ; GFX10-NEXT:    v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x4b,0xd5,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43]
422 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
424 ; GFX11-LABEL: v_fma_k_f32_src_mods:
425 ; GFX11:       ; %bb.0:
426 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
427 ; GFX11-NEXT:    v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43]
428 ; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
430 ; GFX1200-LABEL: v_fma_k_f32_src_mods:
431 ; GFX1200:       ; %bb.0:
432 ; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
433 ; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
434 ; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
435 ; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
436 ; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
437 ; GFX1200-NEXT:    v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43]
438 ; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
439   %fabs.x = call float @llvm.fabs.f32(float %x)
440   %fma = call float @llvm.fma.f32(float %fabs.x, float %y, float 256.0)
441   ret float %fma
444 ; GFX9: codeLenInByte = 24
445 ; GFX10: codeLenInByte = 20
446 ; GFX11: codeLenInByte = 20
447 ; GFX1200: codeLenInByte = 36
449 define amdgpu_ps float @s_fmaak_f32(float inreg %x, float inreg %y) {
450 ; GFX9-LABEL: s_fmaak_f32:
451 ; GFX9:       ; %bb.0:
452 ; GFX9-NEXT:    v_mov_b32_e32 v0, s1 ; encoding: [0x01,0x02,0x00,0x7e]
453 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0x43800000 ; encoding: [0xff,0x02,0x02,0x7e,0x00,0x00,0x80,0x43]
454 ; GFX9-NEXT:    v_fma_f32 v0, s0, v0, v1 ; encoding: [0x00,0x00,0xcb,0xd1,0x00,0x00,0x06,0x04]
455 ; GFX9-NEXT:    ; return to shader part epilog
457 ; GFX10-LABEL: s_fmaak_f32:
458 ; GFX10:       ; %bb.0:
459 ; GFX10-NEXT:    v_mov_b32_e32 v0, s1 ; encoding: [0x01,0x02,0x00,0x7e]
460 ; GFX10-NEXT:    v_fmaak_f32 v0, s0, v0, 0x43800000 ; encoding: [0x00,0x00,0x00,0x5a,0x00,0x00,0x80,0x43]
461 ; GFX10-NEXT:    ; return to shader part epilog
463 ; GFX1100-LABEL: s_fmaak_f32:
464 ; GFX1100:       ; %bb.0:
465 ; GFX1100-NEXT:    v_mov_b32_e32 v0, s1 ; encoding: [0x01,0x02,0x00,0x7e]
466 ; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) ; encoding: [0x01,0x00,0x87,0xbf]
467 ; GFX1100-NEXT:    v_fmaak_f32 v0, s0, v0, 0x43800000 ; encoding: [0x00,0x00,0x00,0x5a,0x00,0x00,0x80,0x43]
468 ; GFX1100-NEXT:    ; return to shader part epilog
470 ; GFX1150-LABEL: s_fmaak_f32:
471 ; GFX1150:       ; %bb.0:
472 ; GFX1150-NEXT:    s_fmaak_f32 s0, s0, s1, 0x43800000 ; encoding: [0x00,0x01,0x80,0xa2,0x00,0x00,0x80,0x43]
473 ; GFX1150-NEXT:    s_delay_alu instid0(SALU_CYCLE_3) ; encoding: [0x0b,0x00,0x87,0xbf]
474 ; GFX1150-NEXT:    v_mov_b32_e32 v0, s0 ; encoding: [0x00,0x02,0x00,0x7e]
475 ; GFX1150-NEXT:    ; return to shader part epilog
477 ; GFX1200-LABEL: s_fmaak_f32:
478 ; GFX1200:       ; %bb.0:
479 ; GFX1200-NEXT:    s_fmaak_f32 s0, s0, s1, 0x43800000 ; encoding: [0x00,0x01,0x80,0xa2,0x00,0x00,0x80,0x43]
480 ; GFX1200-NEXT:    s_delay_alu instid0(SALU_CYCLE_3) ; encoding: [0x0b,0x00,0x87,0xbf]
481 ; GFX1200-NEXT:    v_mov_b32_e32 v0, s0 ; encoding: [0x00,0x02,0x00,0x7e]
482 ; GFX1200-NEXT:    ; return to shader part epilog
483   %fma = call float @llvm.fma.f32(float %x, float %y, float 256.0)
484   ret float %fma
487 ; GFX9: codeLenInByte = 20
488 ; GFX10: codeLenInByte = 12
489 ; GFX1100: codeLenInByte = 16
490 ; GFX1150: codeLenInByte = 16
491 ; GFX1200: codeLenInByte = 16