1 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
4 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; vgpr: amdgpu_vs function returning { float, float }.
; It passes %arg3 as all four float operands of llvm.amdgcn.exp.f32 and then
; returns (%arg3 + 1.0, %arg3).
; The checks expect v0 to be copied into v1, the export of v0, a wait on
; expcnt(0), and finally the add that writes v0 from v1.
6 ; GCN-LABEL: {{^}}vgpr:
7 ; GCN-DAG: v_mov_b32_e32 v1, v0
8 ; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
9 ; GCN: s_waitcnt expcnt(0)
10 ; GCN: v_add_f32_e32 v0, 1.0, v1
12 define amdgpu_vs { float, float } @vgpr(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
14 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
; Element 0 of the result is %arg3 + 1.0, element 1 is the unmodified %arg3.
15 %x = fadd float %arg3, 1.000000e+00
16 %a = insertvalue { float, float } undef, float %x, 0
17 %b = insertvalue { float, float } %a, float %arg3, 1
18 ret { float, float } %b
; vgpr_literal: amdgpu_vs function that exports %arg3 and then returns the
; constant aggregate { 1.0, 2.0, 4.0, -1.0 }.
; The checks expect the export first, followed (in any relative order) by the
; four constant moves into v0..v3 and a wait on expcnt(0).
21 ; GCN-LABEL: {{^}}vgpr_literal:
22 ; GCN: exp mrt0 v0, v0, v0, v0 done vm
24 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
25 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
26 ; GCN-DAG: v_mov_b32_e32 v2, 4.0
27 ; GCN-DAG: v_mov_b32_e32 v3, -1.0
28 ; GCN-DAG: s_waitcnt expcnt(0)
30 define amdgpu_vs { float, float, float, float } @vgpr_literal(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
32 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
33 ret { float, float, float, float } { float 1.000000e+00, float 2.000000e+00, float 4.000000e+00, float -1.000000e+00 }
; vgpr_ps_addr0: amdgpu_ps function using attribute group #1.
; Returns five floats: elements 0 and 1 of %arg4, element 0 of %arg7,
; element 0 of %arg8 (each bitcast from i32 to float), and %arg12.
; The checks require that no move into v0, v1 or v2 is emitted, and that
; v3 is loaded from v4 and v4 from v6.
; NOTE(review): the .long check ahead of the label presumably pins a word of
; the shader configuration emitted before the function - confirm its meaning.
38 ; GCN-NEXT: .long 165584
40 ; GCN-LABEL: {{^}}vgpr_ps_addr0:
41 ; GCN-NOT: v_mov_b32_e32 v0
42 ; GCN-NOT: v_mov_b32_e32 v1
43 ; GCN-NOT: v_mov_b32_e32 v2
44 ; GCN: v_mov_b32_e32 v3, v4
45 ; GCN: v_mov_b32_e32 v4, v6
47 define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
; Pick the integer inputs that feed the result.
49 %i0 = extractelement <2 x i32> %arg4, i32 0
50 %i1 = extractelement <2 x i32> %arg4, i32 1
51 %i2 = extractelement <2 x i32> %arg7, i32 0
52 %i3 = extractelement <2 x i32> %arg8, i32 0
; Reinterpret them as floats so they match the return type.
53 %f0 = bitcast i32 %i0 to float
54 %f1 = bitcast i32 %i1 to float
55 %f2 = bitcast i32 %i2 to float
56 %f3 = bitcast i32 %i3 to float
; Assemble the five-element result one field at a time.
57 %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
58 %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
59 %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
60 %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
61 %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
62 ret { float, float, float, float, float } %r4
; ps_input_ena_no_inputs: amdgpu_ps function using attribute group #1 that
; reads none of its arguments and returns the constant 1.0.
; The check expects that constant to be moved into v0.
; NOTE(review): the .long check ahead of the label presumably pins a word of
; the shader configuration emitted before the function - confirm its meaning.
67 ; GCN-NEXT: .long 165584
69 ; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
70 ; GCN: v_mov_b32_e32 v0, 1.0
72 define amdgpu_ps float @ps_input_ena_no_inputs(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
74 ret float 1.000000e+00
; ps_input_ena_pos_w: amdgpu_ps function using attribute group #1.
; Returns { %arg14, %arg8 reinterpreted as <2 x float> }.
; The checks expect v0 to be loaded from v4, v1 from v2 and v2 from v3,
; in any relative order.
; NOTE(review): the .long checks ahead of the label presumably pin words of
; the shader configuration emitted before the function - confirm their meaning.
78 ; GCN-NEXT: .long 2081
79 ; GCN-NEXT: .long 165584
80 ; GCN-NEXT: .long 2081
81 ; GCN-LABEL: {{^}}ps_input_ena_pos_w:
82 ; GCN-DAG: v_mov_b32_e32 v0, v4
83 ; GCN-DAG: v_mov_b32_e32 v1, v2
84 ; GCN-DAG: v_mov_b32_e32 v2, v3
86 define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
; Field 0 is %arg14; field 1 is both lanes of %arg8 viewed as floats.
88 %f = bitcast <2 x i32> %arg8 to <2 x float>
89 %s = insertvalue { float, <2 x float> } undef, float %arg14, 0
90 %s1 = insertvalue { float, <2 x float> } %s, <2 x float> %f, 1
91 ret { float, <2 x float> } %s1
; vgpr_ps_addr1: amdgpu_ps function using attribute group #2.
; Returns five floats: elements 0 and 1 of %arg4, element 0 of %arg7,
; element 0 of %arg8 (each bitcast from i32 to float), and %arg12.
; The checks expect v0..v4 to be loaded from v2, v3, v4, v6 and v8.
; NOTE(review): the .long check ahead of the label presumably pins a word of
; the shader configuration emitted before the function - confirm its meaning.
96 ; GCN-NEXT: .long 165584
98 ; GCN-LABEL: {{^}}vgpr_ps_addr1:
99 ; GCN-DAG: v_mov_b32_e32 v0, v2
100 ; GCN-DAG: v_mov_b32_e32 v1, v3
101 ; GCN: v_mov_b32_e32 v2, v4
102 ; GCN-DAG: v_mov_b32_e32 v3, v6
103 ; GCN-DAG: v_mov_b32_e32 v4, v8
105 define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 {
; Pick the integer inputs that feed the result.
107 %i0 = extractelement <2 x i32> %arg4, i32 0
108 %i1 = extractelement <2 x i32> %arg4, i32 1
109 %i2 = extractelement <2 x i32> %arg7, i32 0
110 %i3 = extractelement <2 x i32> %arg8, i32 0
; Reinterpret them as floats so they match the return type.
111 %f0 = bitcast i32 %i0 to float
112 %f1 = bitcast i32 %i1 to float
113 %f2 = bitcast i32 %i2 to float
114 %f3 = bitcast i32 %i3 to float
; Assemble the five-element result one field at a time.
115 %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
116 %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
117 %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
118 %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
119 %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
120 ret { float, float, float, float, float } %r4
; vgpr_ps_addr119: amdgpu_ps function using attribute group #3.
; Returns five floats: elements 0 and 1 of %arg4, element 0 of %arg7,
; element 0 of %arg8 (each bitcast from i32 to float), and %arg12.
; The checks expect v0..v4 to be loaded from v2, v3, v6, v8 and v12,
; in any relative order.
; NOTE(review): the .long checks ahead of the label presumably pin words of
; the shader configuration emitted before the function - confirm their meaning.
124 ; GCN-NEXT: .long 562
125 ; GCN-NEXT: .long 165584
126 ; GCN-NEXT: .long 631
127 ; GCN-LABEL: {{^}}vgpr_ps_addr119:
128 ; GCN-DAG: v_mov_b32_e32 v0, v2
129 ; GCN-DAG: v_mov_b32_e32 v1, v3
130 ; GCN-DAG: v_mov_b32_e32 v2, v6
131 ; GCN-DAG: v_mov_b32_e32 v3, v8
132 ; GCN-DAG: v_mov_b32_e32 v4, v12
134 define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 {
; Pick the integer inputs that feed the result.
136 %i0 = extractelement <2 x i32> %arg4, i32 0
137 %i1 = extractelement <2 x i32> %arg4, i32 1
138 %i2 = extractelement <2 x i32> %arg7, i32 0
139 %i3 = extractelement <2 x i32> %arg8, i32 0
; Reinterpret them as floats so they match the return type.
140 %f0 = bitcast i32 %i0 to float
141 %f1 = bitcast i32 %i1 to float
142 %f2 = bitcast i32 %i2 to float
143 %f3 = bitcast i32 %i3 to float
; Assemble the five-element result one field at a time.
144 %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
145 %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
146 %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
147 %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
148 %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
149 ret { float, float, float, float, float } %r4
; vgpr_ps_addr418: amdgpu_ps function using attribute group #4.
; Returns five floats: elements 0 and 1 of %arg4, element 0 of %arg7,
; element 0 of %arg8 (each bitcast from i32 to float), and %arg12.
; The checks require that no move into v0, v1 or v2 is emitted, and that
; v3 is loaded from v4 and v4 from v8.
; NOTE(review): the .long checks ahead of the label presumably pin words of
; the shader configuration emitted before the function - confirm their meaning.
153 ; GCN-NEXT: .long 562
154 ; GCN-NEXT: .long 165584
155 ; GCN-NEXT: .long 946
156 ; GCN-LABEL: {{^}}vgpr_ps_addr418:
157 ; GCN-NOT: v_mov_b32_e32 v0
158 ; GCN-NOT: v_mov_b32_e32 v1
159 ; GCN-NOT: v_mov_b32_e32 v2
160 ; GCN: v_mov_b32_e32 v3, v4
161 ; GCN: v_mov_b32_e32 v4, v8
163 define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 {
; Pick the integer inputs that feed the result.
165 %i0 = extractelement <2 x i32> %arg4, i32 0
166 %i1 = extractelement <2 x i32> %arg4, i32 1
167 %i2 = extractelement <2 x i32> %arg7, i32 0
168 %i3 = extractelement <2 x i32> %arg8, i32 0
; Reinterpret them as floats so they match the return type.
169 %f0 = bitcast i32 %i0 to float
170 %f1 = bitcast i32 %i1 to float
171 %f2 = bitcast i32 %i2 to float
172 %f3 = bitcast i32 %i3 to float
; Assemble the five-element result one field at a time.
173 %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
174 %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
175 %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
176 %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
177 %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
178 ret { float, float, float, float, float } %r4
; sgpr: amdgpu_vs function returning three i32 values.
; Element 0 is %arg2 + 2, element 1 is %arg1 and element 2 is %arg2.
; The aggregate is built as a single chain (%a -> %b -> %c) so that every
; element is defined; building %c from %a would drop the %arg1 insertion and
; leave element 1 undefined.
; The checks expect s1 to be loaded from s2, s2 from s3, and the add of 2 to
; s3 to land in s0, in any relative order.
181 ; GCN-LABEL: {{^}}sgpr:
; GCN-DAG: s_mov_b32 s1, s2
182 ; GCN-DAG: s_mov_b32 s2, s3
183 ; GCN-DAG: s_add_{{i|u}}32 s0, s3, 2
185 define amdgpu_vs { i32, i32, i32 } @sgpr(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
187 %x = add i32 %arg2, 2
188 %a = insertvalue { i32, i32, i32 } undef, i32 %x, 0
189 %b = insertvalue { i32, i32, i32 } %a, i32 %arg1, 1
; Continue from %b (not %a) so element 1 survives into the returned value.
190 %c = insertvalue { i32, i32, i32 } %b, i32 %arg2, 2
191 ret { i32, i32, i32 } %c
; sgpr_literal: amdgpu_vs function returning the constant aggregate
; { 5, 6, 7, 8 }.
; The checks expect the constants to be moved into s0..s3 and forbid a
; self-move of s0 after s0 has been set to 5.
194 ; GCN-LABEL: {{^}}sgpr_literal:
195 ; GCN: s_mov_b32 s0, 5
196 ; GCN-NOT: s_mov_b32 s0, s0
197 ; GCN-DAG: s_mov_b32 s1, 6
198 ; GCN-DAG: s_mov_b32 s2, 7
199 ; GCN-DAG: s_mov_b32 s3, 8
201 define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
; NOTE(review): %x is not used by the return below - confirm whether this
; dead add is intentional.
203 %x = add i32 %arg2, 2
204 ret { i32, i32, i32, i32 } { i32 5, i32 6, i32 7, i32 8 }
; both: amdgpu_vs function returning a mix of float and i32 values:
; { %arg3 + 1.0, %arg2 + 2, %arg3, %arg1, %arg2 }.
; It first passes %arg3 as all four float operands of llvm.amdgcn.exp.f32.
; The checks expect, in any relative order: the export of v0, the copy of v0
; into v1, a wait on expcnt(0), the float add writing v0 from v1, the integer
; add of 2 to s3 writing s0, and the moves s1 <- s2 and s2 <- s3.
207 ; GCN-LABEL: {{^}}both:
208 ; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
209 ; GCN-DAG: v_mov_b32_e32 v1, v0
210 ; GCN-DAG: s_mov_b32 s1, s2
211 ; GCN-DAG: s_waitcnt expcnt(0)
212 ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
213 ; GCN-DAG: s_add_{{i|u}}32 s0, s3, 2
214 ; GCN-DAG: s_mov_b32 s2, s3
216 define amdgpu_vs { float, i32, float, i32, i32 } @both(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
218 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
; Computed values: one float (%v) and one integer (%s).
219 %v = fadd float %arg3, 1.000000e+00
220 %s = add i32 %arg2, 2
; Build the result as one chain so that all five fields are defined.
221 %a0 = insertvalue { float, i32, float, i32, i32 } undef, float %v, 0
222 %a1 = insertvalue { float, i32, float, i32, i32 } %a0, i32 %s, 1
223 %a2 = insertvalue { float, i32, float, i32, i32 } %a1, float %arg3, 2
224 %a3 = insertvalue { float, i32, float, i32, i32 } %a2, i32 %arg1, 3
225 %a4 = insertvalue { float, i32, float, i32, i32 } %a3, i32 %arg2, 4
226 ret { float, i32, float, i32, i32 } %a4
; structure_literal: amdgpu_vs function that exports %arg3 and then returns a
; constant nested aggregate: { { 1.0, 2 }, { 3, <2.0, 4.0> } }.
; The checks expect the export first, followed (in any relative order) by the
; float constants in v0, v1, v2, the integer constants in s0, s1, and a wait
; on expcnt(0).
229 ; GCN-LABEL: {{^}}structure_literal:
230 ; GCN: exp mrt0 v0, v0, v0, v0 done vm
232 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
233 ; GCN-DAG: s_mov_b32 s0, 2
234 ; GCN-DAG: s_mov_b32 s1, 3
235 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
236 ; GCN-DAG: v_mov_b32_e32 v2, 4.0
237 ; GCN-DAG: s_waitcnt expcnt(0)
238 define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
240 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
241 ret { { float, i32 }, { i32, <2 x float> } } { { float, i32 } { float 1.000000e+00, i32 2 }, { i32, <2 x float> } { i32 3, <2 x float> <float 2.000000e+00, float 4.000000e+00> } }
; ret_return_to_epilog_pseudo_size: amdgpu_ps function taking no parameters
; and returning float. The check requires the reported codeLenInByte for this
; function to be exactly 0.
244 ; GCN-LABEL: {{^}}ret_return_to_epilog_pseudo_size:
245 ; GCN: codeLenInByte = 0{{$}}
246 define amdgpu_ps float @ret_return_to_epilog_pseudo_size() #0 {
; Export intrinsic called by the amdgpu_vs tests in this file: two i32
; operands, four float operands and two i1 operands.
250 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
; Attribute groups. #0 is plain nounwind; #1 through #4 additionally set the
; "InitialPSInputAddr" string attribute to 0, 1, 119 and 418 respectively and
; are referenced by the amdgpu_ps functions in this file.
252 attributes #0 = { nounwind }
253 attributes #1 = { nounwind "InitialPSInputAddr"="0" }
254 attributes #2 = { nounwind "InitialPSInputAddr"="1" }
255 attributes #3 = { nounwind "InitialPSInputAddr"="119" }
256 attributes #4 = { nounwind "InitialPSInputAddr"="418" }