1 ; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s
; External callees for the scalar argument-passing tests in this file.
; Two i32 parameters; called by sample_call.
4 declare i32 @sample_add(i32, i32)
; Ten i32 parameters; called by stack_call_int.
5 declare i32 @stack_callee_int(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
; Ten integer parameters of mixed width (i1/i8/i16/i32), some carrying
; signext/zeroext attributes; called by stack_call_int_szext.
6 declare i32 @stack_callee_int_szext(i1 signext, i8 zeroext, i32, i32, i32, i32, i32, i32, i16 zeroext, i8 signext)
; Ten float parameters; called by stack_call_float and stack_call_float2.
7 declare float @stack_callee_float(float, float, float, float, float, float, float, float, float, float)
; NOTE(review): @test is not referenced in the visible part of this file.
8 declare void @test(i64)
10 ; Scalar argument passing must not change: the tests below are the same as in VE/Scalar/call.ll, this time run with +vpu.
; Calls sample_add(1, 2) with the fastcc convention.
; Expected output: the callee address is assembled in %s12 from the
; sample_add@lo / sample_add@hi halves, the constants 1 and 2 are placed in
; %s0 and %s1, and control is transferred with `bsic %s10, (, %s12)`.
; The IR marks the call as `tail`, yet the expectations still require
; `or %s11, 0, %s9` right after the bsic.
12 define fastcc i32 @sample_call() {
13 ; CHECK-LABEL: sample_call:
14 ; CHECK: .LBB{{[0-9]+}}_2:
15 ; CHECK-NEXT: lea %s0, sample_add@lo
16 ; CHECK-NEXT: and %s0, %s0, (32)0
17 ; CHECK-NEXT: lea.sl %s12, sample_add@hi(, %s0)
18 ; CHECK-NEXT: or %s0, 1, (0)1
19 ; CHECK-NEXT: or %s1, 2, (0)1
20 ; CHECK-NEXT: bsic %s10, (, %s12)
21 ; CHECK-NEXT: or %s11, 0, %s9
22 %r = tail call fastcc i32 @sample_add(i32 1, i32 2)
; Calls stack_callee_int with the constants 1..10.
; Expected output: arguments 1..8 are materialized in %s0..%s7, while the
; ninth (9, via %s34) and tenth (10, via %s0) are stored to 240(, %s11) and
; 248(, %s11) respectively. The tenth value is stored before %s0 is reused
; for the callee address and for the first argument.
26 define fastcc i32 @stack_call_int() {
27 ; CHECK-LABEL: stack_call_int:
28 ; CHECK: .LBB{{[0-9]+}}_2:
29 ; CHECK-NEXT: or %s0, 10, (0)1
30 ; CHECK-NEXT: st %s0, 248(, %s11)
31 ; CHECK-NEXT: or %s34, 9, (0)1
32 ; CHECK-NEXT: lea %s0, stack_callee_int@lo
33 ; CHECK-NEXT: and %s0, %s0, (32)0
34 ; CHECK-NEXT: lea.sl %s12, stack_callee_int@hi(, %s0)
35 ; CHECK-NEXT: or %s0, 1, (0)1
36 ; CHECK-NEXT: or %s1, 2, (0)1
37 ; CHECK-NEXT: or %s2, 3, (0)1
38 ; CHECK-NEXT: or %s3, 4, (0)1
39 ; CHECK-NEXT: or %s4, 5, (0)1
40 ; CHECK-NEXT: or %s5, 6, (0)1
41 ; CHECK-NEXT: or %s6, 7, (0)1
42 ; CHECK-NEXT: or %s7, 8, (0)1
43 ; CHECK-NEXT: st %s34, 240(, %s11)
44 ; CHECK-NEXT: bsic %s10, (, %s12)
45 ; CHECK-NEXT: or %s11, 0, %s9
46 %r = tail call fastcc i32 @stack_callee_int(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
; Calls stack_callee_int_szext with -1 for every narrow (i1/i8/i16) argument
; to expose how the callee's signext/zeroext attributes are honoured.
; Expected output:
;   arg0 (i1 signext -1)   -> %s0 = -1
;   arg1 (i8 zeroext -1)   -> %s1 = 255
;   arg2..arg7 (i32 3..8)  -> %s2..%s7
;   arg8 (i16 zeroext -1)  -> 65535 in %s34, stored to 240(, %s11)
;   arg9 (i8 signext -1)   -> -1 in %s0, stored to 248(, %s11)
; The extension attributes appear only on the declaration, not at the call site.
50 define fastcc i32 @stack_call_int_szext() {
51 ; CHECK-LABEL: stack_call_int_szext:
52 ; CHECK: .LBB{{[0-9]+}}_2:
53 ; CHECK-NEXT: or %s0, -1, (0)1
54 ; CHECK-NEXT: st %s0, 248(, %s11)
55 ; CHECK-NEXT: lea %s34, 65535
56 ; CHECK-NEXT: lea %s0, stack_callee_int_szext@lo
57 ; CHECK-NEXT: and %s0, %s0, (32)0
58 ; CHECK-NEXT: lea.sl %s12, stack_callee_int_szext@hi(, %s0)
59 ; CHECK-NEXT: or %s0, -1, (0)1
60 ; CHECK-NEXT: lea %s1, 255
61 ; CHECK-NEXT: or %s2, 3, (0)1
62 ; CHECK-NEXT: or %s3, 4, (0)1
63 ; CHECK-NEXT: or %s4, 5, (0)1
64 ; CHECK-NEXT: or %s5, 6, (0)1
65 ; CHECK-NEXT: or %s6, 7, (0)1
66 ; CHECK-NEXT: or %s7, 8, (0)1
67 ; CHECK-NEXT: st %s34, 240(, %s11)
68 ; CHECK-NEXT: bsic %s10, (, %s12)
69 ; CHECK-NEXT: or %s11, 0, %s9
70 %r = tail call fastcc i32 @stack_callee_int_szext(i1 -1, i8 -1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i16 -1, i8 -1)
; Calls stack_callee_float with the constants 1.0 .. 10.0.
; Expected output: each constant is materialized with lea.sl, whose immediate
; is the IEEE-754 single-precision bit pattern of the value
; (e.g. 1065353216 = 0x3F800000 = 1.0, 1092616192 = 0x41200000 = 10.0).
; Arguments 1..8 go to %s0..%s7; 9.0 (via %s34) and 10.0 (via %s0) are stored
; to 240(, %s11) and 248(, %s11).
74 define fastcc float @stack_call_float() {
75 ; CHECK-LABEL: stack_call_float:
76 ; CHECK: .LBB{{[0-9]+}}_2:
77 ; CHECK-NEXT: lea.sl %s0, 1092616192
78 ; CHECK-NEXT: st %s0, 248(, %s11)
79 ; CHECK-NEXT: lea.sl %s34, 1091567616
80 ; CHECK-NEXT: lea %s0, stack_callee_float@lo
81 ; CHECK-NEXT: and %s0, %s0, (32)0
82 ; CHECK-NEXT: lea.sl %s12, stack_callee_float@hi(, %s0)
83 ; CHECK-NEXT: lea.sl %s0, 1065353216
84 ; CHECK-NEXT: lea.sl %s1, 1073741824
85 ; CHECK-NEXT: lea.sl %s2, 1077936128
86 ; CHECK-NEXT: lea.sl %s3, 1082130432
87 ; CHECK-NEXT: lea.sl %s4, 1084227584
88 ; CHECK-NEXT: lea.sl %s5, 1086324736
89 ; CHECK-NEXT: lea.sl %s6, 1088421888
90 ; CHECK-NEXT: lea.sl %s7, 1090519040
91 ; CHECK-NEXT: st %s34, 240(, %s11)
92 ; CHECK-NEXT: bsic %s10, (, %s12)
93 ; CHECK-NEXT: or %s11, 0, %s9
94 %r = tail call fastcc float @stack_callee_float(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0)
; Passes the single incoming float %p0 as all ten arguments of
; stack_callee_float.
; Expected output: the incoming value in %s0 is stored to both 248(, %s11) and
; 240(, %s11) and copied into %s1..%s7; %s0 itself is left untouched, so the
; callee address is built through %s1 instead of %s0.
98 define fastcc float @stack_call_float2(float %p0) {
99 ; CHECK-LABEL: stack_call_float2:
100 ; CHECK: .LBB{{[0-9]+}}_2:
101 ; CHECK-NEXT: st %s0, 248(, %s11)
102 ; CHECK-NEXT: lea %s1, stack_callee_float@lo
103 ; CHECK-NEXT: and %s1, %s1, (32)0
104 ; CHECK-NEXT: lea.sl %s12, stack_callee_float@hi(, %s1)
105 ; CHECK-NEXT: st %s0, 240(, %s11)
106 ; CHECK-NEXT: or %s1, 0, %s0
107 ; CHECK-NEXT: or %s2, 0, %s0
108 ; CHECK-NEXT: or %s3, 0, %s0
109 ; CHECK-NEXT: or %s4, 0, %s0
110 ; CHECK-NEXT: or %s5, 0, %s0
111 ; CHECK-NEXT: or %s6, 0, %s0
112 ; CHECK-NEXT: or %s7, 0, %s0
113 ; CHECK-NEXT: bsic %s10, (, %s12)
114 ; CHECK-NEXT: or %s11, 0, %s9
115 %r = tail call fastcc float @stack_callee_float(float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0)
119 ; Vector argument passing (fastcc feature)
; fastcc callees for the vector argument/return tests.
; Returns a <256 x i32> vector and takes no arguments.
121 declare fastcc <256 x i32> @get_v256i32()
; Takes a single <256 x i32> vector argument.
122 declare fastcc void @vsample_v(<256 x i32>)
; Takes a scalar i32 followed by a <256 x i32> vector argument.
123 declare fastcc void @vsample_iv(i32, <256 x i32>)
; Calls the fastcc function get_v256i32 and ignores the returned vector.
; Expected output: only the callee address setup in %s12 and the bsic; no
; vector instruction is required for the unused <256 x i32> result.
; The function label uses a label check, like every other function in this
; file, so that a mismatch here is reported against this function only.
125 define void @caller_vret() {
126 ; CHECK-LABEL: caller_vret:
127 ; CHECK: .LBB{{[0-9]+}}_2:
128 ; CHECK-NEXT: lea %s0, get_v256i32@lo
129 ; CHECK-NEXT: and %s0, %s0, (32)0
130 ; CHECK-NEXT: lea.sl %s12, get_v256i32@hi(, %s0)
131 ; CHECK-NEXT: bsic %s10, (, %s12)
132 ; CHECK-NEXT: or %s11, 0, %s9
133 %r = tail call fastcc <256 x i32> @get_v256i32()
; Forwards the vector returned by get_v256i32 as the only argument of
; vsample_v.
; Expected output: the two calls follow each other with nothing but the
; address setup for vsample_v in between, i.e. no vector move is emitted.
; NOTE(review): presumably the return value and the first vector argument
; share the same register (%v0) - confirm against the VE fastcc definition.
137 define void @caller_vret_pass_p0() {
138 ; CHECK-LABEL: caller_vret_pass_p0:
139 ; CHECK: .LBB{{[0-9]+}}_2:
140 ; CHECK: lea %s0, get_v256i32@lo
141 ; CHECK-NEXT: and %s0, %s0, (32)0
142 ; CHECK-NEXT: lea.sl %s12, get_v256i32@hi(, %s0)
143 ; CHECK-NEXT: bsic %s10, (, %s12)
144 ; CHECK-NEXT: lea %s0, vsample_v@lo
145 ; CHECK-NEXT: and %s0, %s0, (32)0
146 ; CHECK-NEXT: lea.sl %s12, vsample_v@hi(, %s0)
147 ; CHECK-NEXT: bsic %s10, (, %s12)
148 ; CHECK-NEXT: or %s11, 0, %s9
149 %p = tail call fastcc <256 x i32> @get_v256i32()
150 call fastcc void @vsample_v(<256 x i32> %p)
; Passes the incoming scalar %s together with the vector returned by
; get_v256i32 to vsample_iv (scalar first, vector second).
; Expected output: %s is copied from %s0 to %s18 before the first call and
; back from %s18 to %s0 before the second call; no vector move is emitted
; between the two calls.
; Unlike the neighbouring tests, nothing is checked after the final bsic.
154 define void @caller_vret_pass_p1(i32 %s) {
155 ; CHECK-LABEL: caller_vret_pass_p1:
156 ; CHECK: .LBB{{[0-9]+}}_2:
157 ; CHECK: or %s18, 0, %s0
158 ; CHECK-NEXT: lea %s0, get_v256i32@lo
159 ; CHECK-NEXT: and %s0, %s0, (32)0
160 ; CHECK-NEXT: lea.sl %s12, get_v256i32@hi(, %s0)
161 ; CHECK-NEXT: bsic %s10, (, %s12)
162 ; CHECK-NEXT: lea %s0, vsample_iv@lo
163 ; CHECK-NEXT: and %s0, %s0, (32)0
164 ; CHECK-NEXT: lea.sl %s12, vsample_iv@hi(, %s0)
165 ; CHECK-NEXT: or %s0, 0, %s18
166 ; CHECK-NEXT: bsic %s10, (, %s12)
167 %p = tail call fastcc <256 x i32> @get_v256i32()
168 call fastcc void @vsample_iv(i32 %s, <256 x i32> %p)
; fastcc callees taking two and three <256 x i32> vector arguments.
172 declare fastcc void @vsample_vv(<256 x i32>, <256 x i32>)
173 declare fastcc void @vsample_vvv(<256 x i32>, <256 x i32>, <256 x i32>)
; Passes the vector returned by get_v256i32 as both arguments of vsample_vv.
; Expected output: after the first call a single vector copy
; `vor %v1, (0)1, %v0` duplicates the value into the second argument
; register; the copy is preceded by `lea %s16, 256` / `lvl %s16`.
175 define void @caller_vret_pass_p01() {
176 ; CHECK-LABEL: caller_vret_pass_p01:
177 ; CHECK: .LBB{{[0-9]+}}_2:
178 ; CHECK-NEXT: lea %s0, get_v256i32@lo
179 ; CHECK-NEXT: and %s0, %s0, (32)0
180 ; CHECK-NEXT: lea.sl %s12, get_v256i32@hi(, %s0)
181 ; CHECK-NEXT: bsic %s10, (, %s12)
182 ; CHECK-NEXT: lea %s0, vsample_vv@lo
183 ; CHECK-NEXT: and %s0, %s0, (32)0
184 ; CHECK-NEXT: lea.sl %s12, vsample_vv@hi(, %s0)
185 ; CHECK-NEXT: lea %s16, 256
186 ; CHECK-NEXT: lvl %s16
187 ; CHECK-NEXT: vor %v1, (0)1, %v0
188 ; CHECK-NEXT: bsic %s10, (, %s12)
189 ; CHECK-NEXT: or %s11, 0, %s9
190 %p = tail call fastcc <256 x i32> @get_v256i32()
191 call fastcc void @vsample_vv(<256 x i32> %p, <256 x i32> %p)
; Passes the vector returned by get_v256i32 as all three arguments of
; vsample_vvv.
; Expected output: two vector copies from %v0, into %v1 and %v2, each preceded
; by its own `lea %s16, 256` / `lvl %s16` pair.
195 define void @caller_vret_pass_p012() {
196 ; CHECK-LABEL: caller_vret_pass_p012:
197 ; CHECK: .LBB{{[0-9]+}}_2:
198 ; CHECK-NEXT: lea %s0, get_v256i32@lo
199 ; CHECK-NEXT: and %s0, %s0, (32)0
200 ; CHECK-NEXT: lea.sl %s12, get_v256i32@hi(, %s0)
201 ; CHECK-NEXT: bsic %s10, (, %s12)
202 ; CHECK-NEXT: lea %s0, vsample_vvv@lo
203 ; CHECK-NEXT: and %s0, %s0, (32)0
204 ; CHECK-NEXT: lea.sl %s12, vsample_vvv@hi(, %s0)
205 ; CHECK-NEXT: lea %s16, 256
206 ; CHECK-NEXT: lvl %s16
207 ; CHECK-NEXT: vor %v1, (0)1, %v0
208 ; CHECK-NEXT: lea %s16, 256
209 ; CHECK-NEXT: lvl %s16
210 ; CHECK-NEXT: vor %v2, (0)1, %v0
211 ; CHECK-NEXT: bsic %s10, (, %s12)
212 ; CHECK-NEXT: or %s11, 0, %s9
213 %p = tail call fastcc <256 x i32> @get_v256i32()
214 call fastcc void @vsample_vvv(<256 x i32> %p, <256 x i32> %p, <256 x i32> %p)
218 ; Expose the vector register parameter mapping by forcing an explicit vreg move for every parameter position.
; fastcc callee taking seven <256 x i32> vector arguments.
219 declare fastcc void @vsample_vvvvvvv(<256 x i32>, <256 x i32>, <256 x i32>, <256 x i32>, <256 x i32>, <256 x i32>, <256 x i32>)
221 ; TODO: improve vreg copy (a redundant lea+lvl pair is emitted before every vor).
; Receives seven vector parameters and forwards them rotated by one position:
; the callee gets (%p1, %p2, %p3, %p4, %p5, %p6, %p0).
; Expected output: %v0 is first parked in %v7, then each %v(i) receives
; %v(i+1) for i = 0..5, and finally %v6 receives the parked value from %v7.
; Every vor is preceded by its own `lea %s16, 256` / `lvl %s16` pair.
222 define fastcc void @roundtrip_caller_callee(<256 x i32> %p0, <256 x i32> %p1, <256 x i32> %p2, <256 x i32> %p3, <256 x i32> %p4, <256 x i32> %p5, <256 x i32> %p6) {
223 ; CHECK-LABEL: roundtrip_caller_callee:
224 ; CHECK: .LBB{{[0-9]+}}_2:
225 ; CHECK-NEXT: lea %s16, 256
226 ; CHECK-NEXT: lvl %s16
227 ; CHECK-NEXT: vor %v7, (0)1, %v0
228 ; CHECK-NEXT: lea %s0, vsample_vvvvvvv@lo
229 ; CHECK-NEXT: and %s0, %s0, (32)0
230 ; CHECK-NEXT: lea.sl %s12, vsample_vvvvvvv@hi(, %s0)
231 ; CHECK-NEXT: lea %s16, 256
232 ; CHECK-NEXT: lvl %s16
233 ; CHECK-NEXT: vor %v0, (0)1, %v1
234 ; CHECK-NEXT: lea %s16, 256
235 ; CHECK-NEXT: lvl %s16
236 ; CHECK-NEXT: vor %v1, (0)1, %v2
237 ; CHECK-NEXT: lea %s16, 256
238 ; CHECK-NEXT: lvl %s16
239 ; CHECK-NEXT: vor %v2, (0)1, %v3
240 ; CHECK-NEXT: lea %s16, 256
241 ; CHECK-NEXT: lvl %s16
242 ; CHECK-NEXT: vor %v3, (0)1, %v4
243 ; CHECK-NEXT: lea %s16, 256
244 ; CHECK-NEXT: lvl %s16
245 ; CHECK-NEXT: vor %v4, (0)1, %v5
246 ; CHECK-NEXT: lea %s16, 256
247 ; CHECK-NEXT: lvl %s16
248 ; CHECK-NEXT: vor %v5, (0)1, %v6
249 ; CHECK-NEXT: lea %s16, 256
250 ; CHECK-NEXT: lvl %s16
251 ; CHECK-NEXT: vor %v6, (0)1, %v7
252 ; CHECK-NEXT: bsic %s10, (, %s12)
253 ; CHECK-NEXT: or %s11, 0, %s9
254 call fastcc void @vsample_vvvvvvv(<256 x i32> %p1, <256 x i32> %p2, <256 x i32> %p3, <256 x i32> %p4, <256 x i32> %p5, <256 x i32> %p6, <256 x i32> %p0)