; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -stop-after=finalize-isel < %s | FileCheck %s
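
; Under the SVE calling convention, scalable vectors are passed in z0-z7 and
; scalable predicates in p0-p3. An SVE argument that does not fit in those
; registers is passed indirectly: the caller makes a copy in its own frame and
; passes its address instead, in one of x0-x7 if available, otherwise on the
; stack. The tests below exercise both sides of that contract at the MIR level
; (after finalize-isel).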
; Test that z8 and z9, passed in by reference, are correctly loaded from x0 and x1.
define aarch64_sve_vector_pcs <vscale x 4 x i32> @callee_with_many_sve_arg(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5, <vscale x 4 x i32> %z6, <vscale x 4 x i32> %z7, <vscale x 4 x i32> %z8, <vscale x 4 x i32> %z9) {
; CHECK: name: callee_with_many_sve_arg
; CHECK-DAG: [[BASE:%[0-9]+]]:gpr64common = COPY $x1
; CHECK-DAG: [[PTRUE:%[0-9]+]]:ppr_3b = PTRUE_S 31
; CHECK-DAG: [[RES:%[0-9]+]]:zpr = LD1W_IMM killed [[PTRUE]], [[BASE]]
; CHECK-DAG: $z0 = COPY [[RES]]
; CHECK: RET_ReallyLR implicit $z0
  ret <vscale x 4 x i32> %z9
}

; Test that z8 and z9 are passed by reference.
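; Each by-reference copy gets its own scalable stack object; the size/alignment
; of 16 below are per 128-bit granule, i.e. scaled by vscale, which is what the
; scalable-vector stack-id denotes.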
define aarch64_sve_vector_pcs <vscale x 4 x i32> @caller_with_many_sve_arg(<vscale x 4 x i32> %z) {
; CHECK: name: caller_with_many_sve_arg
; CHECK: - { id: 0, name: '', type: default, offset: 0, size: 16, alignment: 16,
; CHECK-NEXT: stack-id: scalable-vector
; CHECK: - { id: 1, name: '', type: default, offset: 0, size: 16, alignment: 16,
; CHECK-NEXT: stack-id: scalable-vector
; CHECK-DAG: [[PTRUE:%[0-9]+]]:ppr_3b = PTRUE_S 31
; CHECK-DAG: ST1W_IMM %{{[0-9]+}}, [[PTRUE]], %stack.1, 0
; CHECK-DAG: ST1W_IMM %{{[0-9]+}}, [[PTRUE]], %stack.0, 0
; CHECK-DAG: [[BASE2:%[0-9]+]]:gpr64sp = ADDXri %stack.1, 0
; CHECK-DAG: [[BASE1:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0
; CHECK-DAG: $x0 = COPY [[BASE1]]
; CHECK-DAG: $x1 = COPY [[BASE2]]
; CHECK-NEXT: BL @callee_with_many_sve_arg
; CHECK: RET_ReallyLR implicit $z0
  %ret = call aarch64_sve_vector_pcs <vscale x 4 x i32> @callee_with_many_sve_arg(<vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z)
  ret <vscale x 4 x i32> %ret
}

; Test that p4 and p5, passed in by reference, are correctly loaded from registers x0 and x1.
define aarch64_sve_vector_pcs <vscale x 16 x i1> @callee_with_many_svepred_arg(<vscale x 16 x i1> %p0, <vscale x 16 x i1> %p1, <vscale x 16 x i1> %p2, <vscale x 16 x i1> %p3, <vscale x 16 x i1> %p4, <vscale x 16 x i1> %p5) {
; CHECK: name: callee_with_many_svepred_arg
; CHECK-DAG: [[BASE:%[0-9]+]]:gpr64common = COPY $x1
; CHECK-DAG: [[RES:%[0-9]+]]:ppr = LDR_PXI [[BASE]], 0
; CHECK-DAG: $p0 = COPY [[RES]]
; CHECK: RET_ReallyLR implicit $p0
  ret <vscale x 16 x i1> %p5
}

; Test that p4 and p5 are passed by reference.
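; A predicate register covers VL/8 bits, i.e. 2 bytes per 128-bit granule, so
; each by-reference predicate copy below gets a scalable stack object of size 2.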
define aarch64_sve_vector_pcs <vscale x 16 x i1> @caller_with_many_svepred_arg(<vscale x 16 x i1> %p) {
; CHECK: name: caller_with_many_svepred_arg
; CHECK: - { id: 0, name: '', type: default, offset: 0, size: 2, alignment: 2,
; CHECK-NEXT: stack-id: scalable-vector
; CHECK: - { id: 1, name: '', type: default, offset: 0, size: 2, alignment: 2,
; CHECK-NEXT: stack-id: scalable-vector
; CHECK-DAG: STR_PXI %{{[0-9]+}}, %stack.0, 0
; CHECK-DAG: STR_PXI %{{[0-9]+}}, %stack.1, 0
; CHECK-DAG: [[BASE1:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0
; CHECK-DAG: [[BASE2:%[0-9]+]]:gpr64sp = ADDXri %stack.1, 0
; CHECK-DAG: $x0 = COPY [[BASE1]]
; CHECK-DAG: $x1 = COPY [[BASE2]]
; CHECK-NEXT: BL @callee_with_many_svepred_arg
; CHECK: RET_ReallyLR implicit $p0
  %ret = call aarch64_sve_vector_pcs <vscale x 16 x i1> @callee_with_many_svepred_arg(<vscale x 16 x i1> %p, <vscale x 16 x i1> %p, <vscale x 16 x i1> %p, <vscale x 16 x i1> %p, <vscale x 16 x i1> %p, <vscale x 16 x i1> %p)
  ret <vscale x 16 x i1> %ret
}

; Test that arg2 is passed through x0, i.e., x0 = &%arg2; and the return value is loaded from x0:
; P0 = ldr [x0]
define aarch64_sve_vector_pcs <vscale x 16 x i1> @callee_with_svepred_arg_4xv16i1_1xv16i1([4 x <vscale x 16 x i1>] %arg1, [1 x <vscale x 16 x i1>] %arg2) {
; CHECK: name: callee_with_svepred_arg_4xv16i1_1xv16i1
; CHECK: [[BASE:%[0-9]+]]:gpr64common = COPY $x0
; CHECK: [[PRED0:%[0-9]+]]:ppr = LDR_PXI [[BASE]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: $p0 = COPY [[PRED0]]
; CHECK: RET_ReallyLR implicit $p0
  %res = extractvalue [1 x <vscale x 16 x i1>] %arg2, 0
  ret <vscale x 16 x i1> %res
}

; Test that arg1 is stored to the stack from p0, and the stack location is passed through x0 to set up the call:
; str P0, [stack_loc_for_args]
; x0 = stack_loc_for_args
define aarch64_sve_vector_pcs <vscale x 16 x i1> @caller_with_svepred_arg_1xv16i1_4xv16i1([1 x <vscale x 16 x i1>] %arg1, [4 x <vscale x 16 x i1>] %arg2) {
; CHECK: name: caller_with_svepred_arg_1xv16i1_4xv16i1
; CHECK: - { id: 0, name: '', type: default, offset: 0, size: 2, alignment: 2,
; CHECK-NEXT: stack-id: scalable-vector,
; CHECK: [[PRED0:%[0-9]+]]:ppr = COPY $p0
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
; CHECK: STR_PXI [[PRED0]], %stack.0, 0 :: (store (<vscale x 1 x s16>) into %stack.0)
; CHECK: [[STACK:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0, 0
; CHECK: $x0 = COPY [[STACK]]
; CHECK: BL @callee_with_svepred_arg_4xv16i1_1xv16i1, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $p0, implicit $p1, implicit $p2, implicit $p3, implicit $x0, implicit-def $sp, implicit-def $p0
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
  %res = call <vscale x 16 x i1> @callee_with_svepred_arg_4xv16i1_1xv16i1([4 x <vscale x 16 x i1>] %arg2, [1 x <vscale x 16 x i1>] %arg1)
  ret <vscale x 16 x i1> %res
}

; Test that arg2 is passed through x0, i.e., x0 = &%arg2; and return values are loaded from x0:
; P0 = ldr [x0]
; P1 = ldr [x0 + sizeof(Px)]
; P2 = ldr [x0 + 2*sizeof(Px)]
; P3 = ldr [x0 + 3*sizeof(Px)]
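; Element i of the indirectly passed array lives at x0 + i*sizeof(Px), where
; sizeof(Px) is VL/64 bytes. CNTD_XPiI 31, 1 (all-pattern CNTD) materialises
; VL/64, CNTW_XPiI 31, 1 materialises VL/32 = 2*(VL/64), and CNTD_XPiI 31, 3
; materialises 3*(VL/64), which is how the offsets below are formed.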
define aarch64_sve_vector_pcs [4 x <vscale x 16 x i1>] @callee_with_svepred_arg_4xv16i1_4xv16i1([4 x <vscale x 16 x i1>] %arg1, [4 x <vscale x 16 x i1>] %arg2) {
; CHECK: name: callee_with_svepred_arg_4xv16i1_4xv16i1
; CHECK: [[BASE:%[0-9]+]]:gpr64common = COPY $x0
; CHECK: [[OFFSET1:%[0-9]+]]:gpr64 = CNTD_XPiI 31, 1, implicit $vg
; CHECK: [[ADDR1:%[0-9]+]]:gpr64common = nuw ADDXrr [[BASE]], killed [[OFFSET1]]
; CHECK: [[PRED1:%[0-9]+]]:ppr = LDR_PXI killed [[ADDR1]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: [[OFFSET2:%[0-9]+]]:gpr64 = CNTW_XPiI 31, 1, implicit $vg
; CHECK: [[ADDR2:%[0-9]+]]:gpr64common = ADDXrr [[BASE]], killed [[OFFSET2]]
; CHECK: [[PRED2:%[0-9]+]]:ppr = LDR_PXI killed [[ADDR2]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: [[OFFSET3:%[0-9]+]]:gpr64 = CNTD_XPiI 31, 3, implicit $vg
; CHECK: [[ADDR3:%[0-9]+]]:gpr64common = ADDXrr [[BASE]], killed [[OFFSET3]]
; CHECK: [[PRED3:%[0-9]+]]:ppr = LDR_PXI killed [[ADDR3]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: [[PRED0:%[0-9]+]]:ppr = LDR_PXI [[BASE]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: $p0 = COPY [[PRED0]]
; CHECK: $p1 = COPY [[PRED1]]
; CHECK: $p2 = COPY [[PRED2]]
; CHECK: $p3 = COPY [[PRED3]]
; CHECK: RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3
  ret [4 x <vscale x 16 x i1>] %arg2
}

; Test that arg1 is stored to the stack from p0~p3, and the stack location is passed through x0 to set up the call:
; str P0, [stack_loc_for_args]
; str P1, [stack_loc_for_args + sizeof(Px)]
; str P2, [stack_loc_for_args + 2*sizeof(Px)]
; str P3, [stack_loc_for_args + 3*sizeof(Px)]
; x0 = stack_loc_for_args
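; The four predicates are expected to be stored into a single scalable stack
; object of size 8 (4 x sizeof(Px) per granule), with the element offsets again
; formed via CNTD/CNTW, and the address of that object passed in x0.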
define [4 x <vscale x 16 x i1>] @caller_with_svepred_arg_4xv16i1_4xv16i1([4 x <vscale x 16 x i1>] %arg1, [4 x <vscale x 16 x i1>] %arg2) {
; CHECK: name: caller_with_svepred_arg_4xv16i1_4xv16i1
; CHECK: - { id: 0, name: '', type: default, offset: 0, size: 8, alignment: 2,
; CHECK-NEXT: stack-id: scalable-vector,
; CHECK: [[PRED3:%[0-9]+]]:ppr = COPY $p3
; CHECK: [[PRED2:%[0-9]+]]:ppr = COPY $p2
; CHECK: [[PRED1:%[0-9]+]]:ppr = COPY $p1
; CHECK: [[PRED0:%[0-9]+]]:ppr = COPY $p0
; CHECK: [[OFFSET1:%[0-9]+]]:gpr64 = CNTD_XPiI 31, 1, implicit $vg
; CHECK: [[OFFSET2:%[0-9]+]]:gpr64 = CNTW_XPiI 31, 1, implicit $vg
; CHECK: [[OFFSET3:%[0-9]+]]:gpr64 = CNTD_XPiI 31, 3, implicit $vg
; CHECK: [[STACK:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0, 0
; CHECK: [[ADDR3:%[0-9]+]]:gpr64common = ADDXrr [[STACK]], [[OFFSET3]]
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
; CHECK: STR_PXI [[PRED3]], killed [[ADDR3]], 0 :: (store (<vscale x 1 x s16>))
; CHECK: [[ADDR2:%[0-9]+]]:gpr64common = ADDXrr [[STACK]], [[OFFSET2]]
; CHECK: STR_PXI [[PRED2]], killed [[ADDR2]], 0 :: (store (<vscale x 1 x s16>))
; CHECK: [[ADDR1:%[0-9]+]]:gpr64common = nuw ADDXrr [[STACK]], [[OFFSET1]]
; CHECK: STR_PXI [[PRED1]], killed [[ADDR1]], 0 :: (store (<vscale x 1 x s16>))
; CHECK: STR_PXI [[PRED0]], %stack.0, 0 :: (store (<vscale x 1 x s16>) into %stack.0)
; CHECK: $x0 = COPY [[STACK]]
; CHECK: BL @callee_with_svepred_arg_4xv16i1_4xv16i1, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $p0, implicit $p1, implicit $p2, implicit $p3, implicit $x0, implicit-def $sp, implicit-def $p0, implicit-def $p1, implicit-def $p2, implicit-def $p3
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
  %res = call [4 x <vscale x 16 x i1>] @callee_with_svepred_arg_4xv16i1_4xv16i1([4 x <vscale x 16 x i1>] %arg2, [4 x <vscale x 16 x i1>] %arg1)
  ret [4 x <vscale x 16 x i1>] %res
}

; Test that arg2 is passed through x0, i.e., x0 = &%arg2; and return values are loaded from x0:
; P0 = ldr [x0]
; P1 = ldr [x0 + sizeof(Px)]
; P2 = ldr [x0 + 2*sizeof(Px)]
; P3 = ldr [x0 + 3*sizeof(Px)]
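; Each <vscale x 32 x i1> is lowered as two <vscale x 16 x i1> parts, so the
; indirect load sequence here is expected to look the same as in the
; 4 x <vscale x 16 x i1> case above.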
define aarch64_sve_vector_pcs [2 x <vscale x 32 x i1>] @callee_with_svepred_arg_1xv16i1_2xv32i1([1 x <vscale x 16 x i1>] %arg1, [2 x <vscale x 32 x i1>] %arg2) {
; CHECK: name: callee_with_svepred_arg_1xv16i1_2xv32i1
; CHECK: [[BASE:%[0-9]+]]:gpr64common = COPY $x0
; CHECK: [[OFFSET1:%[0-9]+]]:gpr64 = CNTD_XPiI 31, 1, implicit $vg
; CHECK: [[ADDR1:%[0-9]+]]:gpr64common = nuw ADDXrr [[BASE]], killed [[OFFSET1]]
; CHECK: [[PRED1:%[0-9]+]]:ppr = LDR_PXI killed [[ADDR1]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: [[OFFSET2:%[0-9]+]]:gpr64 = CNTW_XPiI 31, 1, implicit $vg
; CHECK: [[ADDR2:%[0-9]+]]:gpr64common = ADDXrr [[BASE]], killed [[OFFSET2]]
; CHECK: [[PRED2:%[0-9]+]]:ppr = LDR_PXI killed [[ADDR2]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: [[OFFSET3:%[0-9]+]]:gpr64 = CNTD_XPiI 31, 3, implicit $vg
; CHECK: [[ADDR3:%[0-9]+]]:gpr64common = ADDXrr [[BASE]], killed [[OFFSET3]]
; CHECK: [[PRED3:%[0-9]+]]:ppr = LDR_PXI killed [[ADDR3]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: [[PRED0:%[0-9]+]]:ppr = LDR_PXI [[BASE]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: $p0 = COPY [[PRED0]]
; CHECK: $p1 = COPY [[PRED1]]
; CHECK: $p2 = COPY [[PRED2]]
; CHECK: $p3 = COPY [[PRED3]]
; CHECK: RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3
  ret [2 x <vscale x 32 x i1>] %arg2
}

; Test that arg1 is stored to the stack from p0~p3, and the stack location is passed through x0 to set up the call:
; str P0, [stack_loc_for_args]
; str P1, [stack_loc_for_args + sizeof(Px)]
; str P2, [stack_loc_for_args + 2*sizeof(Px)]
; str P3, [stack_loc_for_args + 3*sizeof(Px)]
; x0 = stack_loc_for_args
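; Only %arg2 (a single <vscale x 16 x i1>) travels in a predicate register, so
; the BL below should carry just $p0 and $x0 as argument operands.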
define [2 x <vscale x 32 x i1>] @caller_with_svepred_arg_2xv32i1_1xv16i1([2 x <vscale x 32 x i1>] %arg1, [1 x <vscale x 16 x i1>] %arg2) {
; CHECK: name: caller_with_svepred_arg_2xv32i1_1xv16i1
; CHECK: - { id: 0, name: '', type: default, offset: 0, size: 8, alignment: 2,
; CHECK-NEXT: stack-id: scalable-vector,
; CHECK: [[PRED3:%[0-9]+]]:ppr = COPY $p3
; CHECK: [[PRED2:%[0-9]+]]:ppr = COPY $p2
; CHECK: [[PRED1:%[0-9]+]]:ppr = COPY $p1
; CHECK: [[PRED0:%[0-9]+]]:ppr = COPY $p0
; CHECK: [[OFFSET3:%[0-9]+]]:gpr64 = CNTD_XPiI 31, 3, implicit $vg
; CHECK: [[STACK:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0, 0
; CHECK: [[ADDR3:%[0-9]+]]:gpr64common = ADDXrr [[STACK]], killed [[OFFSET3]]
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
; CHECK: STR_PXI [[PRED3]], killed [[ADDR3]], 0 :: (store (<vscale x 1 x s16>))
; CHECK: [[OFFSET2:%[0-9]+]]:gpr64 = CNTW_XPiI 31, 1, implicit $vg
; CHECK: [[ADDR2:%[0-9]+]]:gpr64common = ADDXrr [[STACK]], killed [[OFFSET2]]
; CHECK: STR_PXI [[PRED2]], killed [[ADDR2]], 0 :: (store (<vscale x 1 x s16>))
; CHECK: [[OFFSET1:%[0-9]+]]:gpr64 = CNTD_XPiI 31, 1, implicit $vg
; CHECK: [[ADDR1:%[0-9]+]]:gpr64common = nuw ADDXrr [[STACK]], killed [[OFFSET1]]
; CHECK: STR_PXI [[PRED1]], killed [[ADDR1]], 0 :: (store (<vscale x 1 x s16>))
; CHECK: STR_PXI [[PRED0]], %stack.0, 0 :: (store (<vscale x 1 x s16>) into %stack.0)
; CHECK: $x0 = COPY [[STACK]]
; CHECK: BL @callee_with_svepred_arg_1xv16i1_2xv32i1, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $p0, implicit $x0, implicit-def $sp, implicit-def $p0, implicit-def $p1, implicit-def $p2, implicit-def $p3
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
  %res = call [2 x <vscale x 32 x i1>] @callee_with_svepred_arg_1xv16i1_2xv32i1([1 x <vscale x 16 x i1>] %arg2, [2 x <vscale x 32 x i1>] %arg1)
  ret [2 x <vscale x 32 x i1>] %res
}

; Test that arg1 and arg3 are passed via P0~P3, and arg2 is passed indirectly through an address in x0.
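; arg2 needs four predicate registers but only p2 and p3 are left after arg1,
; so the whole of arg2 goes indirectly via x0 and p2-p3 are then used for arg3.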
define aarch64_sve_vector_pcs [4 x <vscale x 16 x i1>] @callee_with_svepred_arg_2xv16i1_4xv16i1_2xv16i1([2 x <vscale x 16 x i1>] %arg1, [4 x <vscale x 16 x i1>] %arg2, [2 x <vscale x 16 x i1>] %arg3) nounwind {
; CHECK: name: callee_with_svepred_arg_2xv16i1_4xv16i1_2xv16i1
; CHECK: [[P3:%[0-9]+]]:ppr = COPY $p3
; CHECK: [[P2:%[0-9]+]]:ppr = COPY $p2
; CHECK: [[X0:%[0-9]+]]:gpr64common = COPY $x0
; CHECK: [[P1:%[0-9]+]]:ppr = COPY $p1
; CHECK: [[P0:%[0-9]+]]:ppr = COPY $p0
; CHECK: [[OFFSET3:%[0-9]+]]:gpr64 = CNTD_XPiI 31, 3, implicit $vg
; CHECK: [[ADDR3:%[0-9]+]]:gpr64common = ADDXrr [[X0]], killed [[OFFSET3]]
; CHECK: [[P7:%[0-9]+]]:ppr = LDR_PXI killed [[ADDR3]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: [[OFFSET2:%[0-9]+]]:gpr64 = CNTW_XPiI 31, 1, implicit $vg
; CHECK: [[ADDR2:%[0-9]+]]:gpr64common = ADDXrr [[X0]], killed [[OFFSET2]]
; CHECK: [[P6:%[0-9]+]]:ppr = LDR_PXI killed [[ADDR2]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: [[OFFSET1:%[0-9]+]]:gpr64 = CNTD_XPiI 31, 1, implicit $vg
; CHECK: [[ADDR1:%[0-9]+]]:gpr64common = nuw ADDXrr [[X0]], killed [[OFFSET1]]
; CHECK: [[P5:%[0-9]+]]:ppr = LDR_PXI killed [[ADDR1]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: [[P4:%[0-9]+]]:ppr = LDR_PXI [[X0]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: [[RES0:%[0-9]+]]:ppr = AND_PPzPP [[P0]], [[P0]], killed [[P4]]
; CHECK: [[RES1:%[0-9]+]]:ppr = AND_PPzPP [[P1]], [[P1]], killed [[P5]]
; CHECK: [[RES2:%[0-9]+]]:ppr = AND_PPzPP [[P2]], [[P2]], killed [[P6]]
; CHECK: [[RES3:%[0-9]+]]:ppr = AND_PPzPP [[P3]], [[P3]], killed [[P7]]
; CHECK: $p0 = COPY [[RES0]]
; CHECK: $p1 = COPY [[RES1]]
; CHECK: $p2 = COPY [[RES2]]
; CHECK: $p3 = COPY [[RES3]]
; CHECK: RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3
  %p0 = extractvalue [2 x <vscale x 16 x i1>] %arg1, 0
  %p1 = extractvalue [2 x <vscale x 16 x i1>] %arg1, 1
  %p2 = extractvalue [2 x <vscale x 16 x i1>] %arg3, 0
  %p3 = extractvalue [2 x <vscale x 16 x i1>] %arg3, 1
  %p4 = extractvalue [4 x <vscale x 16 x i1>] %arg2, 0
  %p5 = extractvalue [4 x <vscale x 16 x i1>] %arg2, 1
  %p6 = extractvalue [4 x <vscale x 16 x i1>] %arg2, 2
  %p7 = extractvalue [4 x <vscale x 16 x i1>] %arg2, 3
  %r0 = and <vscale x 16 x i1> %p0, %p4
  %r1 = and <vscale x 16 x i1> %p1, %p5
  %r2 = and <vscale x 16 x i1> %p2, %p6
  %r3 = and <vscale x 16 x i1> %p3, %p7
  %1 = insertvalue [4 x <vscale x 16 x i1>] undef, <vscale x 16 x i1> %r0, 0
  %2 = insertvalue [4 x <vscale x 16 x i1>] %1, <vscale x 16 x i1> %r1, 1
  %3 = insertvalue [4 x <vscale x 16 x i1>] %2, <vscale x 16 x i1> %r2, 2
  %4 = insertvalue [4 x <vscale x 16 x i1>] %3, <vscale x 16 x i1> %r3, 3
  ret [4 x <vscale x 16 x i1>] %4
}

; Test that z8 and z9, passed by reference, are loaded from a location that is passed on the stack.
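; With x0-x7 consumed by the i64 arguments, the address of the by-reference
; copy cannot go in a GPR, so the callee first loads the pointer from a fixed
; stack slot (LDRXui %fixed-stack.0) and only then performs the SVE load.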
define aarch64_sve_vector_pcs <vscale x 4 x i32> @callee_with_many_gpr_sve_arg(i64 %x0, i64 %x1, i64 %x2, i64 %x3, i64 %x4, i64 %x5, i64 %x6, i64 %x7, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5, <vscale x 4 x i32> %z6, <vscale x 4 x i32> %z7, <vscale x 2 x i64> %z8, <vscale x 4 x i32> %z9) {
; CHECK: name: callee_with_many_gpr_sve_arg
; CHECK: - { id: 0, type: default, offset: 8, size: 8, alignment: 8, stack-id: default,
; CHECK-DAG: [[BASE:%[0-9]+]]:gpr64common = LDRXui %fixed-stack.0, 0
; CHECK-DAG: [[PTRUE:%[0-9]+]]:ppr_3b = PTRUE_S 31
; CHECK-DAG: [[RES:%[0-9]+]]:zpr = LD1W_IMM killed [[PTRUE]], killed [[BASE]]
; CHECK-DAG: $z0 = COPY [[RES]]
; CHECK: RET_ReallyLR implicit $z0
  ret <vscale x 4 x i32> %z9
}

; Test that z8 and z9 are passed by reference, where the reference is passed on the stack.
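; On the caller side the two pointers are stored at [sp] and [sp, #8]
; (STRXui ..., [[SP]], 0 and 1) ahead of the call.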
define aarch64_sve_vector_pcs <vscale x 4 x i32> @caller_with_many_gpr_sve_arg(i64 %x, <vscale x 4 x i32> %z, <vscale x 2 x i64> %z2) {
; CHECK: name: caller_with_many_gpr_sve_arg
; CHECK: - { id: 0, name: '', type: default, offset: 0, size: 16, alignment: 16,
; CHECK-NEXT: stack-id: scalable-vector
; CHECK: - { id: 1, name: '', type: default, offset: 0, size: 16, alignment: 16,
; CHECK-NEXT: stack-id: scalable-vector
; CHECK-DAG: [[PTRUE_S:%[0-9]+]]:ppr_3b = PTRUE_S 31
; CHECK-DAG: [[PTRUE_D:%[0-9]+]]:ppr_3b = PTRUE_D 31
; CHECK-DAG: ST1D_IMM %{{[0-9]+}}, killed [[PTRUE_D]], %stack.0, 0
; CHECK-DAG: ST1W_IMM %{{[0-9]+}}, killed [[PTRUE_S]], %stack.1, 0
; CHECK-DAG: [[BASE1:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0
; CHECK-DAG: [[BASE2:%[0-9]+]]:gpr64common = ADDXri %stack.1, 0
; CHECK-DAG: [[SP:%[0-9]+]]:gpr64sp = COPY $sp
; CHECK-DAG: STRXui killed [[BASE1]], [[SP]], 0
; CHECK-DAG: STRXui killed [[BASE2]], [[SP]], 1
; CHECK: BL @callee_with_many_gpr_sve_arg
; CHECK: RET_ReallyLR implicit $z0
  %ret = call aarch64_sve_vector_pcs <vscale x 4 x i32> @callee_with_many_gpr_sve_arg(i64 %x, i64 %x, i64 %x, i64 %x, i64 %x, i64 %x, i64 %x, i64 %x, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 2 x i64> %z2, <vscale x 4 x i32> %z)
  ret <vscale x 4 x i32> %ret
}