1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64 -mattr=+sve2p1 < %s | FileCheck %s
; Calls @llvm.aarch64.sve.whilege.x2.nxv16i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilege { p0.b, p1.b }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
6 define <vscale x 16 x i1> @whilege_x2_nxv16i1(i64 %m, i64 %n) nounwind {
7 ; CHECK-LABEL: whilege_x2_nxv16i1:
9 ; CHECK-NEXT: whilege { p0.b, p1.b }, x0, x1
10 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
12 %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 %m, i64 %n)
13 %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
14 ret <vscale x 16 x i1> %res
; Calls @llvm.aarch64.sve.whilege.x2.nxv8i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilege { p0.h, p1.h }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
17 define <vscale x 8 x i1> @whilege_x2_nxv8i1(i64 %m, i64 %n) nounwind {
18 ; CHECK-LABEL: whilege_x2_nxv8i1:
20 ; CHECK-NEXT: whilege { p0.h, p1.h }, x0, x1
21 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
23 %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 %m, i64 %n)
24 %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
25 ret <vscale x 8 x i1> %res
; Calls @llvm.aarch64.sve.whilege.x2.nxv4i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilege { p0.s, p1.s }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
28 define <vscale x 4 x i1> @whilege_x2_nxv4i1(i64 %m, i64 %n) nounwind {
29 ; CHECK-LABEL: whilege_x2_nxv4i1:
31 ; CHECK-NEXT: whilege { p0.s, p1.s }, x0, x1
32 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
34 %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 %m, i64 %n)
35 %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
36 ret <vscale x 4 x i1> %res
; Calls @llvm.aarch64.sve.whilege.x2.nxv2i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilege { p0.d, p1.d }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
39 define <vscale x 2 x i1> @whilege_x2_nxv2i1(i64 %m, i64 %n) nounwind {
40 ; CHECK-LABEL: whilege_x2_nxv2i1:
42 ; CHECK-NEXT: whilege { p0.d, p1.d }, x0, x1
43 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
45 %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 %m, i64 %n)
46 %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
47 ret <vscale x 2 x i1> %res
; Calls @llvm.aarch64.sve.whilegt.x2.nxv16i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilegt { p0.b, p1.b }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
53 define <vscale x 16 x i1> @whilegt_x2_nxv16i1(i64 %m, i64 %n) nounwind {
54 ; CHECK-LABEL: whilegt_x2_nxv16i1:
56 ; CHECK-NEXT: whilegt { p0.b, p1.b }, x0, x1
57 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
59 %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 %m, i64 %n)
60 %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
61 ret <vscale x 16 x i1> %res
; Calls @llvm.aarch64.sve.whilegt.x2.nxv8i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilegt { p0.h, p1.h }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
64 define <vscale x 8 x i1> @whilegt_x2_nxv8i1(i64 %m, i64 %n) nounwind {
65 ; CHECK-LABEL: whilegt_x2_nxv8i1:
67 ; CHECK-NEXT: whilegt { p0.h, p1.h }, x0, x1
68 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
70 %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 %m, i64 %n)
71 %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
72 ret <vscale x 8 x i1> %res
; Calls @llvm.aarch64.sve.whilegt.x2.nxv4i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilegt { p0.s, p1.s }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
75 define <vscale x 4 x i1> @whilegt_x2_nxv4i1(i64 %m, i64 %n) nounwind {
76 ; CHECK-LABEL: whilegt_x2_nxv4i1:
78 ; CHECK-NEXT: whilegt { p0.s, p1.s }, x0, x1
79 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
81 %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 %m, i64 %n)
82 %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
83 ret <vscale x 4 x i1> %res
; Calls @llvm.aarch64.sve.whilegt.x2.nxv2i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilegt { p0.d, p1.d }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
86 define <vscale x 2 x i1> @whilegt_x2_nxv2i1(i64 %m, i64 %n) nounwind {
87 ; CHECK-LABEL: whilegt_x2_nxv2i1:
89 ; CHECK-NEXT: whilegt { p0.d, p1.d }, x0, x1
90 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
92 %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 %m, i64 %n)
93 %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
94 ret <vscale x 2 x i1> %res
; Calls @llvm.aarch64.sve.whilehi.x2.nxv16i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilehi { p0.b, p1.b }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
100 define <vscale x 16 x i1> @whilehi_x2_nxv16i1(i64 %m, i64 %n) nounwind {
101 ; CHECK-LABEL: whilehi_x2_nxv16i1:
103 ; CHECK-NEXT: whilehi { p0.b, p1.b }, x0, x1
104 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
106 %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 %m, i64 %n)
107 %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
108 ret <vscale x 16 x i1> %res
; Calls @llvm.aarch64.sve.whilehi.x2.nxv8i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilehi { p0.h, p1.h }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
111 define <vscale x 8 x i1> @whilehi_x2_nxv8i1(i64 %m, i64 %n) nounwind {
112 ; CHECK-LABEL: whilehi_x2_nxv8i1:
114 ; CHECK-NEXT: whilehi { p0.h, p1.h }, x0, x1
115 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
117 %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 %m, i64 %n)
118 %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
119 ret <vscale x 8 x i1> %res
; Calls @llvm.aarch64.sve.whilehi.x2.nxv4i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilehi { p0.s, p1.s }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
122 define <vscale x 4 x i1> @whilehi_x2_nxv4i1(i64 %m, i64 %n) nounwind {
123 ; CHECK-LABEL: whilehi_x2_nxv4i1:
125 ; CHECK-NEXT: whilehi { p0.s, p1.s }, x0, x1
126 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
128 %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 %m, i64 %n)
129 %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
130 ret <vscale x 4 x i1> %res
; Calls @llvm.aarch64.sve.whilehi.x2.nxv2i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilehi { p0.d, p1.d }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
133 define <vscale x 2 x i1> @whilehi_x2_nxv2i1(i64 %m, i64 %n) nounwind {
134 ; CHECK-LABEL: whilehi_x2_nxv2i1:
136 ; CHECK-NEXT: whilehi { p0.d, p1.d }, x0, x1
137 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
139 %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 %m, i64 %n)
140 %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
141 ret <vscale x 2 x i1> %res
; Calls @llvm.aarch64.sve.whilehs.x2.nxv16i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilehs { p0.b, p1.b }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
147 define <vscale x 16 x i1> @whilehs_x2_nxv16i1(i64 %m, i64 %n) nounwind {
148 ; CHECK-LABEL: whilehs_x2_nxv16i1:
150 ; CHECK-NEXT: whilehs { p0.b, p1.b }, x0, x1
151 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
153 %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 %m, i64 %n)
154 %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
155 ret <vscale x 16 x i1> %res
; Calls @llvm.aarch64.sve.whilehs.x2.nxv8i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilehs { p0.h, p1.h }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
158 define <vscale x 8 x i1> @whilehs_x2_nxv8i1(i64 %m, i64 %n) nounwind {
159 ; CHECK-LABEL: whilehs_x2_nxv8i1:
161 ; CHECK-NEXT: whilehs { p0.h, p1.h }, x0, x1
162 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
164 %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 %m, i64 %n)
165 %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
166 ret <vscale x 8 x i1> %res
; Calls @llvm.aarch64.sve.whilehs.x2.nxv4i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilehs { p0.s, p1.s }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
169 define <vscale x 4 x i1> @whilehs_x2_nxv4i1(i64 %m, i64 %n) nounwind {
170 ; CHECK-LABEL: whilehs_x2_nxv4i1:
172 ; CHECK-NEXT: whilehs { p0.s, p1.s }, x0, x1
173 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
175 %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 %m, i64 %n)
176 %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
177 ret <vscale x 4 x i1> %res
; Calls @llvm.aarch64.sve.whilehs.x2.nxv2i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilehs { p0.d, p1.d }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
180 define <vscale x 2 x i1> @whilehs_x2_nxv2i1(i64 %m, i64 %n) nounwind {
181 ; CHECK-LABEL: whilehs_x2_nxv2i1:
183 ; CHECK-NEXT: whilehs { p0.d, p1.d }, x0, x1
184 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
186 %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 %m, i64 %n)
187 %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
188 ret <vscale x 2 x i1> %res
; Calls @llvm.aarch64.sve.whilele.x2.nxv16i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilele { p0.b, p1.b }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
194 define <vscale x 16 x i1> @whilele_x2_nxv16i1(i64 %m, i64 %n) nounwind {
195 ; CHECK-LABEL: whilele_x2_nxv16i1:
197 ; CHECK-NEXT: whilele { p0.b, p1.b }, x0, x1
198 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
200 %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 %m, i64 %n)
201 %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
202 ret <vscale x 16 x i1> %res
; Calls @llvm.aarch64.sve.whilele.x2.nxv8i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilele { p0.h, p1.h }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
205 define <vscale x 8 x i1> @whilele_x2_nxv8i1(i64 %m, i64 %n) nounwind {
206 ; CHECK-LABEL: whilele_x2_nxv8i1:
208 ; CHECK-NEXT: whilele { p0.h, p1.h }, x0, x1
209 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
211 %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 %m, i64 %n)
212 %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
213 ret <vscale x 8 x i1> %res
; Calls @llvm.aarch64.sve.whilele.x2.nxv4i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilele { p0.s, p1.s }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
216 define <vscale x 4 x i1> @whilele_x2_nxv4i1(i64 %m, i64 %n) nounwind {
217 ; CHECK-LABEL: whilele_x2_nxv4i1:
219 ; CHECK-NEXT: whilele { p0.s, p1.s }, x0, x1
220 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
222 %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 %m, i64 %n)
223 %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
224 ret <vscale x 4 x i1> %res
; Calls @llvm.aarch64.sve.whilele.x2.nxv2i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilele { p0.d, p1.d }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
227 define <vscale x 2 x i1> @whilele_x2_nxv2i1(i64 %m, i64 %n) nounwind {
228 ; CHECK-LABEL: whilele_x2_nxv2i1:
230 ; CHECK-NEXT: whilele { p0.d, p1.d }, x0, x1
231 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
233 %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 %m, i64 %n)
234 %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
235 ret <vscale x 2 x i1> %res
; Calls @llvm.aarch64.sve.whilelo.x2.nxv16i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilelo { p0.b, p1.b }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
241 define <vscale x 16 x i1> @whilelo_x2_nxv16i1(i64 %m, i64 %n) nounwind {
242 ; CHECK-LABEL: whilelo_x2_nxv16i1:
244 ; CHECK-NEXT: whilelo { p0.b, p1.b }, x0, x1
245 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
247 %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 %m, i64 %n)
248 %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
249 ret <vscale x 16 x i1> %res
; Calls @llvm.aarch64.sve.whilelo.x2.nxv8i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilelo { p0.h, p1.h }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
252 define <vscale x 8 x i1> @whilelo_x2_nxv8i1(i64 %m, i64 %n) nounwind {
253 ; CHECK-LABEL: whilelo_x2_nxv8i1:
255 ; CHECK-NEXT: whilelo { p0.h, p1.h }, x0, x1
256 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
258 %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 %m, i64 %n)
259 %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
260 ret <vscale x 8 x i1> %res
; Calls @llvm.aarch64.sve.whilelo.x2.nxv4i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilelo { p0.s, p1.s }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
263 define <vscale x 4 x i1> @whilelo_x2_nxv4i1(i64 %m, i64 %n) nounwind {
264 ; CHECK-LABEL: whilelo_x2_nxv4i1:
266 ; CHECK-NEXT: whilelo { p0.s, p1.s }, x0, x1
267 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
269 %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 %m, i64 %n)
270 %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
271 ret <vscale x 4 x i1> %res
; Calls @llvm.aarch64.sve.whilelo.x2.nxv2i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilelo { p0.d, p1.d }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
274 define <vscale x 2 x i1> @whilelo_x2_nxv2i1(i64 %m, i64 %n) nounwind {
275 ; CHECK-LABEL: whilelo_x2_nxv2i1:
277 ; CHECK-NEXT: whilelo { p0.d, p1.d }, x0, x1
278 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
280 %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 %m, i64 %n)
281 %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
282 ret <vscale x 2 x i1> %res
; Calls @llvm.aarch64.sve.whilels.x2.nxv16i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilels { p0.b, p1.b }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
288 define <vscale x 16 x i1> @whilels_x2_nxv16i1(i64 %m, i64 %n) nounwind {
289 ; CHECK-LABEL: whilels_x2_nxv16i1:
291 ; CHECK-NEXT: whilels { p0.b, p1.b }, x0, x1
292 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
294 %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 %m, i64 %n)
295 %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
296 ret <vscale x 16 x i1> %res
; Calls @llvm.aarch64.sve.whilels.x2.nxv8i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilels { p0.h, p1.h }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
299 define <vscale x 8 x i1> @whilels_x2_nxv8i1(i64 %m, i64 %n) nounwind {
300 ; CHECK-LABEL: whilels_x2_nxv8i1:
302 ; CHECK-NEXT: whilels { p0.h, p1.h }, x0, x1
303 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
305 %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 %m, i64 %n)
306 %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
307 ret <vscale x 8 x i1> %res
; Calls @llvm.aarch64.sve.whilels.x2.nxv4i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilels { p0.s, p1.s }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
310 define <vscale x 4 x i1> @whilels_x2_nxv4i1(i64 %m, i64 %n) nounwind {
311 ; CHECK-LABEL: whilels_x2_nxv4i1:
313 ; CHECK-NEXT: whilels { p0.s, p1.s }, x0, x1
314 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
316 %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 %m, i64 %n)
317 %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
318 ret <vscale x 4 x i1> %res
; Calls @llvm.aarch64.sve.whilels.x2.nxv2i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilels { p0.d, p1.d }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
321 define <vscale x 2 x i1> @whilels_x2_nxv2i1(i64 %m, i64 %n) nounwind {
322 ; CHECK-LABEL: whilels_x2_nxv2i1:
324 ; CHECK-NEXT: whilels { p0.d, p1.d }, x0, x1
325 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
327 %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 %m, i64 %n)
328 %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
329 ret <vscale x 2 x i1> %res
; Calls @llvm.aarch64.sve.whilelt.x2.nxv16i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilelt { p0.b, p1.b }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
335 define <vscale x 16 x i1> @whilelt_x2_nxv16i1(i64 %m, i64 %n) nounwind {
336 ; CHECK-LABEL: whilelt_x2_nxv16i1:
338 ; CHECK-NEXT: whilelt { p0.b, p1.b }, x0, x1
339 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
341 %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 %m, i64 %n)
342 %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
343 ret <vscale x 16 x i1> %res
; Calls @llvm.aarch64.sve.whilelt.x2.nxv8i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilelt { p0.h, p1.h }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
346 define <vscale x 8 x i1> @whilelt_x2_nxv8i1(i64 %m, i64 %n) nounwind {
347 ; CHECK-LABEL: whilelt_x2_nxv8i1:
349 ; CHECK-NEXT: whilelt { p0.h, p1.h }, x0, x1
350 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
352 %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 %m, i64 %n)
353 %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
354 ret <vscale x 8 x i1> %res
; Calls @llvm.aarch64.sve.whilelt.x2.nxv4i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilelt { p0.s, p1.s }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
357 define <vscale x 4 x i1> @whilelt_x2_nxv4i1(i64 %m, i64 %n) nounwind {
358 ; CHECK-LABEL: whilelt_x2_nxv4i1:
360 ; CHECK-NEXT: whilelt { p0.s, p1.s }, x0, x1
361 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
363 %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 %m, i64 %n)
364 %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
365 ret <vscale x 4 x i1> %res
; Calls @llvm.aarch64.sve.whilelt.x2.nxv2i1 on (%m, %n) and returns only
; element 0 of the resulting predicate pair. The assertions expect a single
; `whilelt { p0.d, p1.d }, x0, x1` plus the kill annotation on $p0 / $p0_p1.
368 define <vscale x 2 x i1> @whilelt_x2_nxv2i1(i64 %m, i64 %n) nounwind {
369 ; CHECK-LABEL: whilelt_x2_nxv2i1:
371 ; CHECK-NEXT: whilelt { p0.d, p1.d }, x0, x1
372 ; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
374 %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 %m, i64 %n)
375 %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
376 ret <vscale x 2 x i1> %res
380 ; Test that we get good code quality when the x2 while intrinsics are used in combination with other intrinsics
; Code-quality check: both halves of the @llvm.aarch64.sve.whilege.x2.nxv8i1
; result are widened with convert.to.svbool and inserted at indices 0 and 16
; of a <vscale x 32 x i1> that starts as poison. The visible assertions expect
; the entry block to open with `whilege { p0.h, p1.h }, x0, x1`.
382 define <vscale x 32 x i1> @codegen_whilege_b16_x2(i64 noundef %op1, i64 noundef %op2) nounwind {
383 ; CHECK-LABEL: codegen_whilege_b16_x2:
384 ; CHECK: // %bb.0: // %entry
385 ; CHECK-NEXT: whilege { p0.h, p1.h }, x0, x1
388 %0 = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 %op1, i64 %op2)
389 %1 = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } %0, 0
390 %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
391 %3 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %2, i64 0)
392 %4 = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } %0, 1
393 %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %4)
394 %6 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %3, <vscale x 16 x i1> %5, i64 16)
395 ret <vscale x 32 x i1> %6
; Code-quality check: both halves of the @llvm.aarch64.sve.whilegt.x2.nxv4i1
; result are widened with convert.to.svbool and inserted at indices 0 and 16
; of a <vscale x 32 x i1> that starts as poison. The visible assertions expect
; the entry block to open with `whilegt { p0.s, p1.s }, x0, x1`.
398 define <vscale x 32 x i1> @codegen_whilegt_b32_x2(i64 noundef %op1, i64 noundef %op2) nounwind {
399 ; CHECK-LABEL: codegen_whilegt_b32_x2:
400 ; CHECK: // %bb.0: // %entry
401 ; CHECK-NEXT: whilegt { p0.s, p1.s }, x0, x1
404 %0 = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 %op1, i64 %op2)
405 %1 = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %0, 0
406 %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
407 %3 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %2, i64 0)
408 %4 = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %0, 1
409 %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %4)
410 %6 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %3, <vscale x 16 x i1> %5, i64 16)
411 ret <vscale x 32 x i1> %6
; Code-quality check: both halves of the @llvm.aarch64.sve.whilehi.x2.nxv2i1
; result are widened with convert.to.svbool and inserted at indices 0 and 16
; of a <vscale x 32 x i1> that starts as poison. The visible assertions expect
; the entry block to open with `whilehi { p0.d, p1.d }, x0, x1`.
414 define <vscale x 32 x i1> @codegen_whilehi_b64_x2(i64 noundef %op1, i64 noundef %op2) nounwind {
415 ; CHECK-LABEL: codegen_whilehi_b64_x2:
416 ; CHECK: // %bb.0: // %entry
417 ; CHECK-NEXT: whilehi { p0.d, p1.d }, x0, x1
420 %0 = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 %op1, i64 %op2)
421 %1 = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } %0, 0
422 %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %1)
423 %3 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %2, i64 0)
424 %4 = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } %0, 1
425 %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %4)
426 %6 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %3, <vscale x 16 x i1> %5, i64 16)
427 ret <vscale x 32 x i1> %6
; Code-quality check: both halves of the @llvm.aarch64.sve.whilehs.x2.nxv8i1
; result are widened with convert.to.svbool and inserted at indices 0 and 16
; of a <vscale x 32 x i1> that starts as poison. The visible assertions expect
; the entry block to open with `whilehs { p0.h, p1.h }, x0, x1`.
430 define <vscale x 32 x i1> @codegen_whilehs_b16_x2(i64 noundef %op1, i64 noundef %op2) nounwind {
431 ; CHECK-LABEL: codegen_whilehs_b16_x2:
432 ; CHECK: // %bb.0: // %entry
433 ; CHECK-NEXT: whilehs { p0.h, p1.h }, x0, x1
436 %0 = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 %op1, i64 %op2)
437 %1 = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } %0, 0
438 %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
439 %3 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %2, i64 0)
440 %4 = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } %0, 1
441 %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %4)
442 %6 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %3, <vscale x 16 x i1> %5, i64 16)
443 ret <vscale x 32 x i1> %6
; Code-quality check: both halves of the @llvm.aarch64.sve.whilele.x2.nxv4i1
; result are widened with convert.to.svbool and inserted at indices 0 and 16
; of a <vscale x 32 x i1> that starts as poison. The visible assertions expect
; the entry block to open with `whilele { p0.s, p1.s }, x0, x1`.
446 define <vscale x 32 x i1> @codegen_whilele_b32_x2(i64 noundef %op1, i64 noundef %op2) nounwind {
447 ; CHECK-LABEL: codegen_whilele_b32_x2:
448 ; CHECK: // %bb.0: // %entry
449 ; CHECK-NEXT: whilele { p0.s, p1.s }, x0, x1
452 %0 = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 %op1, i64 %op2)
453 %1 = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %0, 0
454 %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
455 %3 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %2, i64 0)
456 %4 = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %0, 1
457 %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %4)
458 %6 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %3, <vscale x 16 x i1> %5, i64 16)
459 ret <vscale x 32 x i1> %6
; Code-quality check: both halves of the @llvm.aarch64.sve.whilelo.x2.nxv2i1
; result are widened with convert.to.svbool and inserted at indices 0 and 16
; of a <vscale x 32 x i1> that starts as poison. The visible assertions expect
; the entry block to open with `whilelo { p0.d, p1.d }, x0, x1`.
462 define <vscale x 32 x i1> @codegen_whilelo_b64_x2(i64 noundef %op1, i64 noundef %op2) nounwind {
463 ; CHECK-LABEL: codegen_whilelo_b64_x2:
464 ; CHECK: // %bb.0: // %entry
465 ; CHECK-NEXT: whilelo { p0.d, p1.d }, x0, x1
468 %0 = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 %op1, i64 %op2)
469 %1 = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } %0, 0
470 %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %1)
471 %3 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %2, i64 0)
472 %4 = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } %0, 1
473 %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %4)
474 %6 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %3, <vscale x 16 x i1> %5, i64 16)
475 ret <vscale x 32 x i1> %6
; Code-quality check: both halves of the @llvm.aarch64.sve.whilels.x2.nxv8i1
; result are widened with convert.to.svbool and inserted at indices 0 and 16
; of a <vscale x 32 x i1> that starts as poison. The visible assertions expect
; the entry block to open with `whilels { p0.h, p1.h }, x0, x1`.
478 define <vscale x 32 x i1> @codegen_whilels_b16_x2(i64 noundef %op1, i64 noundef %op2) nounwind {
479 ; CHECK-LABEL: codegen_whilels_b16_x2:
480 ; CHECK: // %bb.0: // %entry
481 ; CHECK-NEXT: whilels { p0.h, p1.h }, x0, x1
484 %0 = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 %op1, i64 %op2)
485 %1 = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } %0, 0
486 %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
487 %3 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %2, i64 0)
488 %4 = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } %0, 1
489 %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %4)
490 %6 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %3, <vscale x 16 x i1> %5, i64 16)
491 ret <vscale x 32 x i1> %6
; Code-quality check: both halves of the @llvm.aarch64.sve.whilelt.x2.nxv4i1
; result are widened with convert.to.svbool and inserted at indices 0 and 16
; of a <vscale x 32 x i1> that starts as poison. The visible assertions expect
; the entry block to open with `whilelt { p0.s, p1.s }, x0, x1`.
494 define <vscale x 32 x i1> @codegen_whilelt_b32_x2(i64 noundef %op1, i64 noundef %op2) nounwind {
495 ; CHECK-LABEL: codegen_whilelt_b32_x2:
496 ; CHECK: // %bb.0: // %entry
497 ; CHECK-NEXT: whilelt { p0.s, p1.s }, x0, x1
500 %0 = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 %op1, i64 %op2)
501 %1 = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %0, 0
502 %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
503 %3 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %2, i64 0)
504 %4 = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %0, 1
505 %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %4)
506 %6 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %3, <vscale x 16 x i1> %5, i64 16)
507 ret <vscale x 32 x i1> %6
511 ; == Test that we use predicate registers starting at a multiple of 2 ==
; Register-pair placement check: takes an extra predicate argument %p0 and
; ANDs it with element 0 of the @llvm.aarch64.sve.whilege.x2.nxv16i1 result.
; The assertions expect the pair in { p2.b, p3.b } and an `and` of p2 with p0
; written to p0.
; NOTE(review): presumably %p0 occupies p0 on entry, which is why the pair
; starts at p2 -- confirm against the calling convention.
513 define <vscale x 16 x i1> @whilege_x2_nxv16i1_reg_off(<vscale x 16 x i1> %p0, i64 %m, i64 %n) nounwind {
514 ; CHECK-LABEL: whilege_x2_nxv16i1_reg_off:
516 ; CHECK-NEXT: whilege { p2.b, p3.b }, x0, x1
517 ; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b
519 %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 %m, i64 %n)
520 %part1 = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
521 %res = and <vscale x 16 x i1> %part1, %p0
522 ret <vscale x 16 x i1> %res
; Register-pair placement check: takes an extra predicate argument %p0 and
; ANDs it with element 0 of the @llvm.aarch64.sve.whilegt.x2.nxv8i1 result.
; The assertions expect the pair in { p2.h, p3.h } and an `and` of p2 with p0
; written to p0.
; NOTE(review): presumably %p0 occupies p0 on entry, which is why the pair
; starts at p2 -- confirm against the calling convention.
525 define <vscale x 8 x i1> @whilegt_x2_nxv8i1_reg_off(<vscale x 8 x i1> %p0, i64 %m, i64 %n) nounwind {
526 ; CHECK-LABEL: whilegt_x2_nxv8i1_reg_off:
528 ; CHECK-NEXT: whilegt { p2.h, p3.h }, x0, x1
529 ; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b
531 %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 %m, i64 %n)
532 %part1 = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
533 %res = and <vscale x 8 x i1> %part1, %p0
534 ret <vscale x 8 x i1> %res
; Register-pair placement check: takes an extra predicate argument %p0 and
; ANDs it with element 0 of the @llvm.aarch64.sve.whilehi.x2.nxv4i1 result.
; The assertions expect the pair in { p2.s, p3.s } and an `and` of p2 with p0
; written to p0.
; NOTE(review): presumably %p0 occupies p0 on entry, which is why the pair
; starts at p2 -- confirm against the calling convention.
537 define <vscale x 4 x i1> @whilehi_x2_nxv4i1_reg_off(<vscale x 4 x i1> %p0, i64 %m, i64 %n) nounwind {
538 ; CHECK-LABEL: whilehi_x2_nxv4i1_reg_off:
540 ; CHECK-NEXT: whilehi { p2.s, p3.s }, x0, x1
541 ; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b
543 %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 %m, i64 %n)
544 %part1 = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
545 %res = and <vscale x 4 x i1> %part1, %p0
546 ret <vscale x 4 x i1> %res
; Register-pair placement check: takes an extra predicate argument %p0 and
; ANDs it with element 0 of the @llvm.aarch64.sve.whilehs.x2.nxv2i1 result.
; The assertions expect the pair in { p2.d, p3.d } and an `and` of p2 with p0
; written to p0.
; NOTE(review): presumably %p0 occupies p0 on entry, which is why the pair
; starts at p2 -- confirm against the calling convention.
549 define <vscale x 2 x i1> @whilehs_x2_nxv2i1_reg_off(<vscale x 2 x i1> %p0, i64 %m, i64 %n) nounwind {
550 ; CHECK-LABEL: whilehs_x2_nxv2i1_reg_off:
552 ; CHECK-NEXT: whilehs { p2.d, p3.d }, x0, x1
553 ; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b
555 %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 %m, i64 %n)
556 %part1 = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
557 %res = and <vscale x 2 x i1> %part1, %p0
558 ret <vscale x 2 x i1> %res
; Register-pair placement check: takes an extra predicate argument %p0 and
; ANDs it with element 0 of the @llvm.aarch64.sve.whilele.x2.nxv16i1 result.
; The assertions expect the pair in { p2.b, p3.b } and an `and` of p2 with p0
; written to p0.
; NOTE(review): presumably %p0 occupies p0 on entry, which is why the pair
; starts at p2 -- confirm against the calling convention.
561 define <vscale x 16 x i1> @whilele_x2_nxv16i1_reg_off(<vscale x 16 x i1> %p0, i64 %m, i64 %n) nounwind {
562 ; CHECK-LABEL: whilele_x2_nxv16i1_reg_off:
564 ; CHECK-NEXT: whilele { p2.b, p3.b }, x0, x1
565 ; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b
567 %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 %m, i64 %n)
568 %part1 = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
569 %res = and <vscale x 16 x i1> %part1, %p0
570 ret <vscale x 16 x i1> %res
; Register-pair placement check: takes an extra predicate argument %p0 and
; ANDs it with element 0 of the @llvm.aarch64.sve.whilelo.x2.nxv8i1 result.
; The assertions expect the pair in { p2.h, p3.h } and an `and` of p2 with p0
; written to p0.
; NOTE(review): presumably %p0 occupies p0 on entry, which is why the pair
; starts at p2 -- confirm against the calling convention.
573 define <vscale x 8 x i1> @whilelo_x2_nxv8i1_reg_off(<vscale x 8 x i1> %p0, i64 %m, i64 %n) nounwind {
574 ; CHECK-LABEL: whilelo_x2_nxv8i1_reg_off:
576 ; CHECK-NEXT: whilelo { p2.h, p3.h }, x0, x1
577 ; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b
579 %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 %m, i64 %n)
580 %part1 = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
581 %res = and <vscale x 8 x i1> %part1, %p0
582 ret <vscale x 8 x i1> %res
; Register-pair placement check: takes an extra predicate argument %p0 and
; ANDs it with element 0 of the @llvm.aarch64.sve.whilels.x2.nxv4i1 result.
; The assertions expect the pair in { p2.s, p3.s } and an `and` of p2 with p0
; written to p0.
; NOTE(review): presumably %p0 occupies p0 on entry, which is why the pair
; starts at p2 -- confirm against the calling convention.
585 define <vscale x 4 x i1> @whilels_x2_nxv4i1_reg_off(<vscale x 4 x i1> %p0, i64 %m, i64 %n) nounwind {
586 ; CHECK-LABEL: whilels_x2_nxv4i1_reg_off:
588 ; CHECK-NEXT: whilels { p2.s, p3.s }, x0, x1
589 ; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b
591 %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 %m, i64 %n)
592 %part1 = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
593 %res = and <vscale x 4 x i1> %part1, %p0
594 ret <vscale x 4 x i1> %res
; Register-pair placement check: takes an extra predicate argument %p0 and
; ANDs it with element 0 of the @llvm.aarch64.sve.whilelt.x2.nxv2i1 result.
; The assertions expect the pair in { p2.d, p3.d } and an `and` of p2 with p0
; written to p0.
; NOTE(review): presumably %p0 occupies p0 on entry, which is why the pair
; starts at p2 -- confirm against the calling convention.
597 define <vscale x 2 x i1> @whilelt_x2_nxv2i1_reg_off(<vscale x 2 x i1> %p0, i64 %m, i64 %n) nounwind {
598 ; CHECK-LABEL: whilelt_x2_nxv2i1_reg_off:
600 ; CHECK-NEXT: whilelt { p2.d, p3.d }, x0, x1
601 ; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b
603 %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 %m, i64 %n)
604 %part1 = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
605 %res = and <vscale x 2 x i1> %part1, %p0
606 ret <vscale x 2 x i1> %res
610 declare { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64, i64)
611 declare { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64, i64)
612 declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64, i64)
613 declare { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64, i64)
616 declare { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64, i64)
617 declare { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64, i64)
618 declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64, i64)
619 declare { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64, i64)
622 declare { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64, i64)
623 declare { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64, i64)
624 declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64, i64)
625 declare { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64, i64)
628 declare { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64, i64)
629 declare { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64, i64)
630 declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64, i64)
631 declare { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64, i64)
634 declare { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64, i64)
635 declare { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64, i64)
636 declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64, i64)
637 declare { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64, i64)
640 declare { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64, i64)
641 declare { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64, i64)
642 declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64, i64)
643 declare { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64, i64)
646 declare { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64, i64)
647 declare { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64, i64)
648 declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64, i64)
649 declare { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64, i64)
652 declare { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64, i64)
653 declare { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64, i64)
654 declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64, i64)
655 declare { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64, i64)
657 ; == SVBOOL CONVERSION ==
658 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
659 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
660 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
662 ; == VECTOR INSERTS ==
663 declare <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1>, <vscale x 16 x i1>, i64 immarg)