1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
; Sign-extends the <4 x i1> result of `icmp sgt %src, zeroinitializer` to
; <4 x i32>. The assertions expect a vcmp.s32 against zr followed by a vpsel
; that picks between a vector of 0xff bytes (q2) and a zero vector (q1).
4 define arm_aapcs_vfpcc <4 x i32> @sext_v4i1_v4i32(<4 x i32> %src) {
5 ; CHECK-LABEL: sext_v4i1_v4i32:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vmov.i32 q1, #0x0
8 ; CHECK-NEXT: vmov.i8 q2, #0xff
9 ; CHECK-NEXT: vcmp.s32 gt, q0, zr
10 ; CHECK-NEXT: vpsel q0, q2, q1
13 %c = icmp sgt <4 x i32> %src, zeroinitializer
14 %0 = sext <4 x i1> %c to <4 x i32>
; Sign-extends the <4 x i1> result of `fcmp une %src1, %src2` on <4 x float>
; operands to <4 x i32>. The assertions expect a vcmp.f32 ne of q0 and q1
; followed by a vpsel between a vector of 0xff bytes (q3) and zero (q2).
18 define arm_aapcs_vfpcc <4 x i32> @sext_v4i1_v4f32(<4 x float> %src1, <4 x float> %src2) {
19 ; CHECK-LABEL: sext_v4i1_v4f32:
20 ; CHECK: @ %bb.0: @ %entry
21 ; CHECK-NEXT: vmov.i32 q2, #0x0
22 ; CHECK-NEXT: vmov.i8 q3, #0xff
23 ; CHECK-NEXT: vcmp.f32 ne, q0, q1
24 ; CHECK-NEXT: vpsel q0, q3, q2
27 %c = fcmp une <4 x float> %src1, %src2
28 %0 = sext <4 x i1> %c to <4 x i32>
; Sign-extends the <8 x i1> result of `icmp sgt %src, zeroinitializer` to
; <8 x i16>. The assertions expect a vcmp.s16 against zr followed by a vpsel
; that picks between a vector of 0xff bytes (q2) and a zero vector (q1).
32 define arm_aapcs_vfpcc <8 x i16> @sext_v8i1_v8i16(<8 x i16> %src) {
33 ; CHECK-LABEL: sext_v8i1_v8i16:
34 ; CHECK: @ %bb.0: @ %entry
35 ; CHECK-NEXT: vmov.i16 q1, #0x0
36 ; CHECK-NEXT: vmov.i8 q2, #0xff
37 ; CHECK-NEXT: vcmp.s16 gt, q0, zr
38 ; CHECK-NEXT: vpsel q0, q2, q1
41 %c = icmp sgt <8 x i16> %src, zeroinitializer
42 %0 = sext <8 x i1> %c to <8 x i16>
; Sign-extends the <8 x i1> result of `fcmp une %src1, %src2` on <8 x half>
; operands to <8 x i16>. The assertions expect a vcmp.f16 ne of q0 and q1
; followed by a vpsel between a vector of 0xff bytes (q3) and zero (q2).
; NOTE(review): the function name ends in v8f32 although the operands are
; <8 x half>; left unchanged because the label assertion depends on the name.
46 define arm_aapcs_vfpcc <8 x i16> @sext_v8i1_v8f32(<8 x half> %src1, <8 x half> %src2) {
47 ; CHECK-LABEL: sext_v8i1_v8f32:
48 ; CHECK: @ %bb.0: @ %entry
49 ; CHECK-NEXT: vmov.i16 q2, #0x0
50 ; CHECK-NEXT: vmov.i8 q3, #0xff
51 ; CHECK-NEXT: vcmp.f16 ne, q0, q1
52 ; CHECK-NEXT: vpsel q0, q3, q2
55 %c = fcmp une <8 x half> %src1, %src2
56 %0 = sext <8 x i1> %c to <8 x i16>
; Sign-extends the <16 x i1> result of `icmp sgt %src, zeroinitializer` to
; <16 x i8>. The assertions expect a vcmp.s8 against zr followed by a vpsel
; that picks between a vector of 0xff bytes (q2) and a zero vector (q1).
60 define arm_aapcs_vfpcc <16 x i8> @sext_v16i1_v16i8(<16 x i8> %src) {
61 ; CHECK-LABEL: sext_v16i1_v16i8:
62 ; CHECK: @ %bb.0: @ %entry
63 ; CHECK-NEXT: vmov.i8 q1, #0x0
64 ; CHECK-NEXT: vmov.i8 q2, #0xff
65 ; CHECK-NEXT: vcmp.s8 gt, q0, zr
66 ; CHECK-NEXT: vpsel q0, q2, q1
69 %c = icmp sgt <16 x i8> %src, zeroinitializer
70 %0 = sext <16 x i1> %c to <16 x i8>
; Sign-extends the <2 x i1> result of `icmp sgt %src, zeroinitializer` to
; <2 x i64>. The assertions expect a scalar sequence rather than a vector
; compare: each 64-bit lane is moved to a GPR pair (vmov from d1 / d0),
; compared with an rsbs/sbcs pair, turned into a mask with csetm, and the two
; masks are written back into q0 with paired-lane vmov instructions.
74 define arm_aapcs_vfpcc <2 x i64> @sext_v2i1_v2i64(<2 x i64> %src) {
75 ; CHECK-LABEL: sext_v2i1_v2i64:
76 ; CHECK: @ %bb.0: @ %entry
77 ; CHECK-NEXT: vmov r0, r1, d1
78 ; CHECK-NEXT: movs r3, #0
79 ; CHECK-NEXT: vmov r2, r12, d0
80 ; CHECK-NEXT: rsbs r0, r0, #0
81 ; CHECK-NEXT: sbcs.w r0, r3, r1
82 ; CHECK-NEXT: mov.w r0, #0
84 ; CHECK-NEXT: movlt r0, #1
85 ; CHECK-NEXT: cmp r0, #0
86 ; CHECK-NEXT: csetm r0, ne
87 ; CHECK-NEXT: rsbs r1, r2, #0
88 ; CHECK-NEXT: sbcs.w r1, r3, r12
90 ; CHECK-NEXT: movlt r3, #1
91 ; CHECK-NEXT: cmp r3, #0
92 ; CHECK-NEXT: csetm r1, ne
93 ; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
94 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
97 %c = icmp sgt <2 x i64> %src, zeroinitializer
98 %0 = sext <2 x i1> %c to <2 x i64>
; Sign-extends the <2 x i1> result of `fcmp une %src, zeroinitializer` on
; <2 x double> to <2 x i64>. The assertions expect one call to
; __aeabi_dcmpeq per lane, comparing against a double 0 loaded from the
; constant pool entry .LCPI6_0. Each call result goes through clz / lsrs #5 /
; csetm ne to form a lane mask, and the two masks are written into q0 with
; paired-lane vmov instructions. q4 (d8, d9) and r4-r6 are saved and restored
; around the calls.
102 define arm_aapcs_vfpcc <2 x i64> @sext_v2i1_v2f64(<2 x double> %src) {
103 ; CHECK-LABEL: sext_v2i1_v2f64:
104 ; CHECK: @ %bb.0: @ %entry
105 ; CHECK-NEXT: .save {r4, r5, r6, lr}
106 ; CHECK-NEXT: push {r4, r5, r6, lr}
107 ; CHECK-NEXT: .vsave {d8, d9}
108 ; CHECK-NEXT: vpush {d8, d9}
109 ; CHECK-NEXT: vmov q4, q0
110 ; CHECK-NEXT: vldr d0, .LCPI6_0
111 ; CHECK-NEXT: vmov r0, r1, d9
112 ; CHECK-NEXT: vmov r4, r5, d0
113 ; CHECK-NEXT: mov r2, r4
114 ; CHECK-NEXT: mov r3, r5
115 ; CHECK-NEXT: bl __aeabi_dcmpeq
116 ; CHECK-NEXT: vmov r2, r1, d8
117 ; CHECK-NEXT: clz r0, r0
118 ; CHECK-NEXT: mov r3, r5
119 ; CHECK-NEXT: lsrs r0, r0, #5
120 ; CHECK-NEXT: csetm r6, ne
121 ; CHECK-NEXT: mov r0, r2
122 ; CHECK-NEXT: mov r2, r4
123 ; CHECK-NEXT: bl __aeabi_dcmpeq
124 ; CHECK-NEXT: clz r0, r0
125 ; CHECK-NEXT: lsrs r0, r0, #5
126 ; CHECK-NEXT: csetm r0, ne
127 ; CHECK-NEXT: vmov q0[2], q0[0], r0, r6
128 ; CHECK-NEXT: vmov q0[3], q0[1], r0, r6
129 ; CHECK-NEXT: vpop {d8, d9}
130 ; CHECK-NEXT: pop {r4, r5, r6, pc}
131 ; CHECK-NEXT: .p2align 3
132 ; CHECK-NEXT: @ %bb.1:
133 ; CHECK-NEXT: .LCPI6_0:
134 ; CHECK-NEXT: .long 0 @ double 0
135 ; CHECK-NEXT: .long 0
137 %c = fcmp une <2 x double> %src, zeroinitializer
138 %0 = sext <2 x i1> %c to <2 x i64>
; Zero-extends the <4 x i1> result of `icmp sgt %src, zeroinitializer` to
; <4 x i32>. The assertions expect a vcmp.s32 against zr followed by a vpsel
; that picks between a splat of #0x1 (q2) and a zero vector (q1).
143 define arm_aapcs_vfpcc <4 x i32> @zext_v4i1_v4i32(<4 x i32> %src) {
144 ; CHECK-LABEL: zext_v4i1_v4i32:
145 ; CHECK: @ %bb.0: @ %entry
146 ; CHECK-NEXT: vmov.i32 q1, #0x0
147 ; CHECK-NEXT: vmov.i32 q2, #0x1
148 ; CHECK-NEXT: vcmp.s32 gt, q0, zr
149 ; CHECK-NEXT: vpsel q0, q2, q1
152 %c = icmp sgt <4 x i32> %src, zeroinitializer
153 %0 = zext <4 x i1> %c to <4 x i32>
; Zero-extends the <4 x i1> result of `fcmp une %src1, %src2` on <4 x float>
; operands to <4 x i32>. The assertions expect a vcmp.f32 ne of q0 and q1
; followed by a vpsel between a splat of #0x1 (q3) and zero (q2).
157 define arm_aapcs_vfpcc <4 x i32> @zext_v4i1_v4f32(<4 x float> %src1, <4 x float> %src2) {
158 ; CHECK-LABEL: zext_v4i1_v4f32:
159 ; CHECK: @ %bb.0: @ %entry
160 ; CHECK-NEXT: vmov.i32 q2, #0x0
161 ; CHECK-NEXT: vmov.i32 q3, #0x1
162 ; CHECK-NEXT: vcmp.f32 ne, q0, q1
163 ; CHECK-NEXT: vpsel q0, q3, q2
166 %c = fcmp une <4 x float> %src1, %src2
167 %0 = zext <4 x i1> %c to <4 x i32>
; Zero-extends the <8 x i1> result of `icmp sgt %src, zeroinitializer` to
; <8 x i16>. The assertions expect a vcmp.s16 against zr followed by a vpsel
; that picks between a splat of #0x1 (q2) and a zero vector (q1).
171 define arm_aapcs_vfpcc <8 x i16> @zext_v8i1_v8i16(<8 x i16> %src) {
172 ; CHECK-LABEL: zext_v8i1_v8i16:
173 ; CHECK: @ %bb.0: @ %entry
174 ; CHECK-NEXT: vmov.i16 q1, #0x0
175 ; CHECK-NEXT: vmov.i16 q2, #0x1
176 ; CHECK-NEXT: vcmp.s16 gt, q0, zr
177 ; CHECK-NEXT: vpsel q0, q2, q1
180 %c = icmp sgt <8 x i16> %src, zeroinitializer
181 %0 = zext <8 x i1> %c to <8 x i16>
; Zero-extends the <8 x i1> result of `fcmp une %src1, %src2` on <8 x half>
; operands to <8 x i16>. The assertions expect a vcmp.f16 ne of q0 and q1
; followed by a vpsel between a splat of #0x1 (q3) and zero (q2).
; NOTE(review): the function name ends in v8f32 although the operands are
; <8 x half>; left unchanged because the label assertion depends on the name.
185 define arm_aapcs_vfpcc <8 x i16> @zext_v8i1_v8f32(<8 x half> %src1, <8 x half> %src2) {
186 ; CHECK-LABEL: zext_v8i1_v8f32:
187 ; CHECK: @ %bb.0: @ %entry
188 ; CHECK-NEXT: vmov.i16 q2, #0x0
189 ; CHECK-NEXT: vmov.i16 q3, #0x1
190 ; CHECK-NEXT: vcmp.f16 ne, q0, q1
191 ; CHECK-NEXT: vpsel q0, q3, q2
194 %c = fcmp une <8 x half> %src1, %src2
195 %0 = zext <8 x i1> %c to <8 x i16>
; Zero-extends the <16 x i1> result of `icmp sgt %src, zeroinitializer` to
; <16 x i8>. The assertions expect a vcmp.s8 against zr followed by a vpsel
; that picks between a splat of #0x1 (q2) and a zero vector (q1).
199 define arm_aapcs_vfpcc <16 x i8> @zext_v16i1_v16i8(<16 x i8> %src) {
200 ; CHECK-LABEL: zext_v16i1_v16i8:
201 ; CHECK: @ %bb.0: @ %entry
202 ; CHECK-NEXT: vmov.i8 q1, #0x0
203 ; CHECK-NEXT: vmov.i8 q2, #0x1
204 ; CHECK-NEXT: vcmp.s8 gt, q0, zr
205 ; CHECK-NEXT: vpsel q0, q2, q1
208 %c = icmp sgt <16 x i8> %src, zeroinitializer
209 %0 = zext <16 x i1> %c to <16 x i8>
; Zero-extends the <2 x i1> result of `icmp sgt %src, zeroinitializer` to
; <2 x i64>. The assertions expect a scalar sequence: each 64-bit lane is
; moved to GPRs, compared with an rsbs/sbcs pair and turned into a mask with
; csetm; the masks are inserted into q1 and then vand-ed with the constant
; pool vector .LCPI12_0 (words 1, 0, 1, 0) to produce the result in q0.
213 define arm_aapcs_vfpcc <2 x i64> @zext_v2i1_v2i64(<2 x i64> %src) {
214 ; CHECK-LABEL: zext_v2i1_v2i64:
215 ; CHECK: @ %bb.0: @ %entry
216 ; CHECK-NEXT: .save {r7, lr}
217 ; CHECK-NEXT: push {r7, lr}
218 ; CHECK-NEXT: vmov r0, r1, d1
219 ; CHECK-NEXT: movs r3, #0
220 ; CHECK-NEXT: vmov lr, r12, d0
221 ; CHECK-NEXT: adr r2, .LCPI12_0
222 ; CHECK-NEXT: vldrw.u32 q0, [r2]
223 ; CHECK-NEXT: rsbs r0, r0, #0
224 ; CHECK-NEXT: sbcs.w r0, r3, r1
225 ; CHECK-NEXT: mov.w r0, #0
227 ; CHECK-NEXT: movlt r0, #1
228 ; CHECK-NEXT: cmp r0, #0
229 ; CHECK-NEXT: csetm r0, ne
230 ; CHECK-NEXT: rsbs.w r1, lr, #0
231 ; CHECK-NEXT: sbcs.w r1, r3, r12
233 ; CHECK-NEXT: movlt r3, #1
234 ; CHECK-NEXT: cmp r3, #0
235 ; CHECK-NEXT: csetm r1, ne
236 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
237 ; CHECK-NEXT: vand q0, q1, q0
238 ; CHECK-NEXT: pop {r7, pc}
239 ; CHECK-NEXT: .p2align 4
240 ; CHECK-NEXT: @ %bb.1:
241 ; CHECK-NEXT: .LCPI12_0:
242 ; CHECK-NEXT: .long 1 @ 0x1
243 ; CHECK-NEXT: .long 0 @ 0x0
244 ; CHECK-NEXT: .long 1 @ 0x1
245 ; CHECK-NEXT: .long 0 @ 0x0
247 %c = icmp sgt <2 x i64> %src, zeroinitializer
248 %0 = zext <2 x i1> %c to <2 x i64>
; Zero-extends the <2 x i1> result of `fcmp une %src, zeroinitializer` on
; <2 x double> to <2 x i64>. The assertions expect one call to
; __aeabi_dcmpeq per lane against a double 0 loaded from .LCPI13_0. Each call
; result goes through clz / lsrs #5 / csetm ne to form a lane mask; the masks
; are inserted into q0 and vand-ed with the vector loaded from .LCPI13_1
; (words 1, 0, 1, 0).
252 define arm_aapcs_vfpcc <2 x i64> @zext_v2i1_v2f64(<2 x double> %src) {
253 ; CHECK-LABEL: zext_v2i1_v2f64:
254 ; CHECK: @ %bb.0: @ %entry
255 ; CHECK-NEXT: .save {r4, r5, r6, lr}
256 ; CHECK-NEXT: push {r4, r5, r6, lr}
257 ; CHECK-NEXT: .vsave {d8, d9}
258 ; CHECK-NEXT: vpush {d8, d9}
259 ; CHECK-NEXT: vmov q4, q0
260 ; CHECK-NEXT: vldr d0, .LCPI13_0
261 ; CHECK-NEXT: vmov r0, r1, d9
262 ; CHECK-NEXT: vmov r4, r5, d0
263 ; CHECK-NEXT: mov r2, r4
264 ; CHECK-NEXT: mov r3, r5
265 ; CHECK-NEXT: bl __aeabi_dcmpeq
266 ; CHECK-NEXT: vmov r2, r1, d8
267 ; CHECK-NEXT: clz r0, r0
268 ; CHECK-NEXT: adr r3, .LCPI13_1
269 ; CHECK-NEXT: lsrs r0, r0, #5
270 ; CHECK-NEXT: vldrw.u32 q4, [r3]
271 ; CHECK-NEXT: mov r3, r5
272 ; CHECK-NEXT: csetm r6, ne
273 ; CHECK-NEXT: mov r0, r2
274 ; CHECK-NEXT: mov r2, r4
275 ; CHECK-NEXT: bl __aeabi_dcmpeq
276 ; CHECK-NEXT: clz r0, r0
277 ; CHECK-NEXT: lsrs r0, r0, #5
278 ; CHECK-NEXT: csetm r0, ne
279 ; CHECK-NEXT: vmov q0[2], q0[0], r0, r6
280 ; CHECK-NEXT: vand q0, q0, q4
281 ; CHECK-NEXT: vpop {d8, d9}
282 ; CHECK-NEXT: pop {r4, r5, r6, pc}
283 ; CHECK-NEXT: .p2align 4
284 ; CHECK-NEXT: @ %bb.1:
285 ; CHECK-NEXT: .LCPI13_1:
286 ; CHECK-NEXT: .long 1 @ 0x1
287 ; CHECK-NEXT: .long 0 @ 0x0
288 ; CHECK-NEXT: .long 1 @ 0x1
289 ; CHECK-NEXT: .long 0 @ 0x0
290 ; CHECK-NEXT: .LCPI13_0:
291 ; CHECK-NEXT: .long 0 @ double 0
292 ; CHECK-NEXT: .long 0
294 %c = fcmp une <2 x double> %src, zeroinitializer
295 %0 = zext <2 x i1> %c to <2 x i64>
; Truncates <4 x i32> %src to <4 x i1> and uses the result as the condition of
; a select between %src and zeroinitializer. The assertions expect the low
; bit to be isolated with a vand against a splat of #0x1, tested with
; vcmp.i32 ne against zr, and the result chosen with vpsel q0, q0, q1.
300 define arm_aapcs_vfpcc <4 x i32> @trunc_v4i1_v4i32(<4 x i32> %src) {
301 ; CHECK-LABEL: trunc_v4i1_v4i32:
302 ; CHECK: @ %bb.0: @ %entry
303 ; CHECK-NEXT: vmov.i32 q2, #0x1
304 ; CHECK-NEXT: vmov.i32 q1, #0x0
305 ; CHECK-NEXT: vand q2, q0, q2
306 ; CHECK-NEXT: vcmp.i32 ne, q2, zr
307 ; CHECK-NEXT: vpsel q0, q0, q1
310 %0 = trunc <4 x i32> %src to <4 x i1>
311 %1 = select <4 x i1> %0, <4 x i32> %src, <4 x i32> zeroinitializer
; Truncates <8 x i16> %src to <8 x i1> and uses the result as the condition of
; a select between %src and zeroinitializer. The assertions expect the low
; bit to be isolated with a vand against a vmov.i16 splat of #0x1, tested
; with vcmp.i16 ne against zr, and the result chosen with vpsel q0, q0, q1.
315 define arm_aapcs_vfpcc <8 x i16> @trunc_v8i1_v8i16(<8 x i16> %src) {
316 ; CHECK-LABEL: trunc_v8i1_v8i16:
317 ; CHECK: @ %bb.0: @ %entry
318 ; CHECK-NEXT: vmov.i16 q2, #0x1
319 ; CHECK-NEXT: vmov.i32 q1, #0x0
320 ; CHECK-NEXT: vand q2, q0, q2
321 ; CHECK-NEXT: vcmp.i16 ne, q2, zr
322 ; CHECK-NEXT: vpsel q0, q0, q1
325 %0 = trunc <8 x i16> %src to <8 x i1>
326 %1 = select <8 x i1> %0, <8 x i16> %src, <8 x i16> zeroinitializer
; Truncates <16 x i8> %src to <16 x i1> and uses the result as the condition
; of a select between %src and zeroinitializer. The assertions expect the low
; bit to be isolated with a vand against a vmov.i8 splat of #0x1, tested with
; vcmp.i8 ne against zr, and the result chosen with vpsel q0, q0, q1.
330 define arm_aapcs_vfpcc <16 x i8> @trunc_v16i1_v16i8(<16 x i8> %src) {
331 ; CHECK-LABEL: trunc_v16i1_v16i8:
332 ; CHECK: @ %bb.0: @ %entry
333 ; CHECK-NEXT: vmov.i8 q2, #0x1
334 ; CHECK-NEXT: vmov.i32 q1, #0x0
335 ; CHECK-NEXT: vand q2, q0, q2
336 ; CHECK-NEXT: vcmp.i8 ne, q2, zr
337 ; CHECK-NEXT: vpsel q0, q0, q1
340 %0 = trunc <16 x i8> %src to <16 x i1>
341 %1 = select <16 x i1> %0, <16 x i8> %src, <16 x i8> zeroinitializer
; Truncates <2 x i64> %src to <2 x i1> and uses the result as the condition of
; a select between %src and zeroinitializer. The assertions expect a scalar
; sequence: s2 and s0 are moved to GPRs, masked with `and ..., #1`, negated
; with rsbs, inserted into q1 with paired-lane vmov instructions, and the
; resulting vector is vand-ed with q0.
345 define arm_aapcs_vfpcc <2 x i64> @trunc_v2i1_v2i64(<2 x i64> %src) {
346 ; CHECK-LABEL: trunc_v2i1_v2i64:
347 ; CHECK: @ %bb.0: @ %entry
348 ; CHECK-NEXT: vmov r0, s2
349 ; CHECK-NEXT: vmov r1, s0
350 ; CHECK-NEXT: and r0, r0, #1
351 ; CHECK-NEXT: and r1, r1, #1
352 ; CHECK-NEXT: rsbs r0, r0, #0
353 ; CHECK-NEXT: rsbs r1, r1, #0
354 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
355 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
356 ; CHECK-NEXT: vand q0, q0, q1
359 %0 = trunc <2 x i64> %src to <2 x i1>
360 %1 = select <2 x i1> %0, <2 x i64> %src, <2 x i64> zeroinitializer
; Converts the <4 x i1> result of `icmp sgt %src, zeroinitializer` to
; <4 x float> with uitofp. The assertions expect a vcmp.s32 against zr
; followed by a vpsel between a splat of #1.000000e+00 (q2) and zero (q1).
365 define arm_aapcs_vfpcc <4 x float> @uitofp_v4i1_v4f32(<4 x i32> %src) {
366 ; CHECK-LABEL: uitofp_v4i1_v4f32:
367 ; CHECK: @ %bb.0: @ %entry
368 ; CHECK-NEXT: vmov.i32 q1, #0x0
369 ; CHECK-NEXT: vmov.f32 q2, #1.000000e+00
370 ; CHECK-NEXT: vcmp.s32 gt, q0, zr
371 ; CHECK-NEXT: vpsel q0, q2, q1
374 %c = icmp sgt <4 x i32> %src, zeroinitializer
375 %0 = uitofp <4 x i1> %c to <4 x float>
; Converts the <4 x i1> result of `icmp sgt %src, zeroinitializer` to
; <4 x float> with sitofp. The assertions expect a vcmp.s32 against zr
; followed by a vpsel between a splat of #-1.000000e+00 (q2) and zero (q1).
379 define arm_aapcs_vfpcc <4 x float> @sitofp_v4i1_v4f32(<4 x i32> %src) {
380 ; CHECK-LABEL: sitofp_v4i1_v4f32:
381 ; CHECK: @ %bb.0: @ %entry
382 ; CHECK-NEXT: vmov.i32 q1, #0x0
383 ; CHECK-NEXT: vmov.f32 q2, #-1.000000e+00
384 ; CHECK-NEXT: vcmp.s32 gt, q0, zr
385 ; CHECK-NEXT: vpsel q0, q2, q1
388 %c = icmp sgt <4 x i32> %src, zeroinitializer
389 %0 = sitofp <4 x i1> %c to <4 x float>
; Converts <4 x float> %src to <4 x i1> with fptoui and uses the result to
; select between a splat of float 1.0 and zeroinitializer. The assertions
; expect a vcmp.f32 ne against zr followed by a vpsel between the
; #1.000000e+00 splat (q2) and zero (q1).
393 define arm_aapcs_vfpcc <4 x float> @fptoui_v4i1_v4f32(<4 x float> %src) {
394 ; CHECK-LABEL: fptoui_v4i1_v4f32:
395 ; CHECK: @ %bb.0: @ %entry
396 ; CHECK-NEXT: vmov.i32 q1, #0x0
397 ; CHECK-NEXT: vmov.f32 q2, #1.000000e+00
398 ; CHECK-NEXT: vcmp.f32 ne, q0, zr
399 ; CHECK-NEXT: vpsel q0, q2, q1
402 %0 = fptoui <4 x float> %src to <4 x i1>
403 %s = select <4 x i1> %0, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, <4 x float> zeroinitializer
; Converts <4 x float> %src to <4 x i1> with fptosi and uses the result to
; select between a splat of float 1.0 and zeroinitializer. The assertions
; expect the same sequence as the fptoui variant: a vcmp.f32 ne against zr
; followed by a vpsel between the #1.000000e+00 splat (q2) and zero (q1).
407 define arm_aapcs_vfpcc <4 x float> @fptosi_v4i1_v4f32(<4 x float> %src) {
408 ; CHECK-LABEL: fptosi_v4i1_v4f32:
409 ; CHECK: @ %bb.0: @ %entry
410 ; CHECK-NEXT: vmov.i32 q1, #0x0
411 ; CHECK-NEXT: vmov.f32 q2, #1.000000e+00
412 ; CHECK-NEXT: vcmp.f32 ne, q0, zr
413 ; CHECK-NEXT: vpsel q0, q2, q1
416 %0 = fptosi <4 x float> %src to <4 x i1>
417 %s = select <4 x i1> %0, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, <4 x float> zeroinitializer
; Converts the <8 x i1> result of `icmp sgt %src, zeroinitializer` to
; <8 x half> with uitofp. The assertions expect a vcmp.s16 against zr
; followed by a vpsel between a vmov.i16 splat of #0x3c00 (q2) and zero (q1).
423 define arm_aapcs_vfpcc <8 x half> @uitofp_v8i1_v8f16(<8 x i16> %src) {
424 ; CHECK-LABEL: uitofp_v8i1_v8f16:
425 ; CHECK: @ %bb.0: @ %entry
426 ; CHECK-NEXT: vmov.i16 q1, #0x0
427 ; CHECK-NEXT: vmov.i16 q2, #0x3c00
428 ; CHECK-NEXT: vcmp.s16 gt, q0, zr
429 ; CHECK-NEXT: vpsel q0, q2, q1
432 %c = icmp sgt <8 x i16> %src, zeroinitializer
433 %0 = uitofp <8 x i1> %c to <8 x half>
; Converts the <8 x i1> result of `icmp sgt %src, zeroinitializer` to
; <8 x half> with sitofp. The assertions expect a vcmp.s16 against zr
; followed by a vpsel between a vmov.i16 splat of #0xbc00 (q2) and zero (q1).
437 define arm_aapcs_vfpcc <8 x half> @sitofp_v8i1_v8f16(<8 x i16> %src) {
438 ; CHECK-LABEL: sitofp_v8i1_v8f16:
439 ; CHECK: @ %bb.0: @ %entry
440 ; CHECK-NEXT: vmov.i16 q1, #0x0
441 ; CHECK-NEXT: vmov.i16 q2, #0xbc00
442 ; CHECK-NEXT: vcmp.s16 gt, q0, zr
443 ; CHECK-NEXT: vpsel q0, q2, q1
446 %c = icmp sgt <8 x i16> %src, zeroinitializer
447 %0 = sitofp <8 x i1> %c to <8 x half>
; Converts <8 x half> %src to <8 x i1> with fptoui and uses the result to
; select between a splat of half 1.0 and zeroinitializer. The assertions
; expect a vcmp.f16 ne against zr followed by a vpsel between a vmov.i16
; splat of #0x3c00 (q2) and zero (q1).
451 define arm_aapcs_vfpcc <8 x half> @fptoui_v8i1_v8f16(<8 x half> %src) {
452 ; CHECK-LABEL: fptoui_v8i1_v8f16:
453 ; CHECK: @ %bb.0: @ %entry
454 ; CHECK-NEXT: vmov.i32 q1, #0x0
455 ; CHECK-NEXT: vmov.i16 q2, #0x3c00
456 ; CHECK-NEXT: vcmp.f16 ne, q0, zr
457 ; CHECK-NEXT: vpsel q0, q2, q1
460 %0 = fptoui <8 x half> %src to <8 x i1>
461 %s = select <8 x i1> %0, <8 x half> <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>, <8 x half> zeroinitializer
; Converts <8 x half> %src to <8 x i1> with fptosi and uses the result to
; select between a splat of half 1.0 and zeroinitializer. The assertions
; expect the same sequence as the fptoui variant: a vcmp.f16 ne against zr
; followed by a vpsel between a vmov.i16 splat of #0x3c00 (q2) and zero (q1).
465 define arm_aapcs_vfpcc <8 x half> @fptosi_v8i1_v8f16(<8 x half> %src) {
466 ; CHECK-LABEL: fptosi_v8i1_v8f16:
467 ; CHECK: @ %bb.0: @ %entry
468 ; CHECK-NEXT: vmov.i32 q1, #0x0
469 ; CHECK-NEXT: vmov.i16 q2, #0x3c00
470 ; CHECK-NEXT: vcmp.f16 ne, q0, zr
471 ; CHECK-NEXT: vpsel q0, q2, q1
474 %0 = fptosi <8 x half> %src to <8 x i1>
475 %s = select <8 x i1> %0, <8 x half> <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>, <8 x half> zeroinitializer