1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
; Select between two arbitrary constant vectors on a lane-wise i32 equality
; compare: both constants are materialized from the literal pool (adr+vldrw)
; and vpsel picks per-lane under the vcmp.i32 eq predicate.
; NOTE(review): the `ret`/`}` lines of this function appear elided from this
; chunk — confirm against the full file before regenerating checks.
4 define arm_aapcs_vfpcc <4 x i32> @cmp_sel_C1_or_C2_vec(<4 x i32> %x, <4 x i32> %y) {
5 ; CHECK-LABEL: cmp_sel_C1_or_C2_vec:
7 ; CHECK-NEXT: adr r0, .LCPI0_0
8 ; CHECK-NEXT: adr r1, .LCPI0_1
9 ; CHECK-NEXT: vcmp.i32 eq, q0, q1
10 ; CHECK-NEXT: vldrw.u32 q0, [r1]
11 ; CHECK-NEXT: vldrw.u32 q1, [r0]
12 ; CHECK-NEXT: vpsel q0, q1, q0
14 ; CHECK-NEXT: .p2align 4
15 ; CHECK-NEXT: @ %bb.1:
16 ; CHECK-NEXT: .LCPI0_0:
17 ; CHECK-NEXT: .long 3000 @ 0xbb8
18 ; CHECK-NEXT: .long 1 @ 0x1
19 ; CHECK-NEXT: .long 4294967295 @ 0xffffffff
20 ; CHECK-NEXT: .long 0 @ 0x0
21 ; CHECK-NEXT: .LCPI0_1:
22 ; CHECK-NEXT: .long 42 @ 0x2a
23 ; CHECK-NEXT: .long 0 @ 0x0
24 ; CHECK-NEXT: .long 4294967294 @ 0xfffffffe
25 ; CHECK-NEXT: .long 4294967295 @ 0xffffffff
26 %cond = icmp eq <4 x i32> %x, %y
27 %add = select <4 x i1> %cond, <4 x i32> <i32 3000, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
; Same select shape but the true vector is element-wise exactly (false + 1)
; (43/1/-1/0 vs 42/0/-2/-1). Checks show no add/increment trick is used:
; lowering is still two literal-pool loads plus vcmp/vpsel.
31 define arm_aapcs_vfpcc <4 x i32> @cmp_sel_Cplus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
32 ; CHECK-LABEL: cmp_sel_Cplus1_or_C_vec:
34 ; CHECK-NEXT: adr r0, .LCPI1_0
35 ; CHECK-NEXT: adr r1, .LCPI1_1
36 ; CHECK-NEXT: vcmp.i32 eq, q0, q1
37 ; CHECK-NEXT: vldrw.u32 q0, [r1]
38 ; CHECK-NEXT: vldrw.u32 q1, [r0]
39 ; CHECK-NEXT: vpsel q0, q1, q0
41 ; CHECK-NEXT: .p2align 4
42 ; CHECK-NEXT: @ %bb.1:
43 ; CHECK-NEXT: .LCPI1_0:
44 ; CHECK-NEXT: .long 43 @ 0x2b
45 ; CHECK-NEXT: .long 1 @ 0x1
46 ; CHECK-NEXT: .long 4294967295 @ 0xffffffff
47 ; CHECK-NEXT: .long 0 @ 0x0
48 ; CHECK-NEXT: .LCPI1_1:
49 ; CHECK-NEXT: .long 42 @ 0x2a
50 ; CHECK-NEXT: .long 0 @ 0x0
51 ; CHECK-NEXT: .long 4294967294 @ 0xfffffffe
52 ; CHECK-NEXT: .long 4294967295 @ 0xffffffff
53 %cond = icmp eq <4 x i32> %x, %y
54 %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
; True vector is element-wise (false - 1) (43/1/-1/0 vs 44/2/0/1); again
; lowered as two constant-pool loads selected with vcmp.i32 eq + vpsel.
58 define arm_aapcs_vfpcc <4 x i32> @cmp_sel_Cminus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
59 ; CHECK-LABEL: cmp_sel_Cminus1_or_C_vec:
61 ; CHECK-NEXT: adr r0, .LCPI2_0
62 ; CHECK-NEXT: adr r1, .LCPI2_1
63 ; CHECK-NEXT: vcmp.i32 eq, q0, q1
64 ; CHECK-NEXT: vldrw.u32 q0, [r1]
65 ; CHECK-NEXT: vldrw.u32 q1, [r0]
66 ; CHECK-NEXT: vpsel q0, q1, q0
68 ; CHECK-NEXT: .p2align 4
69 ; CHECK-NEXT: @ %bb.1:
70 ; CHECK-NEXT: .LCPI2_0:
71 ; CHECK-NEXT: .long 43 @ 0x2b
72 ; CHECK-NEXT: .long 1 @ 0x1
73 ; CHECK-NEXT: .long 4294967295 @ 0xffffffff
74 ; CHECK-NEXT: .long 0 @ 0x0
75 ; CHECK-NEXT: .LCPI2_1:
76 ; CHECK-NEXT: .long 44 @ 0x2c
77 ; CHECK-NEXT: .long 2 @ 0x2
78 ; CHECK-NEXT: .long 0 @ 0x0
79 ; CHECK-NEXT: .long 1 @ 0x1
80 %cond = icmp eq <4 x i32> %x, %y
81 %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 44, i32 2, i32 0, i32 1>
; select(eq, -1-splat, 0-splat): both splats fit vmov immediates (vmov.i8
; #0xff / vmov.i32 #0x0), so no literal pool is needed — just vcmp + vpsel.
85 define arm_aapcs_vfpcc <4 x i32> @cmp_sel_minus1_or_0_vec(<4 x i32> %x, <4 x i32> %y) {
86 ; CHECK-LABEL: cmp_sel_minus1_or_0_vec:
88 ; CHECK-NEXT: vmov.i32 q2, #0x0
89 ; CHECK-NEXT: vmov.i8 q3, #0xff
90 ; CHECK-NEXT: vcmp.i32 eq, q0, q1
91 ; CHECK-NEXT: vpsel q0, q3, q2
93 %cond = icmp eq <4 x i32> %x, %y
94 %add = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
; Inverted arms of the previous test: select(eq, 0-splat, -1-splat). The
; vpsel operand order swaps accordingly; same vmov-immediate materialization.
98 define arm_aapcs_vfpcc <4 x i32> @cmp_sel_0_or_minus1_vec(<4 x i32> %x, <4 x i32> %y) {
99 ; CHECK-LABEL: cmp_sel_0_or_minus1_vec:
101 ; CHECK-NEXT: vmov.i8 q2, #0xff
102 ; CHECK-NEXT: vmov.i32 q3, #0x0
103 ; CHECK-NEXT: vcmp.i32 eq, q0, q1
104 ; CHECK-NEXT: vpsel q0, q3, q2
106 %cond = icmp eq <4 x i32> %x, %y
107 %add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
; select(eq, 1-splat, 0-splat) — i.e. zext of the compare result. Lowered as
; vmov.i32 #0x1 / #0x0 plus vcmp + vpsel rather than any mask-and trick.
111 define arm_aapcs_vfpcc <4 x i32> @cmp_sel_1_or_0_vec(<4 x i32> %x, <4 x i32> %y) {
112 ; CHECK-LABEL: cmp_sel_1_or_0_vec:
114 ; CHECK-NEXT: vmov.i32 q2, #0x0
115 ; CHECK-NEXT: vmov.i32 q3, #0x1
116 ; CHECK-NEXT: vcmp.i32 eq, q0, q1
117 ; CHECK-NEXT: vpsel q0, q3, q2
119 %cond = icmp eq <4 x i32> %x, %y
120 %add = select <4 x i1> %cond, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
; select(eq, 0-splat, 1-splat) — zext of the inverted compare; vpsel with the
; 0 vector in the true position, 1 vector in the false position.
124 define arm_aapcs_vfpcc <4 x i32> @cmp_sel_0_or_1_vec(<4 x i32> %x, <4 x i32> %y) {
125 ; CHECK-LABEL: cmp_sel_0_or_1_vec:
127 ; CHECK-NEXT: vmov.i32 q2, #0x1
128 ; CHECK-NEXT: vmov.i32 q3, #0x0
129 ; CHECK-NEXT: vcmp.i32 eq, q0, q1
130 ; CHECK-NEXT: vpsel q0, q3, q2
132 %cond = icmp eq <4 x i32> %x, %y
133 %add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; (a < 0) ? b : 0 folds to sign-bit broadcast + mask: vshr.s8 #7 turns each
; lane into all-ones/all-zeros, then vand with b — no vcmp/vpsel needed.
137 define arm_aapcs_vfpcc <16 x i8> @signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) {
138 ; CHECK-LABEL: signbit_mask_v16i8:
140 ; CHECK-NEXT: vshr.s8 q0, q0, #7
141 ; CHECK-NEXT: vand q0, q0, q1
143 %cond = icmp slt <16 x i8> %a, zeroinitializer
144 %r = select <16 x i1> %cond, <16 x i8> %b, <16 x i8> zeroinitializer
; i16 variant of the sign-bit mask fold: vshr.s16 #15 + vand.
148 define arm_aapcs_vfpcc <8 x i16> @signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) {
149 ; CHECK-LABEL: signbit_mask_v8i16:
151 ; CHECK-NEXT: vshr.s16 q0, q0, #15
152 ; CHECK-NEXT: vand q0, q0, q1
154 %cond = icmp slt <8 x i16> %a, zeroinitializer
155 %r = select <8 x i1> %cond, <8 x i16> %b, <8 x i16> zeroinitializer
; i32 variant of the sign-bit mask fold: vshr.s32 #31 + vand.
159 define arm_aapcs_vfpcc <4 x i32> @signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) {
160 ; CHECK-LABEL: signbit_mask_v4i32:
162 ; CHECK-NEXT: vshr.s32 q0, q0, #31
163 ; CHECK-NEXT: vand q0, q0, q1
165 %cond = icmp slt <4 x i32> %a, zeroinitializer
166 %r = select <4 x i1> %cond, <4 x i32> %b, <4 x i32> zeroinitializer
; i64 lanes have no MVE vector compare/shift here, so lowering goes scalar:
; extract the high word of each lane (s1/s3), asr #31 to get the sign mask,
; assemble the 16-bit MVE predicate with bfi, move it to p0 via vmsr, then
; vpsel between b and a zero vector.
170 define arm_aapcs_vfpcc <2 x i64> @signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) {
171 ; CHECK-LABEL: signbit_mask_v2i64:
173 ; CHECK-NEXT: vmov r1, s1
174 ; CHECK-NEXT: movs r0, #0
175 ; CHECK-NEXT: vmov.i32 q2, #0x0
176 ; CHECK-NEXT: asrs r1, r1, #31
177 ; CHECK-NEXT: bfi r0, r1, #0, #8
178 ; CHECK-NEXT: vmov r1, s3
179 ; CHECK-NEXT: asrs r1, r1, #31
180 ; CHECK-NEXT: bfi r0, r1, #8, #8
181 ; CHECK-NEXT: vmsr p0, r0
182 ; CHECK-NEXT: vpsel q0, q1, q2
184 %cond = icmp slt <2 x i64> %a, zeroinitializer
185 %r = select <2 x i1> %cond, <2 x i64> %b, <2 x i64> zeroinitializer
; (a < 0) ? -1 : b — set-mask form of the fold: sign-broadcast with
; vshr.s8 #7 then vorr with b (OR with all-ones yields all-ones).
189 define arm_aapcs_vfpcc <16 x i8> @signbit_setmask_v16i8(<16 x i8> %a, <16 x i8> %b) {
190 ; CHECK-LABEL: signbit_setmask_v16i8:
192 ; CHECK-NEXT: vshr.s8 q0, q0, #7
193 ; CHECK-NEXT: vorr q0, q0, q1
195 %cond = icmp slt <16 x i8> %a, zeroinitializer
196 %r = select <16 x i1> %cond, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %b
; i16 variant of the set-mask fold: vshr.s16 #15 + vorr.
200 define arm_aapcs_vfpcc <8 x i16> @signbit_setmask_v8i16(<8 x i16> %a, <8 x i16> %b) {
201 ; CHECK-LABEL: signbit_setmask_v8i16:
203 ; CHECK-NEXT: vshr.s16 q0, q0, #15
204 ; CHECK-NEXT: vorr q0, q0, q1
206 %cond = icmp slt <8 x i16> %a, zeroinitializer
207 %r = select <8 x i1> %cond, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %b
; i32 variant of the set-mask fold: vshr.s32 #31 + vorr.
211 define arm_aapcs_vfpcc <4 x i32> @signbit_setmask_v4i32(<4 x i32> %a, <4 x i32> %b) {
212 ; CHECK-LABEL: signbit_setmask_v4i32:
214 ; CHECK-NEXT: vshr.s32 q0, q0, #31
215 ; CHECK-NEXT: vorr q0, q0, q1
217 %cond = icmp slt <4 x i32> %a, zeroinitializer
218 %r = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %b
; i64 set-mask: same scalar sign-extraction + bfi/vmsr predicate build as the
; v2i64 mask case, but vpsel selects an all-ones vector (vmov.i8 #0xff) as
; the true arm instead of zero.
222 define arm_aapcs_vfpcc <2 x i64> @signbit_setmask_v2i64(<2 x i64> %a, <2 x i64> %b) {
223 ; CHECK-LABEL: signbit_setmask_v2i64:
225 ; CHECK-NEXT: vmov r1, s1
226 ; CHECK-NEXT: movs r0, #0
227 ; CHECK-NEXT: vmov.i8 q2, #0xff
228 ; CHECK-NEXT: asrs r1, r1, #31
229 ; CHECK-NEXT: bfi r0, r1, #0, #8
230 ; CHECK-NEXT: vmov r1, s3
231 ; CHECK-NEXT: asrs r1, r1, #31
232 ; CHECK-NEXT: bfi r0, r1, #8, #8
233 ; CHECK-NEXT: vmsr p0, r0
234 ; CHECK-NEXT: vpsel q0, q2, q1
236 %cond = icmp slt <2 x i64> %a, zeroinitializer
237 %r = select <2 x i1> %cond, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %b
; (a > -1) ? b : 0 — the not-sign-bit form. Current lowering materializes a
; -1 splat (vmov.i8 #0xff) and does vcmp.s8 gt + vpsel rather than folding
; to a shift/and of the inverted sign bit.
240 define arm_aapcs_vfpcc <16 x i8> @not_signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) {
241 ; CHECK-LABEL: not_signbit_mask_v16i8:
243 ; CHECK-NEXT: vmov.i8 q2, #0xff
244 ; CHECK-NEXT: vmov.i32 q3, #0x0
245 ; CHECK-NEXT: vcmp.s8 gt, q0, q2
246 ; CHECK-NEXT: vpsel q0, q1, q3
248 %cond = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
249 %r = select <16 x i1> %cond, <16 x i8> %b, <16 x i8> zeroinitializer
; i16 variant of the not-sign-bit mask: -1 splat + vcmp.s16 gt + vpsel.
253 define arm_aapcs_vfpcc <8 x i16> @not_signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) {
254 ; CHECK-LABEL: not_signbit_mask_v8i16:
256 ; CHECK-NEXT: vmov.i8 q2, #0xff
257 ; CHECK-NEXT: vmov.i32 q3, #0x0
258 ; CHECK-NEXT: vcmp.s16 gt, q0, q2
259 ; CHECK-NEXT: vpsel q0, q1, q3
261 %cond = icmp sgt <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
262 %r = select <8 x i1> %cond, <8 x i16> %b, <8 x i16> zeroinitializer
; i32 variant of the not-sign-bit mask: -1 splat + vcmp.s32 gt + vpsel.
266 define arm_aapcs_vfpcc <4 x i32> @not_signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) {
267 ; CHECK-LABEL: not_signbit_mask_v4i32:
269 ; CHECK-NEXT: vmov.i8 q2, #0xff
270 ; CHECK-NEXT: vmov.i32 q3, #0x0
271 ; CHECK-NEXT: vcmp.s32 gt, q0, q2
272 ; CHECK-NEXT: vpsel q0, q1, q3
274 %cond = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
275 %r = select <4 x i1> %cond, <4 x i32> %b, <4 x i32> zeroinitializer
; i64 not-sign-bit mask goes scalar: the high word of each lane is compared
; against -1 (cmp.w #-1) and csetm produces a 0/-1 mask per lane, which is
; packed into the p0 predicate via bfi/vmsr before the final vpsel.
; NOTE(review): the tail of this function (ret/closing brace) falls outside
; this view.
279 define arm_aapcs_vfpcc <2 x i64> @not_signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) {
280 ; CHECK-LABEL: not_signbit_mask_v2i64:
282 ; CHECK-NEXT: vmov r1, s1
283 ; CHECK-NEXT: movs r0, #0
284 ; CHECK-NEXT: vmov.i32 q2, #0x0
285 ; CHECK-NEXT: cmp.w r1, #-1
286 ; CHECK-NEXT: csetm r1, gt
287 ; CHECK-NEXT: bfi r0, r1, #0, #8
288 ; CHECK-NEXT: vmov r1, s3
289 ; CHECK-NEXT: cmp.w r1, #-1
290 ; CHECK-NEXT: csetm r1, gt
291 ; CHECK-NEXT: bfi r0, r1, #8, #8
292 ; CHECK-NEXT: vmsr p0, r0
293 ; CHECK-NEXT: vpsel q0, q1, q2
295 %cond = icmp sgt <2 x i64> %a, <i64 -1, i64 -1>
296 %r = select <2 x i1> %cond, <2 x i64> %b, <2 x i64> zeroinitializer