; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
; External callees, one per vector element width (i32, i16, i8). Each takes
; and returns a single 128-bit vector using the arm_aapcs_vfpcc convention.
declare arm_aapcs_vfpcc <4 x i32> @ext_i32(<4 x i32> %c)
declare arm_aapcs_vfpcc <8 x i16> @ext_i16(<8 x i16> %c)
declare arm_aapcs_vfpcc <16 x i8> @ext_i8(<16 x i8> %c)
; shuffle1_v4i32: the <4 x i1> result of comparing %src against zero drives one
; select before the call to @ext_i32 and a second select after it, so the
; predicate must survive the call. The expected output stores p0 to the stack
; with vstr ahead of the bl and loads it back with vldr for the final vpsel.
; The big-endian run additionally expects vrev64.32 on the incoming operands,
; on the value passed to the callee, and on the values returned.
define arm_aapcs_vfpcc <4 x i32> @shuffle1_v4i32(<4 x i32> %src, <4 x i32> %a) {
; CHECK-LE-LABEL: shuffle1_v4i32:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .save {r7, lr}
; CHECK-LE-NEXT:    push {r7, lr}
; CHECK-LE-NEXT:    .vsave {d8, d9}
; CHECK-LE-NEXT:    vpush {d8, d9}
; CHECK-LE-NEXT:    .pad #8
; CHECK-LE-NEXT:    sub sp, #8
; CHECK-LE-NEXT:    vcmp.i32 eq, q0, zr
; CHECK-LE-NEXT:    vmov.i32 q0, #0x0
; CHECK-LE-NEXT:    vpsel q0, q1, q0
; CHECK-LE-NEXT:    vmov q4, q1
; CHECK-LE-NEXT:    vstr p0, [sp, #4] @ 4-byte Spill
; CHECK-LE-NEXT:    bl ext_i32
; CHECK-LE-NEXT:    vldr p0, [sp, #4] @ 4-byte Reload
; CHECK-LE-NEXT:    vpsel q0, q4, q0
; CHECK-LE-NEXT:    add sp, #8
; CHECK-LE-NEXT:    vpop {d8, d9}
; CHECK-LE-NEXT:    pop {r7, pc}
;
; CHECK-BE-LABEL: shuffle1_v4i32:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .save {r7, lr}
; CHECK-BE-NEXT:    push {r7, lr}
; CHECK-BE-NEXT:    .vsave {d8, d9}
; CHECK-BE-NEXT:    vpush {d8, d9}
; CHECK-BE-NEXT:    .pad #8
; CHECK-BE-NEXT:    sub sp, #8
; CHECK-BE-NEXT:    vrev64.32 q4, q1
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vcmp.i32 eq, q1, zr
; CHECK-BE-NEXT:    vmov.i32 q0, #0x0
; CHECK-BE-NEXT:    vpsel q1, q4, q0
; CHECK-BE-NEXT:    vstr p0, [sp, #4] @ 4-byte Spill
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bl ext_i32
; CHECK-BE-NEXT:    vldr p0, [sp, #4] @ 4-byte Reload
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpsel q1, q4, q1
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    add sp, #8
; CHECK-BE-NEXT:    vpop {d8, d9}
; CHECK-BE-NEXT:    pop {r7, pc}
entry:
  ; %c is consumed on both sides of the call, which is what forces the spill.
  %c = icmp eq <4 x i32> %src, zeroinitializer
  %s1 = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer
  %ext = call arm_aapcs_vfpcc <4 x i32> @ext_i32(<4 x i32> %s1)
  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ext
  ret <4 x i32> %s
}

; shuffle1_v8i16: 16-bit-lane variant. The <8 x i1> result of comparing %src
; against zero is used by a select before the call to @ext_i16 and by another
; select after it, so the expected output stores p0 with vstr before the bl
; and loads it back with vldr afterwards. The big-endian run expects
; vrev64.16 on the vector operands plus a vrev32.16 applied to the zero vector
; that was materialised with vmov.i32.
define arm_aapcs_vfpcc <8 x i16> @shuffle1_v8i16(<8 x i16> %src, <8 x i16> %a) {
; CHECK-LE-LABEL: shuffle1_v8i16:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .save {r7, lr}
; CHECK-LE-NEXT:    push {r7, lr}
; CHECK-LE-NEXT:    .vsave {d8, d9}
; CHECK-LE-NEXT:    vpush {d8, d9}
; CHECK-LE-NEXT:    .pad #8
; CHECK-LE-NEXT:    sub sp, #8
; CHECK-LE-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-LE-NEXT:    vmov.i32 q0, #0x0
; CHECK-LE-NEXT:    vpsel q0, q1, q0
; CHECK-LE-NEXT:    vmov q4, q1
; CHECK-LE-NEXT:    vstr p0, [sp, #4] @ 4-byte Spill
; CHECK-LE-NEXT:    bl ext_i16
; CHECK-LE-NEXT:    vldr p0, [sp, #4] @ 4-byte Reload
; CHECK-LE-NEXT:    vpsel q0, q4, q0
; CHECK-LE-NEXT:    add sp, #8
; CHECK-LE-NEXT:    vpop {d8, d9}
; CHECK-LE-NEXT:    pop {r7, pc}
;
; CHECK-BE-LABEL: shuffle1_v8i16:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .save {r7, lr}
; CHECK-BE-NEXT:    push {r7, lr}
; CHECK-BE-NEXT:    .vsave {d8, d9}
; CHECK-BE-NEXT:    vpush {d8, d9}
; CHECK-BE-NEXT:    .pad #8
; CHECK-BE-NEXT:    sub sp, #8
; CHECK-BE-NEXT:    vrev64.16 q4, q1
; CHECK-BE-NEXT:    vmov.i32 q1, #0x0
; CHECK-BE-NEXT:    vrev64.16 q2, q0
; CHECK-BE-NEXT:    vrev32.16 q1, q1
; CHECK-BE-NEXT:    vcmp.i16 eq, q2, zr
; CHECK-BE-NEXT:    vpsel q1, q4, q1
; CHECK-BE-NEXT:    vstr p0, [sp, #4] @ 4-byte Spill
; CHECK-BE-NEXT:    vrev64.16 q0, q1
; CHECK-BE-NEXT:    bl ext_i16
; CHECK-BE-NEXT:    vldr p0, [sp, #4] @ 4-byte Reload
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vpsel q1, q4, q1
; CHECK-BE-NEXT:    vrev64.16 q0, q1
; CHECK-BE-NEXT:    add sp, #8
; CHECK-BE-NEXT:    vpop {d8, d9}
; CHECK-BE-NEXT:    pop {r7, pc}
entry:
  ; %c is consumed on both sides of the call, which is what forces the spill.
  %c = icmp eq <8 x i16> %src, zeroinitializer
  %s1 = select <8 x i1> %c, <8 x i16> %a, <8 x i16> zeroinitializer
  %ext = call arm_aapcs_vfpcc <8 x i16> @ext_i16(<8 x i16> %s1)
  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ext
  ret <8 x i16> %s
}

; shuffle1_v16i8: 8-bit-lane variant. The <16 x i1> result of comparing %src
; against zero is used by a select before the call to @ext_i8 and by another
; select after it. The expected output for both runs stores p0 to the stack
; with vstr before the bl and loads it back with vldr for the final vpsel; the
; big-endian run also expects vrev64.8 on the vector operands and a vrev32.8
; on the zero vector.
; NOTE(review): the leading decimal on each line below looks like line-number
; residue, and no entry label, ret, or closing brace is visible in this
; excerpt - confirm against the upstream test before running it.
114 define arm_aapcs_vfpcc <16 x i8> @shuffle1_v16i8(<16 x i8> %src, <16 x i8> %a) {
115 ; CHECK-LE-LABEL: shuffle1_v16i8:
116 ; CHECK-LE: @ %bb.0: @ %entry
117 ; CHECK-LE-NEXT: .save {r7, lr}
118 ; CHECK-LE-NEXT: push {r7, lr}
119 ; CHECK-LE-NEXT: .vsave {d8, d9}
120 ; CHECK-LE-NEXT: vpush {d8, d9}
121 ; CHECK-LE-NEXT: .pad #8
122 ; CHECK-LE-NEXT: sub sp, #8
123 ; CHECK-LE-NEXT: vcmp.i8 eq, q0, zr
124 ; CHECK-LE-NEXT: vmov.i32 q0, #0x0
125 ; CHECK-LE-NEXT: vpsel q0, q1, q0
126 ; CHECK-LE-NEXT: vmov q4, q1
127 ; CHECK-LE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
128 ; CHECK-LE-NEXT: bl ext_i8
129 ; CHECK-LE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
130 ; CHECK-LE-NEXT: vpsel q0, q4, q0
131 ; CHECK-LE-NEXT: add sp, #8
132 ; CHECK-LE-NEXT: vpop {d8, d9}
133 ; CHECK-LE-NEXT: pop {r7, pc}
135 ; CHECK-BE-LABEL: shuffle1_v16i8:
136 ; CHECK-BE: @ %bb.0: @ %entry
137 ; CHECK-BE-NEXT: .save {r7, lr}
138 ; CHECK-BE-NEXT: push {r7, lr}
139 ; CHECK-BE-NEXT: .vsave {d8, d9}
140 ; CHECK-BE-NEXT: vpush {d8, d9}
141 ; CHECK-BE-NEXT: .pad #8
142 ; CHECK-BE-NEXT: sub sp, #8
143 ; CHECK-BE-NEXT: vrev64.8 q4, q1
144 ; CHECK-BE-NEXT: vmov.i32 q1, #0x0
145 ; CHECK-BE-NEXT: vrev64.8 q2, q0
146 ; CHECK-BE-NEXT: vrev32.8 q1, q1
147 ; CHECK-BE-NEXT: vcmp.i8 eq, q2, zr
148 ; CHECK-BE-NEXT: vpsel q1, q4, q1
149 ; CHECK-BE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
150 ; CHECK-BE-NEXT: vrev64.8 q0, q1
151 ; CHECK-BE-NEXT: bl ext_i8
152 ; CHECK-BE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
153 ; CHECK-BE-NEXT: vrev64.8 q1, q0
154 ; CHECK-BE-NEXT: vpsel q1, q4, q1
155 ; CHECK-BE-NEXT: vrev64.8 q0, q1
156 ; CHECK-BE-NEXT: add sp, #8
157 ; CHECK-BE-NEXT: vpop {d8, d9}
158 ; CHECK-BE-NEXT: pop {r7, pc}
; %c below is consumed both before and after the call to @ext_i8.
160 %c = icmp eq <16 x i8> %src, zeroinitializer
161 %s1 = select <16 x i1> %c, <16 x i8> %a, <16 x i8> zeroinitializer
162 %ext = call arm_aapcs_vfpcc <16 x i8> @ext_i8(<16 x i8> %s1)
163 %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ext