1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -O3 -mtriple=thumbv8.1m.main-none-none-eabi --verify-machineinstrs -mattr=+mve.fp %s -o - | FileCheck %s
; Predicated MVE ORR intrinsic called by the tests in this file. Operands, in
; order: two <4 x i32> sources, a <4 x i1> lane mask, and a final <4 x i32>.
; NOTE(review): the final operand is presumably the value kept for lanes whose
; mask bit is false - confirm against the intrinsic definition.
4 declare <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>)
; One predicated ORR whose mask is `icmp sge %a, %c`. The expected output
; is a `vpt.s32 ge` block holding a single `vorrt`.
6 define arm_aapcs_vfpcc <4 x i32> @vpt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
7 ; CHECK-LABEL: vpt_block:
8 ; CHECK: @ %bb.0: @ %entry
9 ; CHECK-NEXT: vpt.s32 ge, q0, q2
10 ; CHECK-NEXT: vorrt q0, q1, q2
13 %0 = icmp sge <4 x i32> %a, %c
14 %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
; Two predicated ORRs sharing the `icmp sge` mask. The second ORR takes the
; first ORR's result as its first source while both pass %a as the final
; operand. The expected output copies q0 into q3 and then emits a `vptt`
; block with two `vorrt` instructions.
18 define arm_aapcs_vfpcc <4 x i32> @vptt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
19 ; CHECK-LABEL: vptt_block:
20 ; CHECK: @ %bb.0: @ %entry
21 ; CHECK-NEXT: vmov q3, q0
22 ; CHECK-NEXT: vptt.s32 ge, q0, q2
23 ; CHECK-NEXT: vorrt q3, q1, q2
24 ; CHECK-NEXT: vorrt q0, q3, q2
27 %0 = icmp sge <4 x i32> %a, %c
28 %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
29 %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %1, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
; Three predicated ORRs on the same `icmp sge` mask. Each call passes the
; previous call's result as its final operand. The expected output is a
; `vpttt` block with three `vorrt` instructions.
33 define arm_aapcs_vfpcc <4 x i32> @vpttt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
34 ; CHECK-LABEL: vpttt_block:
35 ; CHECK: @ %bb.0: @ %entry
36 ; CHECK-NEXT: vpttt.s32 ge, q0, q2
37 ; CHECK-NEXT: vorrt q0, q1, q2
38 ; CHECK-NEXT: vorrt q0, q1, q2
39 ; CHECK-NEXT: vorrt q0, q1, q2
42 %0 = icmp sge <4 x i32> %a, %c
43 %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
44 %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
45 %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %2)
; Four predicated ORRs on the same `icmp sge` mask, chained through the
; final operand. The expected output is a `vptttt` block with four `vorrt`
; instructions.
49 define arm_aapcs_vfpcc <4 x i32> @vptttt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
50 ; CHECK-LABEL: vptttt_block:
51 ; CHECK: @ %bb.0: @ %entry
52 ; CHECK-NEXT: vptttt.s32 ge, q0, q2
53 ; CHECK-NEXT: vorrt q0, q1, q2
54 ; CHECK-NEXT: vorrt q0, q1, q2
55 ; CHECK-NEXT: vorrt q0, q1, q2
56 ; CHECK-NEXT: vorrt q0, q1, q2
59 %0 = icmp sge <4 x i32> %a, %c
60 %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
61 %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
62 %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %2)
63 %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
; Then/else pair: one ORR under the `icmp sge` mask, followed by one ORR
; under its inverse (the mask xor'ed with all-true). The second ORR uses %c
; for both sources and is checked as a predicated move. The expected output
; is a `vpte` block: `vorrt` then `vmove`.
68 define arm_aapcs_vfpcc <4 x i32> @vpte_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
69 ; CHECK-LABEL: vpte_block:
70 ; CHECK: @ %bb.0: @ %entry
71 ; CHECK-NEXT: vpte.s32 ge, q0, q2
72 ; CHECK-NEXT: vorrt q0, q1, q2
73 ; CHECK-NEXT: vmove q0, q2
76 %0 = icmp sge <4 x i32> %a, %c
77 %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
78 %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
79 %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
; Then/then/else: two ORRs under the `icmp sge` mask and a third under the
; inverted mask. The inverted mask is computed before the first call. All
; three ORRs use %b and %c as sources. The expected output is a `vptte`
; block: `vorrt`, `vorrt`, `vorre`.
83 define arm_aapcs_vfpcc <4 x i32> @vptte_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
84 ; CHECK-LABEL: vptte_block:
85 ; CHECK: @ %bb.0: @ %entry
86 ; CHECK-NEXT: vptte.s32 ge, q0, q2
87 ; CHECK-NEXT: vorrt q0, q1, q2
88 ; CHECK-NEXT: vorrt q0, q1, q2
89 ; CHECK-NEXT: vorre q0, q1, q2
92 %0 = icmp sge <4 x i32> %a, %c
93 %1 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
94 %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
95 %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %2)
96 %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %1, <4 x i32> %3)
; Then/else/else: one ORR under the `icmp sge` mask followed by two ORRs
; under the inverted mask, all with %b and %c as sources. The expected
; output is a `vptee` block: `vorrt`, `vorre`, `vorre`.
100 define arm_aapcs_vfpcc <4 x i32> @vptee_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
101 ; CHECK-LABEL: vptee_block:
102 ; CHECK: @ %bb.0: @ %entry
103 ; CHECK-NEXT: vptee.s32 ge, q0, q2
104 ; CHECK-NEXT: vorrt q0, q1, q2
105 ; CHECK-NEXT: vorre q0, q1, q2
106 ; CHECK-NEXT: vorre q0, q1, q2
109 %0 = icmp sge <4 x i32> %a, %c
110 %1 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
111 %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
112 %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %1, <4 x i32> %2)
113 %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %1, <4 x i32> %3)
; Then/else/then: the mask sequence is original, inverted, original. The
; second and third ORRs use %c for both sources and are checked as
; predicated moves. The expected output is a `vptet` block: `vorrt`,
; `vmove`, `vmovt`.
117 define arm_aapcs_vfpcc <4 x i32> @vptet_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
118 ; CHECK-LABEL: vptet_block:
119 ; CHECK: @ %bb.0: @ %entry
120 ; CHECK-NEXT: vptet.s32 ge, q0, q2
121 ; CHECK-NEXT: vorrt q0, q1, q2
122 ; CHECK-NEXT: vmove q0, q2
123 ; CHECK-NEXT: vmovt q0, q2
126 %0 = icmp sge <4 x i32> %a, %c
127 %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
128 %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
129 %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
130 %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
; Then/then/else/then: the mask sequence is original, original, inverted,
; original. Every ORR after the first uses %c for both sources and is
; checked as a predicated move. The expected output is a `vpttet` block:
; `vorrt`, `vmovt`, `vmove`, `vmovt`.
134 define arm_aapcs_vfpcc <4 x i32> @vpttet_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
135 ; CHECK-LABEL: vpttet_block:
136 ; CHECK: @ %bb.0: @ %entry
137 ; CHECK-NEXT: vpttet.s32 ge, q0, q2
138 ; CHECK-NEXT: vorrt q0, q1, q2
139 ; CHECK-NEXT: vmovt q0, q2
140 ; CHECK-NEXT: vmove q0, q2
141 ; CHECK-NEXT: vmovt q0, q2
144 %0 = icmp sge <4 x i32> %a, %c
145 %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
146 %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
147 %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
148 %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
149 %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %4)
; Then/else/then/then: the mask sequence is original, inverted, original,
; original. Every ORR after the first uses %c for both sources and is
; checked as a predicated move. The expected output is a `vptett` block:
; `vorrt`, `vmove`, `vmovt`, `vmovt`.
153 define arm_aapcs_vfpcc <4 x i32> @vptett_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
154 ; CHECK-LABEL: vptett_block:
155 ; CHECK: @ %bb.0: @ %entry
156 ; CHECK-NEXT: vptett.s32 ge, q0, q2
157 ; CHECK-NEXT: vorrt q0, q1, q2
158 ; CHECK-NEXT: vmove q0, q2
159 ; CHECK-NEXT: vmovt q0, q2
160 ; CHECK-NEXT: vmovt q0, q2
163 %0 = icmp sge <4 x i32> %a, %c
164 %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
165 %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
166 %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
167 %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
168 %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %4)
; Then/else/else/then: the mask sequence is original, inverted, inverted,
; original. Every ORR after the first uses %c for both sources and is
; checked as a predicated move. The expected output is a `vpteet` block:
; `vorrt`, `vmove`, `vmove`, `vmovt`.
172 define arm_aapcs_vfpcc <4 x i32> @vpteet_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
173 ; CHECK-LABEL: vpteet_block:
174 ; CHECK: @ %bb.0: @ %entry
175 ; CHECK-NEXT: vpteet.s32 ge, q0, q2
176 ; CHECK-NEXT: vorrt q0, q1, q2
177 ; CHECK-NEXT: vmove q0, q2
178 ; CHECK-NEXT: vmove q0, q2
179 ; CHECK-NEXT: vmovt q0, q2
182 %0 = icmp sge <4 x i32> %a, %c
183 %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
184 %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
185 %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
186 %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
187 %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %4)
; Then/else/else/else: one ORR under the `icmp sge` mask followed by three
; ORRs under the inverted mask. The three later ORRs use %c for both
; sources and are checked as predicated moves. The expected output is a
; `vpteee` block: `vorrt`, `vmove`, `vmove`, `vmove`.
191 define arm_aapcs_vfpcc <4 x i32> @vpteee_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
192 ; CHECK-LABEL: vpteee_block:
193 ; CHECK: @ %bb.0: @ %entry
194 ; CHECK-NEXT: vpteee.s32 ge, q0, q2
195 ; CHECK-NEXT: vorrt q0, q1, q2
196 ; CHECK-NEXT: vmove q0, q2
197 ; CHECK-NEXT: vmove q0, q2
198 ; CHECK-NEXT: vmove q0, q2
201 %0 = icmp sge <4 x i32> %a, %c
202 %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
203 %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
204 %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
205 %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
206 %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
; Then/else/then/else: the mask alternates original, inverted, original,
; inverted. Every ORR after the first uses %c for both sources and is
; checked as a predicated move. The expected output is a `vptete` block:
; `vorrt`, `vmove`, `vmovt`, `vmove`.
210 define arm_aapcs_vfpcc <4 x i32> @vptete_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
211 ; CHECK-LABEL: vptete_block:
212 ; CHECK: @ %bb.0: @ %entry
213 ; CHECK-NEXT: vptete.s32 ge, q0, q2
214 ; CHECK-NEXT: vorrt q0, q1, q2
215 ; CHECK-NEXT: vmove q0, q2
216 ; CHECK-NEXT: vmovt q0, q2
217 ; CHECK-NEXT: vmove q0, q2
220 %0 = icmp sge <4 x i32> %a, %c
221 %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
222 %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
223 %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
224 %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
225 %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
; Then/then/then/else: three ORRs under the `icmp sge` mask followed by one
; under the inverted mask. Every ORR after the first uses %c for both
; sources and is checked as a predicated move. The expected output is a
; `vpttte` block: `vorrt`, `vmovt`, `vmovt`, `vmove`.
229 define arm_aapcs_vfpcc <4 x i32> @vpttte_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
230 ; CHECK-LABEL: vpttte_block:
231 ; CHECK: @ %bb.0: @ %entry
232 ; CHECK-NEXT: vpttte.s32 ge, q0, q2
233 ; CHECK-NEXT: vorrt q0, q1, q2
234 ; CHECK-NEXT: vmovt q0, q2
235 ; CHECK-NEXT: vmovt q0, q2
236 ; CHECK-NEXT: vmove q0, q2
239 %0 = icmp sge <4 x i32> %a, %c
240 %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
241 %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
242 %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
243 %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
244 %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
; Then/then/else/else: two ORRs under the `icmp sge` mask followed by two
; under the inverted mask. Every ORR after the first uses %c for both
; sources and is checked as a predicated move. The expected output is a
; `vpttee` block: `vorrt`, `vmovt`, `vmove`, `vmove`.
248 define arm_aapcs_vfpcc <4 x i32> @vpttee_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
249 ; CHECK-LABEL: vpttee_block:
250 ; CHECK: @ %bb.0: @ %entry
251 ; CHECK-NEXT: vpttee.s32 ge, q0, q2
252 ; CHECK-NEXT: vorrt q0, q1, q2
253 ; CHECK-NEXT: vmovt q0, q2
254 ; CHECK-NEXT: vmove q0, q2
255 ; CHECK-NEXT: vmove q0, q2
258 %0 = icmp sge <4 x i32> %a, %c
259 %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
260 %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
261 %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
262 %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
263 %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)