; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
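
; These tests check that a predicated saturating add/sub intrinsic whose
; predicate is all active (ptrue pattern 31) and whose second operand is a
; splatted constant is folded to the unpredicated immediate form of the
; instruction. The "lowimm" tests use immediates that fit directly in the
; unsigned 8-bit immediate field; the "highimm" tests use immediates that are
; encodable as an 8-bit value shifted left by 8, e.g. 2048 = 8 << 8 and
; 65280 = 255 << 8.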

; SQADD

define <vscale x 16 x i8> @sqadd_b_lowimm(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sqadd_b_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.b, z0.b, #27 // =0x1b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqadd_h_lowimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqadd_h_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.h, z0.h, #43 // =0x2b
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @sqadd_h_highimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqadd_h_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.h, z0.h, #2048 // =0x800
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqadd_s_lowimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqadd_s_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @sqadd_s_highimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqadd_s_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.s, z0.s, #8192 // =0x2000
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqadd_d_lowimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqadd_d_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.d, z0.d, #255 // =0xff
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @sqadd_d_highimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqadd_d_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.d, z0.d, #65280 // =0xff00
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}
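
; The sqsub and uqsub tests use the ".u" intrinsic variants, whose inactive
; lanes are undefined, so the fold to the immediate form does not depend on
; the predicate. The uqsub_i32_ptrue_all_d test near the end of the file
; deliberately uses the non-".u" intrinsic, where inactive lanes do matter.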

; SQSUB

define <vscale x 16 x i8> @sqsub_b_lowimm(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sqsub_b_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.b, z0.b, #27 // =0x1b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqsub_h_lowimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqsub_h_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.h, z0.h, #43 // =0x2b
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @sqsub_h_highimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqsub_h_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.h, z0.h, #2048 // =0x800
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqsub_s_lowimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqsub_s_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @sqsub_s_highimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqsub_s_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.s, z0.s, #8192 // =0x2000
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqsub_d_lowimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqsub_d_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.d, z0.d, #255 // =0xff
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @sqsub_d_highimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqsub_d_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.d, z0.d, #65280 // =0xff00
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; UQADD

define <vscale x 16 x i8> @uqadd_b_lowimm(<vscale x 16 x i8> %a) {
; CHECK-LABEL: uqadd_b_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.b, z0.b, #27 // =0x1b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqadd_h_lowimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uqadd_h_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.h, z0.h, #43 // =0x2b
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @uqadd_h_highimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uqadd_h_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.h, z0.h, #2048 // =0x800
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqadd_s_lowimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uqadd_s_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @uqadd_s_highimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uqadd_s_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.s, z0.s, #8192 // =0x2000
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqadd_d_lowimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: uqadd_d_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.d, z0.d, #255 // =0xff
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @uqadd_d_highimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: uqadd_d_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.d, z0.d, #65280 // =0xff00
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; UQSUB

define <vscale x 16 x i8> @uqsub_b_lowimm(<vscale x 16 x i8> %a) {
; CHECK-LABEL: uqsub_b_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.b, z0.b, #27 // =0x1b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqsub_h_lowimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uqsub_h_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.h, z0.h, #43 // =0x2b
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @uqsub_h_highimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uqsub_h_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.h, z0.h, #2048 // =0x800
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqsub_s_lowimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uqsub_s_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @uqsub_s_highimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uqsub_s_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.s, z0.s, #8192 // =0x2000
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqsub_d_lowimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: uqsub_d_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.d, z0.d, #255 // =0xff
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @uqsub_d_highimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: uqsub_d_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.d, z0.d, #65280 // =0xff00
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}
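
; SVE predicate registers carry one bit per byte of data. A ptrue created for
; b or h elements sets a superset of the bits an s-element predicate reads,
; so reinterpreting it as an nxv4i1 predicate leaves every i32 lane active
; and the immediate form can still be used. A d-based ptrue only sets every
; eighth bit, so viewed as an i32 predicate half the lanes are inactive.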

; As uqsub_i32 but where pg is i8 based and thus compatible with i32.
define <vscale x 4 x i32> @uqsub_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: uqsub_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                        <vscale x 4 x i32> %a,
                                                                        <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As uqsub_i32 but where pg is i16 based and thus compatible with i32.
define <vscale x 4 x i32> @uqsub_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: uqsub_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                        <vscale x 4 x i32> %a,
                                                                        <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As uqsub_i32 but where pg is i64 based, which is not compatible with i32,
; so inactive lanes matter and the immediate form cannot be used.
define <vscale x 4 x i32> @uqsub_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: uqsub_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z1.s, #1 // =0x1
; CHECK-NEXT:    uqsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 %pattern)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 %pattern)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 %pattern)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 %pattern)

attributes #0 = { "target-features"="+sve2" }