; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

;
; SABA
;

define <vscale x 16 x i8> @saba_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
; CHECK-LABEL: saba_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.b, z1.b, z2.b
; CHECK-NEXT:    ret
  %b.sext = sext <vscale x 16 x i8> %b to <vscale x 16 x i16>
  %c.sext = sext <vscale x 16 x i8> %c to <vscale x 16 x i16>
  %sub = sub <vscale x 16 x i16> %b.sext, %c.sext
  %abs = call <vscale x 16 x i16> @llvm.abs.nxv16i16(<vscale x 16 x i16> %sub, i1 true)
  %trunc = trunc <vscale x 16 x i16> %abs to <vscale x 16 x i8>
  %add = add <vscale x 16 x i8> %a, %trunc
  ret <vscale x 16 x i8> %add
}
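
; The i1 operands sign-extend to 0 or -1, so the absolute difference reduces
; to (b xor c) and no saba instruction is expected here.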
define <vscale x 16 x i8> @saba_b_promoted_ops(<vscale x 16 x i8> %a, <vscale x 16 x i1> %b, <vscale x 16 x i1> %c) #0 {
; CHECK-LABEL: saba_b_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p2.b
; CHECK-NEXT:    mov z1.b, #1 // =0x1
; CHECK-NEXT:    eor p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT:    add z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %b.sext = sext <vscale x 16 x i1> %b to <vscale x 16 x i8>
  %c.sext = sext <vscale x 16 x i1> %c to <vscale x 16 x i8>
  %sub = sub <vscale x 16 x i8> %b.sext, %c.sext
  %abs = call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> %sub, i1 true)
  %add = add <vscale x 16 x i8> %a, %abs
  ret <vscale x 16 x i8> %add
}
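
; An sabd intrinsic with an all-active predicate (ptrue pattern 31) followed
; by an add can be combined into saba.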
define <vscale x 16 x i8> @saba_b_from_sabd(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
; CHECK-LABEL: saba_b_from_sabd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.b, z1.b, z2.b
; CHECK-NEXT:    ret
  %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.u.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  %3 = add <vscale x 16 x i8> %2, %a
  ret <vscale x 16 x i8> %3
}
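
; The sabd.u intrinsic leaves inactive lanes undefined, so the combine into
; saba holds for an arbitrary predicate as well.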
define <vscale x 16 x i8> @saba_b_from_sabd_u(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
; CHECK-LABEL: saba_b_from_sabd_u:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.b, z1.b, z2.b
; CHECK-NEXT:    ret
  %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  %2 = add <vscale x 16 x i8> %1, %a
  ret <vscale x 16 x i8> %2
}

define <vscale x 8 x i16> @saba_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
; CHECK-LABEL: saba_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.h, z1.h, z2.h
; CHECK-NEXT:    ret
  %b.sext = sext <vscale x 8 x i16> %b to <vscale x 8 x i32>
  %c.sext = sext <vscale x 8 x i16> %c to <vscale x 8 x i32>
  %sub = sub <vscale x 8 x i32> %b.sext, %c.sext
  %abs = call <vscale x 8 x i32> @llvm.abs.nxv8i32(<vscale x 8 x i32> %sub, i1 true)
  %trunc = trunc <vscale x 8 x i32> %abs to <vscale x 8 x i16>
  %add = add <vscale x 8 x i16> %a, %trunc
  ret <vscale x 8 x i16> %add
}
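
; Operands narrower than the element type are sign-extended (sxtb) before the
; saba.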
define <vscale x 8 x i16> @saba_h_promoted_ops(<vscale x 8 x i16> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c) #0 {
; CHECK-LABEL: saba_h_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    sxtb z2.h, p0/m, z2.h
; CHECK-NEXT:    sxtb z1.h, p0/m, z1.h
; CHECK-NEXT:    saba z0.h, z1.h, z2.h
; CHECK-NEXT:    ret
  %b.sext = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
  %c.sext = sext <vscale x 8 x i8> %c to <vscale x 8 x i16>
  %sub = sub <vscale x 8 x i16> %b.sext, %c.sext
  %abs = call <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16> %sub, i1 true)
  %add = add <vscale x 8 x i16> %a, %abs
  ret <vscale x 8 x i16> %add
}

define <vscale x 8 x i16> @saba_h_from_sabd(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
; CHECK-LABEL: saba_h_from_sabd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.h, z1.h, z2.h
; CHECK-NEXT:    ret
  %1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.u.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  %3 = add <vscale x 8 x i16> %2, %a
  ret <vscale x 8 x i16> %3
}

define <vscale x 8 x i16> @saba_h_from_sabd_u(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
; CHECK-LABEL: saba_h_from_sabd_u:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.h, z1.h, z2.h
; CHECK-NEXT:    ret
  %1 = call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  %2 = add <vscale x 8 x i16> %1, %a
  ret <vscale x 8 x i16> %2
}

define <vscale x 4 x i32> @saba_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
; CHECK-LABEL: saba_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.s, z1.s, z2.s
; CHECK-NEXT:    ret
  %b.sext = sext <vscale x 4 x i32> %b to <vscale x 4 x i64>
  %c.sext = sext <vscale x 4 x i32> %c to <vscale x 4 x i64>
  %sub = sub <vscale x 4 x i64> %b.sext, %c.sext
  %abs = call <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64> %sub, i1 true)
  %trunc = trunc <vscale x 4 x i64> %abs to <vscale x 4 x i32>
  %add = add <vscale x 4 x i32> %a, %trunc
  ret <vscale x 4 x i32> %add
}

define <vscale x 4 x i32> @saba_s_promoted_ops(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c) #0 {
; CHECK-LABEL: saba_s_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sxth z2.s, p0/m, z2.s
; CHECK-NEXT:    sxth z1.s, p0/m, z1.s
; CHECK-NEXT:    saba z0.s, z1.s, z2.s
; CHECK-NEXT:    ret
  %b.sext = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
  %c.sext = sext <vscale x 4 x i16> %c to <vscale x 4 x i32>
  %sub = sub <vscale x 4 x i32> %b.sext, %c.sext
  %abs = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %sub, i1 true)
  %add = add <vscale x 4 x i32> %a, %abs
  ret <vscale x 4 x i32> %add
}

define <vscale x 4 x i32> @saba_s_from_sabd(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
; CHECK-LABEL: saba_s_from_sabd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.s, z1.s, z2.s
; CHECK-NEXT:    ret
  %1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.u.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  %3 = add <vscale x 4 x i32> %2, %a
  ret <vscale x 4 x i32> %3
}

define <vscale x 4 x i32> @saba_s_from_sabd_u(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
; CHECK-LABEL: saba_s_from_sabd_u:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.s, z1.s, z2.s
; CHECK-NEXT:    ret
  %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  %2 = add <vscale x 4 x i32> %1, %a
  ret <vscale x 4 x i32> %2
}

define <vscale x 2 x i64> @saba_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
; CHECK-LABEL: saba_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %b.sext = sext <vscale x 2 x i64> %b to <vscale x 2 x i128>
  %c.sext = sext <vscale x 2 x i64> %c to <vscale x 2 x i128>
  %sub = sub <vscale x 2 x i128> %b.sext, %c.sext
  %abs = call <vscale x 2 x i128> @llvm.abs.nxv2i128(<vscale x 2 x i128> %sub, i1 true)
  %trunc = trunc <vscale x 2 x i128> %abs to <vscale x 2 x i64>
  %add = add <vscale x 2 x i64> %a, %trunc
  ret <vscale x 2 x i64> %add
}

define <vscale x 2 x i64> @saba_d_promoted_ops(<vscale x 2 x i64> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) #0 {
; CHECK-LABEL: saba_d_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    sxtw z2.d, p0/m, z2.d
; CHECK-NEXT:    sxtw z1.d, p0/m, z1.d
; CHECK-NEXT:    saba z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %b.sext = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
  %c.sext = sext <vscale x 2 x i32> %c to <vscale x 2 x i64>
  %sub = sub <vscale x 2 x i64> %b.sext, %c.sext
  %abs = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %sub, i1 true)
  %add = add <vscale x 2 x i64> %a, %abs
  ret <vscale x 2 x i64> %add
}

define <vscale x 2 x i64> @saba_d_from_sabd(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
; CHECK-LABEL: saba_d_from_sabd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.u.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  %3 = add <vscale x 2 x i64> %2, %a
  ret <vscale x 2 x i64> %3
}

define <vscale x 2 x i64> @saba_d_from_sabd_u(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
; CHECK-LABEL: saba_d_from_sabd_u:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  %2 = add <vscale x 2 x i64> %1, %a
  ret <vscale x 2 x i64> %2
}

;
; UABA
;

define <vscale x 16 x i8> @uaba_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
; CHECK-LABEL: uaba_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba z0.b, z1.b, z2.b
; CHECK-NEXT:    ret
  %b.zext = zext <vscale x 16 x i8> %b to <vscale x 16 x i16>
  %c.zext = zext <vscale x 16 x i8> %c to <vscale x 16 x i16>
  %sub = sub <vscale x 16 x i16> %b.zext, %c.zext
  %abs = call <vscale x 16 x i16> @llvm.abs.nxv16i16(<vscale x 16 x i16> %sub, i1 true)
  %trunc = trunc <vscale x 16 x i16> %abs to <vscale x 16 x i8>
  %add = add <vscale x 16 x i8> %a, %trunc
  ret <vscale x 16 x i8> %add
}
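
; The i1 operands zero-extend to 0 or 1, so the absolute difference reduces
; to (b xor c) and no uaba instruction is expected here.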
define <vscale x 16 x i8> @uaba_b_promoted_ops(<vscale x 16 x i8> %a, <vscale x 16 x i1> %b, <vscale x 16 x i1> %c) #0 {
; CHECK-LABEL: uaba_b_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p2.b
; CHECK-NEXT:    mov z1.b, #1 // =0x1
; CHECK-NEXT:    eor p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT:    add z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %b.zext = zext <vscale x 16 x i1> %b to <vscale x 16 x i8>
  %c.zext = zext <vscale x 16 x i1> %c to <vscale x 16 x i8>
  %sub = sub <vscale x 16 x i8> %b.zext, %c.zext
  %abs = call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> %sub, i1 true)
  %add = add <vscale x 16 x i8> %a, %abs
  ret <vscale x 16 x i8> %add
}

define <vscale x 16 x i8> @uaba_b_from_uabd(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
; CHECK-LABEL: uaba_b_from_uabd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba z0.b, z1.b, z2.b
; CHECK-NEXT:    ret
  %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.u.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  %3 = add <vscale x 16 x i8> %2, %a
  ret <vscale x 16 x i8> %3
}

define <vscale x 16 x i8> @uaba_b_from_uabd_u(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
; CHECK-LABEL: uaba_b_from_uabd_u:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba z0.b, z1.b, z2.b
; CHECK-NEXT:    ret
  %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  %2 = add <vscale x 16 x i8> %1, %a
  ret <vscale x 16 x i8> %2
}

define <vscale x 8 x i16> @uaba_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
; CHECK-LABEL: uaba_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba z0.h, z1.h, z2.h
; CHECK-NEXT:    ret
  %b.zext = zext <vscale x 8 x i16> %b to <vscale x 8 x i32>
  %c.zext = zext <vscale x 8 x i16> %c to <vscale x 8 x i32>
  %sub = sub <vscale x 8 x i32> %b.zext, %c.zext
  %abs = call <vscale x 8 x i32> @llvm.abs.nxv8i32(<vscale x 8 x i32> %sub, i1 true)
  %trunc = trunc <vscale x 8 x i32> %abs to <vscale x 8 x i16>
  %add = add <vscale x 8 x i16> %a, %trunc
  ret <vscale x 8 x i16> %add
}
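
; Operands narrower than the element type are zero-extended with an AND mask
; before the uaba.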
define <vscale x 8 x i16> @uaba_h_promoted_ops(<vscale x 8 x i16> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c) #0 {
; CHECK-LABEL: uaba_h_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z2.h, z2.h, #0xff
; CHECK-NEXT:    and z1.h, z1.h, #0xff
; CHECK-NEXT:    uaba z0.h, z1.h, z2.h
; CHECK-NEXT:    ret
  %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
  %c.zext = zext <vscale x 8 x i8> %c to <vscale x 8 x i16>
  %sub = sub <vscale x 8 x i16> %b.zext, %c.zext
  %abs = call <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16> %sub, i1 true)
  %add = add <vscale x 8 x i16> %a, %abs
  ret <vscale x 8 x i16> %add
}

define <vscale x 8 x i16> @uaba_h_from_uabd(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
; CHECK-LABEL: uaba_h_from_uabd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba z0.h, z1.h, z2.h
; CHECK-NEXT:    ret
  %1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.u.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  %3 = add <vscale x 8 x i16> %2, %a
  ret <vscale x 8 x i16> %3
}

define <vscale x 8 x i16> @uaba_h_from_uabd_u(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
; CHECK-LABEL: uaba_h_from_uabd_u:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba z0.h, z1.h, z2.h
; CHECK-NEXT:    ret
  %1 = call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  %2 = add <vscale x 8 x i16> %1, %a
  ret <vscale x 8 x i16> %2
}

define <vscale x 4 x i32> @uaba_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
; CHECK-LABEL: uaba_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba z0.s, z1.s, z2.s
; CHECK-NEXT:    ret
  %b.zext = zext <vscale x 4 x i32> %b to <vscale x 4 x i64>
  %c.zext = zext <vscale x 4 x i32> %c to <vscale x 4 x i64>
  %sub = sub <vscale x 4 x i64> %b.zext, %c.zext
  %abs = call <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64> %sub, i1 true)
  %trunc = trunc <vscale x 4 x i64> %abs to <vscale x 4 x i32>
  %add = add <vscale x 4 x i32> %a, %trunc
  ret <vscale x 4 x i32> %add
}

define <vscale x 4 x i32> @uaba_s_promoted_ops(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c) #0 {
; CHECK-LABEL: uaba_s_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z2.s, z2.s, #0xffff
; CHECK-NEXT:    and z1.s, z1.s, #0xffff
; CHECK-NEXT:    uaba z0.s, z1.s, z2.s
; CHECK-NEXT:    ret
  %b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
  %c.zext = zext <vscale x 4 x i16> %c to <vscale x 4 x i32>
  %sub = sub <vscale x 4 x i32> %b.zext, %c.zext
  %abs = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %sub, i1 true)
  %add = add <vscale x 4 x i32> %a, %abs
  ret <vscale x 4 x i32> %add
}

define <vscale x 4 x i32> @uaba_s_from_uabd(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
; CHECK-LABEL: uaba_s_from_uabd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba z0.s, z1.s, z2.s
; CHECK-NEXT:    ret
  %1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.u.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  %3 = add <vscale x 4 x i32> %2, %a
  ret <vscale x 4 x i32> %3
}

define <vscale x 4 x i32> @uaba_s_from_uabd_u(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
; CHECK-LABEL: uaba_s_from_uabd_u:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba z0.s, z1.s, z2.s
; CHECK-NEXT:    ret
  %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  %2 = add <vscale x 4 x i32> %1, %a
  ret <vscale x 4 x i32> %2
}

define <vscale x 2 x i64> @uaba_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
; CHECK-LABEL: uaba_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %b.zext = zext <vscale x 2 x i64> %b to <vscale x 2 x i128>
  %c.zext = zext <vscale x 2 x i64> %c to <vscale x 2 x i128>
  %sub = sub <vscale x 2 x i128> %b.zext, %c.zext
  %abs = call <vscale x 2 x i128> @llvm.abs.nxv2i128(<vscale x 2 x i128> %sub, i1 true)
  %trunc = trunc <vscale x 2 x i128> %abs to <vscale x 2 x i64>
  %add = add <vscale x 2 x i64> %a, %trunc
  ret <vscale x 2 x i64> %add
}

define <vscale x 2 x i64> @uaba_d_promoted_ops(<vscale x 2 x i64> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) #0 {
; CHECK-LABEL: uaba_d_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z2.d, z2.d, #0xffffffff
; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
; CHECK-NEXT:    uaba z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %b.zext = zext <vscale x 2 x i32> %b to <vscale x 2 x i64>
  %c.zext = zext <vscale x 2 x i32> %c to <vscale x 2 x i64>
  %sub = sub <vscale x 2 x i64> %b.zext, %c.zext
  %abs = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %sub, i1 true)
  %add = add <vscale x 2 x i64> %a, %abs
  ret <vscale x 2 x i64> %add
}

define <vscale x 2 x i64> @uaba_d_from_uabd(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
; CHECK-LABEL: uaba_d_from_uabd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.u.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  %3 = add <vscale x 2 x i64> %2, %a
  ret <vscale x 2 x i64> %3
}

define <vscale x 2 x i64> @uaba_d_from_uabd_u(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
; CHECK-LABEL: uaba_d_from_uabd_u:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  %2 = add <vscale x 2 x i64> %1, %a
  ret <vscale x 2 x i64> %2
}

; A variant of uaba_s but with the add operands switched.
define <vscale x 4 x i32> @uaba_s_commutative(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
; CHECK-LABEL: uaba_s_commutative:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba z0.s, z1.s, z2.s
; CHECK-NEXT:    ret
  %b.zext = zext <vscale x 4 x i32> %b to <vscale x 4 x i64>
  %c.zext = zext <vscale x 4 x i32> %c to <vscale x 4 x i64>
  %sub = sub <vscale x 4 x i64> %b.zext, %c.zext
  %abs = call <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64> %sub, i1 true)
  %trunc = trunc <vscale x 4 x i64> %abs to <vscale x 4 x i32>
  %add = add <vscale x 4 x i32> %trunc, %a
  ret <vscale x 4 x i32> %add
}

declare <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8>, i1)
declare <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16>, i1)
declare <vscale x 16 x i16> @llvm.abs.nxv16i16(<vscale x 16 x i16>, i1)
declare <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32>, i1)
declare <vscale x 8 x i32> @llvm.abs.nxv8i32(<vscale x 8 x i32>, i1)
declare <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64>, i1)
declare <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64>, i1)
declare <vscale x 2 x i128> @llvm.abs.nxv2i128(<vscale x 2 x i128>, i1)

declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sabd.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sabd.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sabd.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uabd.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uabd.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uabd.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uabd.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

attributes #0 = { "target-features"="+neon,+sve,+sve2" }