1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s | FileCheck %s
4 target triple = "aarch64-unknown-linux-gnu"
7 ; SQABS (sve2_int_un_pred_arit)
10 ; Check movprfx is not inserted when dstReg == srcReg
; sqabs on .b elements with an undef passthru and %a as the only vector operand.
; The predicate comes from ptrue(i32 31) at the operation's own element width.
; Expected output: ptrue immediately followed by sqabs that uses z0 as both
; destination and source, with no movprfx in between.
11 define <vscale x 16 x i8> @sqabs_i8_dupreg(<vscale x 16 x i8> %a) #0 {
12 ; CHECK-LABEL: sqabs_i8_dupreg:
14 ; CHECK-NEXT: ptrue p0.b
15 ; CHECK-NEXT: sqabs z0.b, p0/m, z0.b
17 %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
18 %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
19 ret <vscale x 16 x i8> %ret
22 ; Check movprfx is inserted when passthru is undef
; sqabs on .b elements with an undef passthru; the source is %b while %a is
; unused. The predicate comes from ptrue(i32 31) at the operation's element width.
; Expected output: movprfx z0, z1 is placed between the ptrue and the sqabs,
; and the sqabs reads z1 and writes z0.
23 define <vscale x 16 x i8> @sqabs_i8_undef(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
24 ; CHECK-LABEL: sqabs_i8_undef:
26 ; CHECK-NEXT: ptrue p0.b
27 ; CHECK-NEXT: movprfx z0, z1
28 ; CHECK-NEXT: sqabs z0.b, p0/m, z1.b
30 %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
31 %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
32 ret <vscale x 16 x i8> %ret
35 ; Check movprfx is inserted when predicate is all active, making the passthru dead
; sqabs on .b elements with %a as the passthru and %b as the source. The
; predicate comes from ptrue(i32 31) at the operation's own element width.
; Expected output: movprfx z0, z1 precedes the sqabs, so the incoming z0 value
; is overwritten rather than kept for the merging form.
36 define <vscale x 16 x i8> @sqabs_i8_active(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
37 ; CHECK-LABEL: sqabs_i8_active:
39 ; CHECK-NEXT: ptrue p0.b
40 ; CHECK-NEXT: movprfx z0, z1
41 ; CHECK-NEXT: sqabs z0.b, p0/m, z1.b
43 %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
44 %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
45 ret <vscale x 16 x i8> %ret
48 ; Check movprfx is not inserted when predicate is not all active, making the passthru used
; sqabs on .b elements with %a as the passthru and %b as the source. The
; predicate is built as a <vscale x 2 x i1> ptrue(i32 31) and then widened to
; <vscale x 16 x i1> with convert.to.svbool, so it is not a .b-width ptrue.
; Expected output: ptrue p0.d immediately followed by the merging sqabs into
; z0, with no movprfx in between.
49 define <vscale x 16 x i8> @sqabs_i8_not_active(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
50 ; CHECK-LABEL: sqabs_i8_not_active:
52 ; CHECK-NEXT: ptrue p0.d
53 ; CHECK-NEXT: sqabs z0.b, p0/m, z1.b
55 %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
56 %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
57 %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg.to, <vscale x 16 x i8> %b)
58 ret <vscale x 16 x i8> %ret
; .h variant of the same-register case: undef passthru, %a is the only vector
; operand, predicate from ptrue(i32 31) at .h width.
; Expected output: ptrue immediately followed by sqabs with z0 as both
; destination and source; no movprfx.
61 define <vscale x 8 x i16> @sqabs_i16_dupreg(<vscale x 8 x i16> %a) #0 {
62 ; CHECK-LABEL: sqabs_i16_dupreg:
64 ; CHECK-NEXT: ptrue p0.h
65 ; CHECK-NEXT: sqabs z0.h, p0/m, z0.h
67 %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
68 %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
69 ret <vscale x 8 x i16> %ret
; .h variant of the undef-passthru case: the source is %b, %a is unused, and
; the predicate comes from ptrue(i32 31) at .h width.
; Expected output: movprfx z0, z1 sits between the ptrue and the sqabs.
72 define <vscale x 8 x i16> @sqabs_i16_undef(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
73 ; CHECK-LABEL: sqabs_i16_undef:
75 ; CHECK-NEXT: ptrue p0.h
76 ; CHECK-NEXT: movprfx z0, z1
77 ; CHECK-NEXT: sqabs z0.h, p0/m, z1.h
79 %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
80 %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
81 ret <vscale x 8 x i16> %ret
; .h variant of the ptrue-predicate case: passthru is %a, source is %b, and the
; predicate comes from ptrue(i32 31) at .h width.
; Expected output: movprfx z0, z1 precedes the sqabs, so the incoming z0 value
; is overwritten rather than merged.
84 define <vscale x 8 x i16> @sqabs_i16_active(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
85 ; CHECK-LABEL: sqabs_i16_active:
87 ; CHECK-NEXT: ptrue p0.h
88 ; CHECK-NEXT: movprfx z0, z1
89 ; CHECK-NEXT: sqabs z0.h, p0/m, z1.h
91 %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
92 %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
93 ret <vscale x 8 x i16> %ret
; .h variant of the narrower-predicate case: passthru is %a, source is %b. The
; predicate starts as a <vscale x 2 x i1> ptrue(i32 31), is widened with
; convert.to.svbool and then narrowed to <vscale x 8 x i1> with
; convert.from.svbool, so it is not a .h-width ptrue.
; Expected output: ptrue p0.d immediately followed by the merging sqabs into
; z0, with no movprfx in between.
96 define <vscale x 8 x i16> @sqabs_i16_not_active(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
97 ; CHECK-LABEL: sqabs_i16_not_active:
99 ; CHECK-NEXT: ptrue p0.d
100 ; CHECK-NEXT: sqabs z0.h, p0/m, z1.h
102 %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
103 %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
104 %pg.from = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg.to)
105 %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg.from, <vscale x 8 x i16> %b)
106 ret <vscale x 8 x i16> %ret
; .s variant of the same-register case: undef passthru, %a is the only vector
; operand, predicate from ptrue(i32 31) at .s width.
; Expected output: ptrue immediately followed by sqabs with z0 as both
; destination and source; no movprfx.
109 define <vscale x 4 x i32> @sqabs_i32_dupreg(<vscale x 4 x i32> %a) #0 {
110 ; CHECK-LABEL: sqabs_i32_dupreg:
112 ; CHECK-NEXT: ptrue p0.s
113 ; CHECK-NEXT: sqabs z0.s, p0/m, z0.s
115 %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
116 %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
117 ret <vscale x 4 x i32> %ret
; .s variant of the undef-passthru case: the source is %b, %a is unused, and
; the predicate comes from ptrue(i32 31) at .s width.
; Expected output: movprfx z0, z1 sits between the ptrue and the sqabs.
120 define <vscale x 4 x i32> @sqabs_i32_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
121 ; CHECK-LABEL: sqabs_i32_undef:
123 ; CHECK-NEXT: ptrue p0.s
124 ; CHECK-NEXT: movprfx z0, z1
125 ; CHECK-NEXT: sqabs z0.s, p0/m, z1.s
127 %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
128 %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
129 ret <vscale x 4 x i32> %ret
; .s variant of the ptrue-predicate case: passthru is %a, source is %b, and the
; predicate comes from ptrue(i32 31) at .s width.
; Expected output: movprfx z0, z1 precedes the sqabs, so the incoming z0 value
; is overwritten rather than merged.
132 define <vscale x 4 x i32> @sqabs_i32_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
133 ; CHECK-LABEL: sqabs_i32_active:
135 ; CHECK-NEXT: ptrue p0.s
136 ; CHECK-NEXT: movprfx z0, z1
137 ; CHECK-NEXT: sqabs z0.s, p0/m, z1.s
139 %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
140 %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
141 ret <vscale x 4 x i32> %ret
; .s variant of the narrower-predicate case: passthru is %a, source is %b. The
; predicate starts as a <vscale x 2 x i1> ptrue(i32 31), is widened with
; convert.to.svbool and then narrowed to <vscale x 4 x i1> with
; convert.from.svbool, so it is not a .s-width ptrue.
; Expected output: ptrue p0.d immediately followed by the merging sqabs into
; z0, with no movprfx in between.
144 define <vscale x 4 x i32> @sqabs_i32_not_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
145 ; CHECK-LABEL: sqabs_i32_not_active:
147 ; CHECK-NEXT: ptrue p0.d
148 ; CHECK-NEXT: sqabs z0.s, p0/m, z1.s
150 %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
151 %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
152 %pg.from = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.to)
153 %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg.from, <vscale x 4 x i32> %b)
154 ret <vscale x 4 x i32> %ret
; .d variant of the same-register case: undef passthru, %a is the only vector
; operand, predicate from ptrue(i32 31) at .d width.
; Expected output: ptrue immediately followed by sqabs with z0 as both
; destination and source; no movprfx.
157 define <vscale x 2 x i64> @sqabs_i64_dupreg(<vscale x 2 x i64> %a) #0 {
158 ; CHECK-LABEL: sqabs_i64_dupreg:
160 ; CHECK-NEXT: ptrue p0.d
161 ; CHECK-NEXT: sqabs z0.d, p0/m, z0.d
163 %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
164 %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
165 ret <vscale x 2 x i64> %ret
; .d variant of the undef-passthru case: the source is %b, %a is unused, and
; the predicate comes from ptrue(i32 31) at .d width.
; Expected output: movprfx z0, z1 sits between the ptrue and the sqabs.
168 define <vscale x 2 x i64> @sqabs_i64_undef(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
169 ; CHECK-LABEL: sqabs_i64_undef:
171 ; CHECK-NEXT: ptrue p0.d
172 ; CHECK-NEXT: movprfx z0, z1
173 ; CHECK-NEXT: sqabs z0.d, p0/m, z1.d
175 %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
176 %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
177 ret <vscale x 2 x i64> %ret
; .d variant of the ptrue-predicate case: passthru is %a, source is %b, and the
; predicate comes from ptrue(i32 31) at .d width.
; Expected output: movprfx z0, z1 precedes the sqabs, so the incoming z0 value
; is overwritten rather than merged.
180 define <vscale x 2 x i64> @sqabs_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
181 ; CHECK-LABEL: sqabs_i64_active:
183 ; CHECK-NEXT: ptrue p0.d
184 ; CHECK-NEXT: movprfx z0, z1
185 ; CHECK-NEXT: sqabs z0.d, p0/m, z1.d
187 %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
188 %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
189 ret <vscale x 2 x i64> %ret
; .d variant of the unknown-predicate case. Unlike the narrower element sizes,
; the predicate here is the function argument %pg rather than a converted
; ptrue, so nothing in the function establishes which lanes are active.
; Expected output: the merging sqabs into z0 with z1 as the source.
; NOTE(review): the sqabs assertion below is the only one in this file that
; does not use the "-NEXT" form, so on its own it does not require adjacency
; to the previous match - confirm this is what the generator script emitted.
192 define <vscale x 2 x i64> @sqabs_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
193 ; CHECK-LABEL: sqabs_i64_not_active:
195 ; CHECK: sqabs z0.d, p0/m, z1.d
197 %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
198 ret <vscale x 2 x i64> %ret
202 ; URECPE (sve2_int_un_pred_arit_s)
; urecpe on .s elements with an undef passthru and %a as the only vector
; operand; the predicate comes from ptrue(i32 31) at .s width.
; Expected output: ptrue immediately followed by urecpe with z0 as both
; destination and source; no movprfx.
205 define <vscale x 4 x i32> @urecpe_i32_dupreg(<vscale x 4 x i32> %a) #0 {
206 ; CHECK-LABEL: urecpe_i32_dupreg:
208 ; CHECK-NEXT: ptrue p0.s
209 ; CHECK-NEXT: urecpe z0.s, p0/m, z0.s
211 %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
212 %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
213 ret <vscale x 4 x i32> %ret
; urecpe on .s elements with an undef passthru; the source is %b while %a is
; unused, and the predicate comes from ptrue(i32 31) at .s width.
; Expected output: movprfx z0, z1 sits between the ptrue and the urecpe.
216 define <vscale x 4 x i32> @urecpe_i32_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
217 ; CHECK-LABEL: urecpe_i32_undef:
219 ; CHECK-NEXT: ptrue p0.s
220 ; CHECK-NEXT: movprfx z0, z1
221 ; CHECK-NEXT: urecpe z0.s, p0/m, z1.s
223 %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
224 %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
225 ret <vscale x 4 x i32> %ret
; urecpe on .s elements with %a as the passthru and %b as the source; the
; predicate comes from ptrue(i32 31) at .s width.
; Expected output: movprfx z0, z1 precedes the urecpe, so the incoming z0
; value is overwritten rather than merged.
228 define <vscale x 4 x i32> @urecpe_i32_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
229 ; CHECK-LABEL: urecpe_i32_active:
231 ; CHECK-NEXT: ptrue p0.s
232 ; CHECK-NEXT: movprfx z0, z1
233 ; CHECK-NEXT: urecpe z0.s, p0/m, z1.s
235 %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
236 %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
237 ret <vscale x 4 x i32> %ret
; urecpe on .s elements with %a as the passthru and %b as the source. The
; predicate starts as a <vscale x 2 x i1> ptrue(i32 31), is widened with
; convert.to.svbool and then narrowed to <vscale x 4 x i1> with
; convert.from.svbool, so it is not a .s-width ptrue.
; Expected output: ptrue p0.d immediately followed by the merging urecpe into
; z0, with no movprfx in between.
240 define <vscale x 4 x i32> @urecpe_i32_not_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
241 ; CHECK-LABEL: urecpe_i32_not_active:
243 ; CHECK-NEXT: ptrue p0.d
244 ; CHECK-NEXT: urecpe z0.s, p0/m, z1.s
246 %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
247 %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
248 %pg.from = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.to)
249 %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg.from, <vscale x 4 x i32> %b)
250 ret <vscale x 4 x i32> %ret
253 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
254 declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
255 declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
256 declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
258 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
259 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
260 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
262 declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
263 declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
264 declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
266 declare <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
267 declare <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
268 declare <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
269 declare <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
271 declare <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
273 attributes #0 = { nounwind "target-features"="+sve2" }