1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; and.u on i8 elements with an all-true predicate (splat of i1 true) and a
; splat of 7. The expected instruction is the immediate form of AND with no
; predicate operand. The label line below anchors the expected-output lines to
; this function, matching every other function in this file.
8 define <vscale x 16 x i8> @and_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: and_i8:
11 ; CHECK-NEXT: and z0.b, z0.b, #0x7
13 %pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
14 %b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 7, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
15 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1> %pg,
16 <vscale x 16 x i8> %a,
17 <vscale x 16 x i8> %b)
18 ret <vscale x 16 x i8> %out
; and.u on i16 elements with an all-true predicate and a splat of 240 (0xf0).
; The expected instruction is the immediate form of AND with no predicate
; operand.
21 define <vscale x 8 x i16> @and_i16(<vscale x 8 x i16> %a) {
22 ; CHECK-LABEL: and_i16:
24 ; CHECK-NEXT: and z0.h, z0.h, #0xf0
26 %pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
27 %b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 240, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
28 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> %pg,
29 <vscale x 8 x i16> %a,
30 <vscale x 8 x i16> %b)
31 ret <vscale x 8 x i16> %out
; and.u on i32 elements with an all-true predicate and a splat of 16776960
; (0xffff00). The expected instruction is the immediate form of AND with no
; predicate operand.
34 define <vscale x 4 x i32> @and_i32(<vscale x 4 x i32> %a) {
35 ; CHECK-LABEL: and_i32:
37 ; CHECK-NEXT: and z0.s, z0.s, #0xffff00
39 %pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
40 %b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16776960, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
41 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> %pg,
42 <vscale x 4 x i32> %a,
43 <vscale x 4 x i32> %b)
44 ret <vscale x 4 x i32> %out
; and.u on i64 elements with an all-true predicate and a splat of
; 18445618173802708992 (0xfffc000000000000). The expected instruction is the
; immediate form of AND with no predicate operand.
47 define <vscale x 2 x i64> @and_i64(<vscale x 2 x i64> %a) {
48 ; CHECK-LABEL: and_i64:
50 ; CHECK-NEXT: and z0.d, z0.d, #0xfffc000000000000
52 %pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
53 %b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 18445618173802708992, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
54 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> %pg,
55 <vscale x 2 x i64> %a,
56 <vscale x 2 x i64> %b)
57 ret <vscale x 2 x i64> %out
; bic.u on i8 elements with an all-true predicate and a splat of 254 (0xfe).
; The expected instruction is AND with immediate #0x1, which is the bitwise
; complement of the splat value within 8 bits.
64 define <vscale x 16 x i8> @bic_i8(<vscale x 16 x i8> %a) {
65 ; CHECK-LABEL: bic_i8:
67 ; CHECK-NEXT: and z0.b, z0.b, #0x1
69 %pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
70 %b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 254, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
71 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1> %pg,
72 <vscale x 16 x i8> %a,
73 <vscale x 16 x i8> %b)
74 ret <vscale x 16 x i8> %out
; bic.u on i16 elements with an all-true predicate and a splat of 65534
; (0xfffe). The expected instruction is AND with immediate #0x1, which is the
; bitwise complement of the splat value within 16 bits.
77 define <vscale x 8 x i16> @bic_i16(<vscale x 8 x i16> %a) {
78 ; CHECK-LABEL: bic_i16:
80 ; CHECK-NEXT: and z0.h, z0.h, #0x1
82 %pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
83 %b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 65534, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
84 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1> %pg,
85 <vscale x 8 x i16> %a,
86 <vscale x 8 x i16> %b)
87 ret <vscale x 8 x i16> %out
; bic.u on i32 elements with an all-true predicate and a splat of 16776960
; (0x00ffff00). The expected instruction is AND with immediate #0xff0000ff,
; which is the bitwise complement of the splat value within 32 bits.
90 define <vscale x 4 x i32> @bic_i32(<vscale x 4 x i32> %a) {
91 ; CHECK-LABEL: bic_i32:
93 ; CHECK-NEXT: and z0.s, z0.s, #0xff0000ff
95 %pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
96 %b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16776960, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
97 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1> %pg,
98 <vscale x 4 x i32> %a,
99 <vscale x 4 x i32> %b)
100 ret <vscale x 4 x i32> %out
; bic.u on i64 elements with an all-true predicate and a splat of
; 18445618173802708992 (0xfffc000000000000). The expected instruction is AND
; with immediate #0x3ffffffffffff, which is the bitwise complement of the splat
; value within 64 bits.
103 define <vscale x 2 x i64> @bic_i64(<vscale x 2 x i64> %a) {
104 ; CHECK-LABEL: bic_i64:
106 ; CHECK-NEXT: and z0.d, z0.d, #0x3ffffffffffff
108 %pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
109 %b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 18445618173802708992, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
110 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1> %pg,
111 <vscale x 2 x i64> %a,
112 <vscale x 2 x i64> %b)
113 ret <vscale x 2 x i64> %out
; eor.u on i8 elements with an all-true predicate and a splat of 15 (0xf).
; The expected instruction is the immediate form of EOR with no predicate
; operand.
120 define <vscale x 16 x i8> @eor_i8(<vscale x 16 x i8> %a) {
121 ; CHECK-LABEL: eor_i8:
123 ; CHECK-NEXT: eor z0.b, z0.b, #0xf
125 %pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
126 %b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 15, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
127 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1> %pg,
128 <vscale x 16 x i8> %a,
129 <vscale x 16 x i8> %b)
130 ret <vscale x 16 x i8> %out
; eor.u on i16 elements with an all-true predicate and a splat of 64519
; (0xfc07). The expected instruction is the immediate form of EOR with no
; predicate operand.
133 define <vscale x 8 x i16> @eor_i16(<vscale x 8 x i16> %a) {
134 ; CHECK-LABEL: eor_i16:
136 ; CHECK-NEXT: eor z0.h, z0.h, #0xfc07
138 %pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
139 %b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 64519, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
140 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1> %pg,
141 <vscale x 8 x i16> %a,
142 <vscale x 8 x i16> %b)
143 ret <vscale x 8 x i16> %out
; eor.u on i32 elements with an all-true predicate and a splat of 16776960
; (0xffff00). The expected instruction is the immediate form of EOR with no
; predicate operand.
146 define <vscale x 4 x i32> @eor_i32(<vscale x 4 x i32> %a) {
147 ; CHECK-LABEL: eor_i32:
149 ; CHECK-NEXT: eor z0.s, z0.s, #0xffff00
151 %pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
152 %b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16776960, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
153 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> %pg,
154 <vscale x 4 x i32> %a,
155 <vscale x 4 x i32> %b)
156 ret <vscale x 4 x i32> %out
; eor.u on i64 elements with an all-true predicate and a splat of
; 281474976710656 (0x1000000000000). The expected instruction is the immediate
; form of EOR with no predicate operand.
159 define <vscale x 2 x i64> @eor_i64(<vscale x 2 x i64> %a) {
160 ; CHECK-LABEL: eor_i64:
162 ; CHECK-NEXT: eor z0.d, z0.d, #0x1000000000000
164 %pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
165 %b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 281474976710656, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
166 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1> %pg,
167 <vscale x 2 x i64> %a,
168 <vscale x 2 x i64> %b)
169 ret <vscale x 2 x i64> %out
; orr.u on i8 elements with an all-true predicate and a splat of 6. The
; expected instruction is the immediate form of ORR with no predicate operand.
176 define <vscale x 16 x i8> @orr_i8(<vscale x 16 x i8> %a) {
177 ; CHECK-LABEL: orr_i8:
179 ; CHECK-NEXT: orr z0.b, z0.b, #0x6
181 %pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
182 %b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 6, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
183 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1> %pg,
184 <vscale x 16 x i8> %a,
185 <vscale x 16 x i8> %b)
186 ret <vscale x 16 x i8> %out
; orr.u on i16 elements with an all-true predicate and a splat of 32769
; (0x8001). The expected instruction is the immediate form of ORR with no
; predicate operand.
189 define <vscale x 8 x i16> @orr_i16(<vscale x 8 x i16> %a) {
190 ; CHECK-LABEL: orr_i16:
192 ; CHECK-NEXT: orr z0.h, z0.h, #0x8001
194 %pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
195 %b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 32769, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
196 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1> %pg,
197 <vscale x 8 x i16> %a,
198 <vscale x 8 x i16> %b)
199 ret <vscale x 8 x i16> %out
; orr.u on i32 elements with an all-true predicate and a splat of 65535
; (0xffff). The expected instruction is the immediate form of ORR with no
; predicate operand.
202 define <vscale x 4 x i32> @orr_i32(<vscale x 4 x i32> %a) {
203 ; CHECK-LABEL: orr_i32:
205 ; CHECK-NEXT: orr z0.s, z0.s, #0xffff
207 %pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
208 %b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 65535, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
209 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> %pg,
210 <vscale x 4 x i32> %a,
211 <vscale x 4 x i32> %b)
212 ret <vscale x 4 x i32> %out
; orr.u on i64 elements with an all-true predicate and a splat of
; 9222246136947933184 (0x7ffc000000000000). The expected instruction is the
; immediate form of ORR with no predicate operand.
215 define <vscale x 2 x i64> @orr_i64(<vscale x 2 x i64> %a) {
216 ; CHECK-LABEL: orr_i64:
218 ; CHECK-NEXT: orr z0.d, z0.d, #0x7ffc000000000000
220 %pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
221 %b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 9222246136947933184, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
222 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1> %pg,
223 <vscale x 2 x i64> %a,
224 <vscale x 2 x i64> %b)
225 ret <vscale x 2 x i64> %out
228 ; As orr_i32 but where pg is i8 based and thus compatible for i32.
229 define <vscale x 4 x i32> @orr_i32_ptrue_all_b(<vscale x 4 x i32> %a) {
230 ; CHECK-LABEL: orr_i32_ptrue_all_b:
232 ; CHECK-NEXT: orr z0.s, z0.s, #0xffff
; The predicate is created at byte granularity with ptrue pattern operand 31
; (presumably the "all" pattern, per the function name) and then converted
; from svbool to the nxv4i1 form that the i32 operation takes.
234 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
235 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
; The constant operand comes from the dup.x intrinsic instead of a
; shufflevector splat; the immediate form of ORR is still the expected output.
236 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 65535)
237 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> %pg.s,
238 <vscale x 4 x i32> %a,
239 <vscale x 4 x i32> %b)
240 ret <vscale x 4 x i32> %out
243 ; As orr_i32 but where pg is i16 based and thus compatible for i32.
244 define <vscale x 4 x i32> @orr_i32_ptrue_all_h(<vscale x 4 x i32> %a) {
245 ; CHECK-LABEL: orr_i32_ptrue_all_h:
247 ; CHECK-NEXT: orr z0.s, z0.s, #0xffff
; The predicate is created at halfword granularity with ptrue pattern operand
; 31 (presumably the "all" pattern, per the function name), widened to svbool,
; and then converted to the nxv4i1 form that the i32 operation takes.
249 %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
250 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
251 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
; The constant operand comes from the dup.x intrinsic; the immediate form of
; ORR is still the expected output.
252 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 65535)
253 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> %pg.s,
254 <vscale x 4 x i32> %a,
255 <vscale x 4 x i32> %b)
256 ret <vscale x 4 x i32> %out
259 ; As orr_i32 but where pg is i64 based, which is not compatible for i32 and
260 ; thus inactive lanes are important and the immediate form cannot be used.
261 define <vscale x 4 x i32> @orr_i32_ptrue_all_d(<vscale x 4 x i32> %a) {
262 ; CHECK-LABEL: orr_i32_ptrue_all_d:
264 ; CHECK-NEXT: ptrue p0.d
265 ; CHECK-NEXT: mov z1.s, #65535 // =0xffff
266 ; CHECK-NEXT: orr z0.s, p0/m, z0.s, z1.s
; The predicate is created at doubleword granularity, widened to svbool, and
; then converted to nxv4i1 for use on i32 elements.
268 %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
269 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
270 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
271 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 65535)
; This call uses the orr intrinsic without the ".u" suffix, unlike the other
; orr tests in this file. The expected output keeps the ptrue, materialises
; the constant in a register, and uses the merging predicated ORR.
272 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg.s,
273 <vscale x 4 x i32> %a,
274 <vscale x 4 x i32> %b)
275 ret <vscale x 4 x i32> %out
; Declarations of the logical-operation intrinsics called above. Each takes a
; predicate followed by two data vectors of the same type and returns that
; data vector type.

; and.u for i8, i16, i32 and i64 elements.
278 declare <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
279 declare <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
280 declare <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
281 declare <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
; bic.u for i8, i16, i32 and i64 elements.
283 declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
284 declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
285 declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
286 declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
; eor.u for i8, i16, i32 and i64 elements.
288 declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
289 declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
290 declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
291 declare <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
; orr without the ".u" suffix, i32 elements only (called by orr_i32_ptrue_all_d).
293 declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
; orr.u for i8, i16, i32 and i64 elements.
295 declare <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
296 declare <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
297 declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
298 declare <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
; Predicate conversions from the <vscale x 16 x i1> svbool form to a narrower
; predicate type. The name suffix names the narrower type, which is the return
; type here.
300 declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
301 declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
302 declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
; Predicate conversions from a narrower predicate type to svbool. The name
; suffix names the narrower type, which is the parameter type here, so the
; nxv4i1 variant takes <vscale x 4 x i1> like its nxv8i1 and nxv2i1 siblings.
304 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
305 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
306 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
; dup.x takes a scalar i32 and returns a <vscale x 4 x i32> vector; the tests
; above use it to build the constant operand.
308 declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
; ptrue takes an i32 pattern operand and returns a predicate of the type named
; in the suffix.
310 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
311 declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
312 declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
313 declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)