1 ; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
; and_i8: AND every i8 lane of %a with a splat of 7 under an all-true
; predicate (%pg and %b are insertelement+shufflevector splats). The expected
; output is the unpredicated immediate form of AND.
; The label line below anchors the instruction pattern to this function's
; output, matching the convention used by every other test in this file.
7 define <vscale x 16 x i8> @and_i8(<vscale x 16 x i8> %a) #0 {
8 ; CHECK-LABEL: and_i8:
9 ; CHECK: and z0.b, z0.b, #0x7
11 %pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
12 %b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 7, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
13 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> %pg,
14 <vscale x 16 x i8> %a,
15 <vscale x 16 x i8> %b)
16 ret <vscale x 16 x i8> %out
; and_i16: AND every i16 lane of %a with a splat of 240 (0xf0) under an
; all-true predicate (%pg and %b are insertelement+shufflevector splats).
; The expected output is the unpredicated immediate form of AND.
19 define <vscale x 8 x i16> @and_i16(<vscale x 8 x i16> %a) #0 {
20 ; CHECK-LABEL: and_i16:
21 ; CHECK: and z0.h, z0.h, #0xf0
23 %pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
24 %b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 240, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
25 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> %pg,
26 <vscale x 8 x i16> %a,
27 <vscale x 8 x i16> %b)
28 ret <vscale x 8 x i16> %out
; and_i32: AND every i32 lane of %a with a splat of 16776960 (0xffff00) under
; an all-true predicate (%pg and %b are insertelement+shufflevector splats).
; The expected output is the unpredicated immediate form of AND.
31 define <vscale x 4 x i32> @and_i32(<vscale x 4 x i32> %a) #0 {
32 ; CHECK-LABEL: and_i32:
33 ; CHECK: and z0.s, z0.s, #0xffff00
35 %pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
36 %b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16776960, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
37 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> %pg,
38 <vscale x 4 x i32> %a,
39 <vscale x 4 x i32> %b)
40 ret <vscale x 4 x i32> %out
; and_i64: AND every i64 lane of %a with a splat of 18445618173802708992
; (0xfffc000000000000) under an all-true predicate (%pg and %b are
; insertelement+shufflevector splats). The expected output is the unpredicated
; immediate form of AND.
43 define <vscale x 2 x i64> @and_i64(<vscale x 2 x i64> %a) #0 {
44 ; CHECK-LABEL: and_i64:
45 ; CHECK: and z0.d, z0.d, #0xfffc000000000000
47 %pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
48 %b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 18445618173802708992, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
49 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> %pg,
50 <vscale x 2 x i64> %a,
51 <vscale x 2 x i64> %b)
52 ret <vscale x 2 x i64> %out
; bic_i8: apply the SVE bic intrinsic to %a and a splat of 254 under an
; all-true predicate (%pg and %b are insertelement+shufflevector splats).
; The expected output is an immediate AND with the complemented constant:
; ~254 in 8 bits is 0x1.
59 define <vscale x 16 x i8> @bic_i8(<vscale x 16 x i8> %a) #0 {
60 ; CHECK-LABEL: bic_i8:
61 ; CHECK: and z0.b, z0.b, #0x1
63 %pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
64 %b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 254, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
65 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> %pg,
66 <vscale x 16 x i8> %a,
67 <vscale x 16 x i8> %b)
68 ret <vscale x 16 x i8> %out
; bic_i16: apply the SVE bic intrinsic to %a and a splat of 65534 under an
; all-true predicate (%pg and %b are insertelement+shufflevector splats).
; The expected output is an immediate AND with the complemented constant:
; ~65534 in 16 bits is 0x1.
71 define <vscale x 8 x i16> @bic_i16(<vscale x 8 x i16> %a) #0 {
72 ; CHECK-LABEL: bic_i16:
73 ; CHECK: and z0.h, z0.h, #0x1
75 %pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
76 %b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 65534, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
77 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> %pg,
78 <vscale x 8 x i16> %a,
79 <vscale x 8 x i16> %b)
80 ret <vscale x 8 x i16> %out
; bic_i32: apply the SVE bic intrinsic to %a and a splat of 16776960
; (0x00ffff00) under an all-true predicate (%pg and %b are
; insertelement+shufflevector splats). The expected output is an immediate AND
; with the complemented constant 0xff0000ff.
83 define <vscale x 4 x i32> @bic_i32(<vscale x 4 x i32> %a) #0 {
84 ; CHECK-LABEL: bic_i32:
85 ; CHECK: and z0.s, z0.s, #0xff0000ff
87 %pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
88 %b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16776960, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
89 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> %pg,
90 <vscale x 4 x i32> %a,
91 <vscale x 4 x i32> %b)
92 ret <vscale x 4 x i32> %out
; bic_i64: apply the SVE bic intrinsic to %a and a splat of
; 18445618173802708992 (0xfffc000000000000) under an all-true predicate (%pg
; and %b are insertelement+shufflevector splats). The expected output is an
; immediate AND with the complemented constant 0x3ffffffffffff.
95 define <vscale x 2 x i64> @bic_i64(<vscale x 2 x i64> %a) #0 {
96 ; CHECK-LABEL: bic_i64:
97 ; CHECK: and z0.d, z0.d, #0x3ffffffffffff
99 %pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
100 %b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 18445618173802708992, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
101 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %pg,
102 <vscale x 2 x i64> %a,
103 <vscale x 2 x i64> %b)
104 ret <vscale x 2 x i64> %out
; eor_i8: exclusive-OR every i8 lane of %a with a splat of 15 (0xf) under an
; all-true predicate (%pg and %b are insertelement+shufflevector splats).
; The expected output is the unpredicated immediate form of EOR.
111 define <vscale x 16 x i8> @eor_i8(<vscale x 16 x i8> %a) #0 {
112 ; CHECK-LABEL: eor_i8:
113 ; CHECK: eor z0.b, z0.b, #0xf
115 %pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
116 %b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 15, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
117 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> %pg,
118 <vscale x 16 x i8> %a,
119 <vscale x 16 x i8> %b)
120 ret <vscale x 16 x i8> %out
; eor_i16: exclusive-OR every i16 lane of %a with a splat of 64519 (0xfc07)
; under an all-true predicate (%pg and %b are insertelement+shufflevector
; splats). The expected output is the unpredicated immediate form of EOR.
123 define <vscale x 8 x i16> @eor_i16(<vscale x 8 x i16> %a) #0 {
124 ; CHECK-LABEL: eor_i16:
125 ; CHECK: eor z0.h, z0.h, #0xfc07
127 %pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
128 %b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 64519, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
129 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> %pg,
130 <vscale x 8 x i16> %a,
131 <vscale x 8 x i16> %b)
132 ret <vscale x 8 x i16> %out
; eor_i32: exclusive-OR every i32 lane of %a with a splat of 16776960
; (0xffff00) under an all-true predicate (%pg and %b are
; insertelement+shufflevector splats). The expected output is the unpredicated
; immediate form of EOR.
135 define <vscale x 4 x i32> @eor_i32(<vscale x 4 x i32> %a) #0 {
136 ; CHECK-LABEL: eor_i32:
137 ; CHECK: eor z0.s, z0.s, #0xffff00
139 %pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
140 %b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16776960, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
141 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> %pg,
142 <vscale x 4 x i32> %a,
143 <vscale x 4 x i32> %b)
144 ret <vscale x 4 x i32> %out
; eor_i64: exclusive-OR every i64 lane of %a with a splat of 281474976710656
; (0x1000000000000, a single set bit) under an all-true predicate (%pg and %b
; are insertelement+shufflevector splats). The expected output is the
; unpredicated immediate form of EOR.
147 define <vscale x 2 x i64> @eor_i64(<vscale x 2 x i64> %a) #0 {
148 ; CHECK-LABEL: eor_i64:
149 ; CHECK: eor z0.d, z0.d, #0x1000000000000
151 %pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
152 %b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 281474976710656, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
153 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> %pg,
154 <vscale x 2 x i64> %a,
155 <vscale x 2 x i64> %b)
156 ret <vscale x 2 x i64> %out
; orr_i8: OR every i8 lane of %a with a splat of 6 under an all-true
; predicate (%pg and %b are insertelement+shufflevector splats). The expected
; output is the unpredicated immediate form of ORR.
163 define <vscale x 16 x i8> @orr_i8(<vscale x 16 x i8> %a) #0 {
164 ; CHECK-LABEL: orr_i8:
165 ; CHECK: orr z0.b, z0.b, #0x6
167 %pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
168 %b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 6, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
169 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> %pg,
170 <vscale x 16 x i8> %a,
171 <vscale x 16 x i8> %b)
172 ret <vscale x 16 x i8> %out
; orr_i16: OR every i16 lane of %a with a splat of 32769 (0x8001) under an
; all-true predicate (%pg and %b are insertelement+shufflevector splats).
; The expected output is the unpredicated immediate form of ORR.
175 define <vscale x 8 x i16> @orr_i16(<vscale x 8 x i16> %a) #0 {
176 ; CHECK-LABEL: orr_i16:
177 ; CHECK: orr z0.h, z0.h, #0x8001
179 %pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
180 %b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 32769, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
181 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> %pg,
182 <vscale x 8 x i16> %a,
183 <vscale x 8 x i16> %b)
184 ret <vscale x 8 x i16> %out
; orr_i32: OR every i32 lane of %a with a splat of 65535 (0xffff) under an
; all-true predicate (%pg and %b are insertelement+shufflevector splats).
; The expected output is the unpredicated immediate form of ORR.
187 define <vscale x 4 x i32> @orr_i32(<vscale x 4 x i32> %a) #0 {
188 ; CHECK-LABEL: orr_i32:
189 ; CHECK: orr z0.s, z0.s, #0xffff
191 %pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
192 %b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 65535, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
193 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg,
194 <vscale x 4 x i32> %a,
195 <vscale x 4 x i32> %b)
196 ret <vscale x 4 x i32> %out
; orr_i64: OR every i64 lane of %a with a splat of 9222246136947933184
; (0x7ffc000000000000) under an all-true predicate (%pg and %b are
; insertelement+shufflevector splats). The expected output is the unpredicated
; immediate form of ORR.
199 define <vscale x 2 x i64> @orr_i64(<vscale x 2 x i64> %a) #0 {
200 ; CHECK-LABEL: orr_i64:
201 ; CHECK: orr z0.d, z0.d, #0x7ffc000000000000
203 %pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
204 %b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 9222246136947933184, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
205 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> %pg,
206 <vscale x 2 x i64> %a,
207 <vscale x 2 x i64> %b)
208 ret <vscale x 2 x i64> %out
211 ; As orr_i32 but where pg is i8 based and thus compatible for i32.
; The predicate is built with ptrue on <vscale x 16 x i1> (pattern operand 31;
; presumably the "all" pattern, per the function name) and narrowed to
; <vscale x 4 x i1> via convert.from.svbool. The splat of 65535 comes from
; dup.x rather than a shufflevector. The immediate form of ORR is still expected.
212 define <vscale x 4 x i32> @orr_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
213 ; CHECK-LABEL: orr_i32_ptrue_all_b:
214 ; CHECK: orr z0.s, z0.s, #0xffff
216 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
217 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
218 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 65535)
219 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg.s,
220 <vscale x 4 x i32> %a,
221 <vscale x 4 x i32> %b)
222 ret <vscale x 4 x i32> %out
225 ; As orr_i32 but where pg is i16 based and thus compatible for i32.
; The predicate is built with ptrue on <vscale x 8 x i1> (pattern operand 31;
; presumably the "all" pattern, per the function name), widened to
; <vscale x 16 x i1> via convert.to.svbool and then narrowed to
; <vscale x 4 x i1> via convert.from.svbool. The splat of 65535 comes from
; dup.x. The immediate form of ORR is still expected.
226 define <vscale x 4 x i32> @orr_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
227 ; CHECK-LABEL: orr_i32_ptrue_all_h:
228 ; CHECK: orr z0.s, z0.s, #0xffff
230 %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
231 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
232 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
233 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 65535)
234 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg.s,
235 <vscale x 4 x i32> %a,
236 <vscale x 4 x i32> %b)
237 ret <vscale x 4 x i32> %out
240 ; As orr_i32 but where pg is i64 based, which is not compatible for i32 and
241 ; thus inactive lanes are important and the immediate form cannot be used.
; The predicate is built with ptrue on <vscale x 2 x i1> (pattern operand 31;
; presumably the "all" pattern, per the function name), widened to
; <vscale x 16 x i1> via convert.to.svbool and then narrowed to
; <vscale x 4 x i1> via convert.from.svbool.
; Expected output: 65535 is materialised in a w register, broadcast into a
; z register, and combined with z0 by a merging-predicated (/m) ORR governed
; by a .d ptrue. All four patterns are order-independent (DAG-style) matches,
; tied together by the captured IMM, PG and DUP variables.
242 define <vscale x 4 x i32> @orr_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
243 ; CHECK-LABEL: orr_i32_ptrue_all_d:
244 ; CHECK-DAG: mov [[IMM:w[0-9]+]], #65535
245 ; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
246 ; CHECK-DAG: mov [[DUP:z[0-9]+]].s, [[IMM]]
247 ; CHECK-DAG: orr z0.s, [[PG]]/m, z0.s, [[DUP]].s
249 %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
250 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
251 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
252 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 65535)
253 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg.s,
254 <vscale x 4 x i32> %a,
255 <vscale x 4 x i32> %b)
256 ret <vscale x 4 x i32> %out
; Declarations of the predicated SVE logical intrinsics (and, bic, eor, orr),
; one per element width (i8, i16, i32, i64). Each takes a predicate followed by
; two data vectors; the predicate has the same lane count as the data vectors,
; and the result has the same type as the data operands.
259 declare <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
260 declare <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
261 declare <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
262 declare <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
264 declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
265 declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
266 declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
267 declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
269 declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
270 declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
271 declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
272 declare <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
274 declare <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
275 declare <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
276 declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
277 declare <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
; Conversions between the <vscale x 16 x i1> predicate type and narrower
; predicate types. The type suffix names the narrow type: it is the result
; type for convert.from.svbool and the operand type for convert.to.svbool.
279 declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
280 declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
281 declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
283 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
284 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
285 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
; dup.x takes a scalar i32 and returns a <vscale x 4 x i32> vector; the
; ptrue_all tests use it to build the constant operand.
287 declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
; ptrue takes an i32 pattern operand and returns a predicate of the element
; granularity named by the type suffix.
289 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
290 declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
291 declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
292 declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
; Attribute group #0, referenced by every function definition in this file,
; enables the SVE target feature.
294 attributes #0 = { "target-features"="+sve" }