; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

; USRA
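
; An unpredicated lshr of a splat immediate feeding an add should be selected
; as the destructive USRA (unsigned shift right and accumulate) instruction
; rather than a separate shift and add.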

define <vscale x 16 x i8> @usra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: usra_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.b, z1.b, #1
; CHECK-NEXT:    ret
  %shift = lshr <vscale x 16 x i8> %b, splat(i8 1)
  %add = add <vscale x 16 x i8> %a, %shift
  ret <vscale x 16 x i8> %add
}

define <vscale x 8 x i16> @usra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: usra_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.h, z1.h, #2
; CHECK-NEXT:    ret
  %shift = lshr <vscale x 8 x i16> %b, splat(i16 2)
  %add = add <vscale x 8 x i16> %a, %shift
  ret <vscale x 8 x i16> %add
}

define <vscale x 4 x i32> @usra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: usra_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.s, z1.s, #3
; CHECK-NEXT:    ret
  %shift = lshr <vscale x 4 x i32> %b, splat(i32 3)
  %add = add <vscale x 4 x i32> %a, %shift
  ret <vscale x 4 x i32> %add
}

define <vscale x 2 x i64> @usra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: usra_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.d, z1.d, #4
; CHECK-NEXT:    ret
  %shift = lshr <vscale x 2 x i64> %b, splat(i64 4)
  %add = add <vscale x 2 x i64> %a, %shift
  ret <vscale x 2 x i64> %add
}
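
; The same fold is expected when the shift is written with the
; @llvm.aarch64.sve.lsr.u intrinsic and an all-active predicate (ptrue pattern 31).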

define <vscale x 16 x i8> @usra_intr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: usra_intr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.b, z1.b, #1
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %shift = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, <vscale x 16 x i8> splat(i8 1))
  %add = add <vscale x 16 x i8> %a, %shift
  ret <vscale x 16 x i8> %add
}

define <vscale x 8 x i16> @usra_intr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: usra_intr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.h, z1.h, #2
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %shift = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %b, <vscale x 8 x i16> splat(i16 2))
  %add = add <vscale x 8 x i16> %a, %shift
  ret <vscale x 8 x i16> %add
}

define <vscale x 4 x i32> @usra_intr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: usra_intr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.s, z1.s, #3
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %shift = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> splat(i32 3))
  %add = add <vscale x 4 x i32> %a, %shift
  ret <vscale x 4 x i32> %add
}

define <vscale x 2 x i64> @usra_intr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: usra_intr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.d, z1.d, #4
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %shift = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> splat(i64 4))
  %add = add <vscale x 2 x i64> %a, %shift
  ret <vscale x 2 x i64> %add
}
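
; The lsr.u intrinsic leaves inactive result lanes undefined, so the fold is
; also expected with an arbitrary incoming predicate.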

define <vscale x 16 x i8> @usra_intr_u_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: usra_intr_u_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.b, z1.b, #1
; CHECK-NEXT:    ret
  %shift = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, <vscale x 16 x i8> splat(i8 1))
  %add = add <vscale x 16 x i8> %a, %shift
  ret <vscale x 16 x i8> %add
}

define <vscale x 8 x i16> @usra_intr_u_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: usra_intr_u_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.h, z1.h, #2
; CHECK-NEXT:    ret
  %shift = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %b, <vscale x 8 x i16> splat(i16 2))
  %add = add <vscale x 8 x i16> %a, %shift
  ret <vscale x 8 x i16> %add
}

define <vscale x 4 x i32> @usra_intr_u_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: usra_intr_u_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.s, z1.s, #3
; CHECK-NEXT:    ret
  %shift = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> splat(i32 3))
  %add = add <vscale x 4 x i32> %a, %shift
  ret <vscale x 4 x i32> %add
}

define <vscale x 2 x i64> @usra_intr_u_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: usra_intr_u_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.d, z1.d, #4
; CHECK-NEXT:    ret
  %shift = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> splat(i64 4))
  %add = add <vscale x 2 x i64> %a, %shift
  ret <vscale x 2 x i64> %add
}

; SSRA
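
; The signed tests mirror the unsigned ones above: an ashr (or asr.u intrinsic)
; feeding an add should be selected as SSRA (signed shift right and accumulate).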

define <vscale x 16 x i8> @ssra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: ssra_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssra z0.b, z1.b, #1
; CHECK-NEXT:    ret
  %shift = ashr <vscale x 16 x i8> %b, splat(i8 1)
  %add = add <vscale x 16 x i8> %a, %shift
  ret <vscale x 16 x i8> %add
}

define <vscale x 8 x i16> @ssra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: ssra_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssra z0.h, z1.h, #2
; CHECK-NEXT:    ret
  %shift = ashr <vscale x 8 x i16> %b, splat(i16 2)
  %add = add <vscale x 8 x i16> %a, %shift
  ret <vscale x 8 x i16> %add
}

define <vscale x 4 x i32> @ssra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: ssra_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssra z0.s, z1.s, #3
; CHECK-NEXT:    ret
  %shift = ashr <vscale x 4 x i32> %b, splat(i32 3)
  %add = add <vscale x 4 x i32> %a, %shift
  ret <vscale x 4 x i32> %add
}

define <vscale x 2 x i64> @ssra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: ssra_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssra z0.d, z1.d, #4
; CHECK-NEXT:    ret
  %shift = ashr <vscale x 2 x i64> %b, splat(i64 4)
  %add = add <vscale x 2 x i64> %a, %shift
  ret <vscale x 2 x i64> %add
}

define <vscale x 16 x i8> @ssra_intr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: ssra_intr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssra z0.b, z1.b, #1
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %shift = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, <vscale x 16 x i8> splat(i8 1))
  %add = add <vscale x 16 x i8> %a, %shift
  ret <vscale x 16 x i8> %add
}

define <vscale x 8 x i16> @ssra_intr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: ssra_intr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssra z0.h, z1.h, #2
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %shift = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %b, <vscale x 8 x i16> splat(i16 2))
  %add = add <vscale x 8 x i16> %a, %shift
  ret <vscale x 8 x i16> %add
}

define <vscale x 4 x i32> @ssra_intr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: ssra_intr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssra z0.s, z1.s, #3
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %shift = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> splat(i32 3))
  %add = add <vscale x 4 x i32> %a, %shift
  ret <vscale x 4 x i32> %add
}

define <vscale x 2 x i64> @ssra_intr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: ssra_intr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssra z0.d, z1.d, #4
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %shift = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> splat(i64 4))
  %add = add <vscale x 2 x i64> %a, %shift
  ret <vscale x 2 x i64> %add
}

define <vscale x 16 x i8> @ssra_intr_u_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: ssra_intr_u_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssra z0.b, z1.b, #1
; CHECK-NEXT:    ret
  %shift = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, <vscale x 16 x i8> splat(i8 1))
  %add = add <vscale x 16 x i8> %a, %shift
  ret <vscale x 16 x i8> %add
}

define <vscale x 8 x i16> @ssra_intr_u_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: ssra_intr_u_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssra z0.h, z1.h, #2
; CHECK-NEXT:    ret
  %shift = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %b, <vscale x 8 x i16> splat(i16 2))
  %add = add <vscale x 8 x i16> %a, %shift
  ret <vscale x 8 x i16> %add
}

define <vscale x 4 x i32> @ssra_intr_u_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: ssra_intr_u_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssra z0.s, z1.s, #3
; CHECK-NEXT:    ret
  %shift = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> splat(i32 3))
  %add = add <vscale x 4 x i32> %a, %shift
  ret <vscale x 4 x i32> %add
}

define <vscale x 2 x i64> @ssra_intr_u_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: ssra_intr_u_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssra z0.d, z1.d, #4
; CHECK-NEXT:    ret
  %shift = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> splat(i64 4))
  %add = add <vscale x 2 x i64> %a, %shift
  ret <vscale x 2 x i64> %add
}

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 immarg)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 immarg)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 immarg)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

attributes #0 = { "target-features"="+sve,+sve2" }
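
; If the IR changes, the CHECK lines can be regenerated with the script named
; in the NOTE above, e.g. llvm/utils/update_llc_test_checks.py run on this file
; with an llc build that includes the AArch64 target (see --llc-binary).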