1 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
; Declarations of the llvm.aarch64.neon.* intrinsics made available to this
; test: pmull, sqdmull, sqsub, sqadd, umull, smull, uabd, sabd, rsubhn and
; raddhn, each at the vector widths spelled out in its suffix.
; NOTE(review): of these, only the raddhn variants are called by the vadd*/vsub*
; tests that directly follow; the others are presumably used further down the
; file -- confirm before removing any of them.
3 declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>)
5 declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
7 declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
9 declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
11 declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
13 declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
15 declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
17 declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
19 declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
21 declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>)
23 declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
25 declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
27 declare <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>)
29 declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>)
31 declare <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>)
33 declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
35 declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>)
37 declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>)
39 declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>)
41 declare <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64>, <2 x i64>)
43 declare <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32>, <4 x i32>)
45 declare <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>)
47 declare <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64>, <2 x i64>)
49 declare <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>)
51 declare <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>)
; Widening signed add: both <8 x i8> inputs are sign-extended to <8 x i16>
; and the extended values are added.
; Expected instruction: saddl with an .8h destination and two .8b sources.
53 define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) {
54 ; CHECK-LABEL: test_vaddl_s8:
55 ; CHECK: saddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
57 %vmovl.i.i = sext <8 x i8> %a to <8 x i16>
58 %vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
59 %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
; Widening signed add: both <4 x i16> inputs are sign-extended to <4 x i32>
; and the extended values are added.
; Expected instruction: saddl with a .4s destination and two .4h sources.
63 define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) {
64 ; CHECK-LABEL: test_vaddl_s16:
65 ; CHECK: saddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
67 %vmovl.i.i = sext <4 x i16> %a to <4 x i32>
68 %vmovl.i2.i = sext <4 x i16> %b to <4 x i32>
69 %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i
; Widening signed add: both <2 x i32> inputs are sign-extended to <2 x i64>
; and the extended values are added.
; Expected instruction: saddl with a .2d destination and two .2s sources.
73 define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) {
74 ; CHECK-LABEL: test_vaddl_s32:
75 ; CHECK: saddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
77 %vmovl.i.i = sext <2 x i32> %a to <2 x i64>
78 %vmovl.i2.i = sext <2 x i32> %b to <2 x i64>
79 %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i
; Widening unsigned add: both <8 x i8> inputs are zero-extended to <8 x i16>
; and the extended values are added.
; Expected instruction: uaddl with an .8h destination and two .8b sources.
83 define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) {
84 ; CHECK-LABEL: test_vaddl_u8:
85 ; CHECK: uaddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
87 %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
88 %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
89 %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
; Widening unsigned add: both <4 x i16> inputs are zero-extended to <4 x i32>
; and the extended values are added.
; Expected instruction: uaddl with a .4s destination and two .4h sources.
93 define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) {
94 ; CHECK-LABEL: test_vaddl_u16:
95 ; CHECK: uaddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
97 %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
98 %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
99 %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i
; Widening unsigned add: both <2 x i32> inputs are zero-extended to <2 x i64>
; and the extended values are added.
; Expected instruction: uaddl with a .2d destination and two .2s sources.
103 define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) {
104 ; CHECK-LABEL: test_vaddl_u32:
105 ; CHECK: uaddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
107 %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
108 %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
109 %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i
; Widening signed add on the upper halves: each shufflevector selects lanes
; 8-15 of a <16 x i8> input; the two halves are sign-extended to <8 x i16>
; and added.
; Expected instruction: saddl2 with an .8h destination and two .16b sources.
113 define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) {
114 ; CHECK-LABEL: test_vaddl_high_s8:
115 ; CHECK: saddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
117 %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
118 %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
119 %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
120 %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16>
121 %add.i = add <8 x i16> %0, %1
; Widening signed add on the upper halves: each shufflevector selects lanes
; 4-7 of an <8 x i16> input; the two halves are sign-extended to <4 x i32>
; and added.
; Expected instruction: saddl2 with a .4s destination and two .8h sources.
125 define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) {
126 ; CHECK-LABEL: test_vaddl_high_s16:
127 ; CHECK: saddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
129 %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
130 %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
131 %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
132 %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32>
133 %add.i = add <4 x i32> %0, %1
; Widening signed add on the upper halves: each shufflevector selects lanes
; 2-3 of a <4 x i32> input; the two halves are sign-extended to <2 x i64>
; and added.
; Expected instruction: saddl2 with a .2d destination and two .4s sources.
137 define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) {
138 ; CHECK-LABEL: test_vaddl_high_s32:
139 ; CHECK: saddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
141 %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
142 %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
143 %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
144 %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64>
145 %add.i = add <2 x i64> %0, %1
; Widening unsigned add on the upper halves: each shufflevector selects lanes
; 8-15 of a <16 x i8> input; the two halves are zero-extended to <8 x i16>
; and added.
; Expected instruction: uaddl2 with an .8h destination and two .16b sources.
149 define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) {
150 ; CHECK-LABEL: test_vaddl_high_u8:
151 ; CHECK: uaddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
153 %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
154 %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
155 %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
156 %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
157 %add.i = add <8 x i16> %0, %1
; Widening unsigned add on the upper halves: each shufflevector selects lanes
; 4-7 of an <8 x i16> input; the two halves are zero-extended to <4 x i32>
; and added.
; Expected instruction: uaddl2 with a .4s destination and two .8h sources.
161 define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) {
162 ; CHECK-LABEL: test_vaddl_high_u16:
163 ; CHECK: uaddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
165 %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
166 %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
167 %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
168 %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
169 %add.i = add <4 x i32> %0, %1
; Widening unsigned add on the upper halves: each shufflevector selects lanes
; 2-3 of a <4 x i32> input; the two halves are zero-extended to <2 x i64>
; and added.
; Expected instruction: uaddl2 with a .2d destination and two .4s sources.
173 define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) {
174 ; CHECK-LABEL: test_vaddl_high_u32:
175 ; CHECK: uaddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
177 %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
178 %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
179 %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
180 %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
181 %add.i = add <2 x i64> %0, %1
; Wide signed add: the narrow <8 x i8> input %b is sign-extended to <8 x i16>
; and added to the already-wide input %a.
; Expected instruction: saddw with .8h destination, .8h and .8b sources.
185 define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) {
186 ; CHECK-LABEL: test_vaddw_s8:
187 ; CHECK: saddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
189 %vmovl.i.i = sext <8 x i8> %b to <8 x i16>
190 %add.i = add <8 x i16> %vmovl.i.i, %a
; Wide signed add: the narrow <4 x i16> input %b is sign-extended to <4 x i32>
; and added to the already-wide input %a.
; Expected instruction: saddw with .4s destination, .4s and .4h sources.
194 define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) {
195 ; CHECK-LABEL: test_vaddw_s16:
196 ; CHECK: saddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
198 %vmovl.i.i = sext <4 x i16> %b to <4 x i32>
199 %add.i = add <4 x i32> %vmovl.i.i, %a
; Wide signed add: the narrow <2 x i32> input %b is sign-extended to <2 x i64>
; and added to the already-wide input %a.
; Expected instruction: saddw with .2d destination, .2d and .2s sources.
203 define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) {
204 ; CHECK-LABEL: test_vaddw_s32:
205 ; CHECK: saddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
207 %vmovl.i.i = sext <2 x i32> %b to <2 x i64>
208 %add.i = add <2 x i64> %vmovl.i.i, %a
; Wide unsigned add: the narrow <8 x i8> input %b is zero-extended to <8 x i16>
; and added to the already-wide input %a.
; Expected instruction: uaddw with .8h destination, .8h and .8b sources.
212 define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) {
213 ; CHECK-LABEL: test_vaddw_u8:
214 ; CHECK: uaddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
216 %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
217 %add.i = add <8 x i16> %vmovl.i.i, %a
; Wide unsigned add: the narrow <4 x i16> input %b is zero-extended to <4 x i32>
; and added to the already-wide input %a.
; Expected instruction: uaddw with .4s destination, .4s and .4h sources.
221 define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) {
222 ; CHECK-LABEL: test_vaddw_u16:
223 ; CHECK: uaddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
225 %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
226 %add.i = add <4 x i32> %vmovl.i.i, %a
; Wide unsigned add: the narrow <2 x i32> input %b is zero-extended to <2 x i64>
; and added to the already-wide input %a.
; Expected instruction: uaddw with .2d destination, .2d and .2s sources.
230 define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) {
231 ; CHECK-LABEL: test_vaddw_u32:
232 ; CHECK: uaddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
234 %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
235 %add.i = add <2 x i64> %vmovl.i.i, %a
; Wide signed add using the upper half of %b: the shufflevector selects lanes
; 8-15 of the <16 x i8> input, which are sign-extended to <8 x i16> and added
; to %a.
; Expected instruction: saddw2 with .8h destination, .8h and .16b sources.
239 define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) {
240 ; CHECK-LABEL: test_vaddw_high_s8:
241 ; CHECK: saddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
243 %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
244 %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
245 %add.i = add <8 x i16> %0, %a
; Wide signed add using the upper half of %b: the shufflevector selects lanes
; 4-7 of the <8 x i16> input, which are sign-extended to <4 x i32> and added
; to %a.
; Expected instruction: saddw2 with .4s destination, .4s and .8h sources.
249 define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) {
250 ; CHECK-LABEL: test_vaddw_high_s16:
251 ; CHECK: saddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
253 %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
254 %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
255 %add.i = add <4 x i32> %0, %a
; Wide signed add using the upper half of %b: the shufflevector selects lanes
; 2-3 of the <4 x i32> input, which are sign-extended to <2 x i64> and added
; to %a.
; Expected instruction: saddw2 with .2d destination, .2d and .4s sources.
259 define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) {
260 ; CHECK-LABEL: test_vaddw_high_s32:
261 ; CHECK: saddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
263 %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
264 %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
265 %add.i = add <2 x i64> %0, %a
; Wide unsigned add using the upper half of %b: the shufflevector selects lanes
; 8-15 of the <16 x i8> input, which are zero-extended to <8 x i16> and added
; to %a.
; Expected instruction: uaddw2 with .8h destination, .8h and .16b sources.
269 define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) {
270 ; CHECK-LABEL: test_vaddw_high_u8:
271 ; CHECK: uaddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
273 %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
274 %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
275 %add.i = add <8 x i16> %0, %a
; Wide unsigned add using the upper half of %b: the shufflevector selects lanes
; 4-7 of the <8 x i16> input, which are zero-extended to <4 x i32> and added
; to %a.
; Expected instruction: uaddw2 with .4s destination, .4s and .8h sources.
279 define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) {
280 ; CHECK-LABEL: test_vaddw_high_u16:
281 ; CHECK: uaddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
283 %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
284 %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
285 %add.i = add <4 x i32> %0, %a
; Wide unsigned add using the upper half of %b: the shufflevector selects lanes
; 2-3 of the <4 x i32> input, which are zero-extended to <2 x i64> and added
; to %a.
; Expected instruction: uaddw2 with .2d destination, .2d and .4s sources.
289 define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) {
290 ; CHECK-LABEL: test_vaddw_high_u32:
291 ; CHECK: uaddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
293 %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
294 %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
295 %add.i = add <2 x i64> %0, %a
; Widening signed subtract: both <8 x i8> inputs are sign-extended to
; <8 x i16> and the extended %b is subtracted from the extended %a.
; Expected instruction: ssubl with an .8h destination and two .8b sources.
299 define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) {
300 ; CHECK-LABEL: test_vsubl_s8:
301 ; CHECK: ssubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
303 %vmovl.i.i = sext <8 x i8> %a to <8 x i16>
304 %vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
305 %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i
; Widening signed subtract: both <4 x i16> inputs are sign-extended to
; <4 x i32> and the extended %b is subtracted from the extended %a.
; Expected instruction: ssubl with a .4s destination and two .4h sources.
309 define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) {
310 ; CHECK-LABEL: test_vsubl_s16:
311 ; CHECK: ssubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
313 %vmovl.i.i = sext <4 x i16> %a to <4 x i32>
314 %vmovl.i2.i = sext <4 x i16> %b to <4 x i32>
315 %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i
; Widening signed subtract: both <2 x i32> inputs are sign-extended to
; <2 x i64> and the extended %b is subtracted from the extended %a.
; Expected instruction: ssubl with a .2d destination and two .2s sources.
319 define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) {
320 ; CHECK-LABEL: test_vsubl_s32:
321 ; CHECK: ssubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
323 %vmovl.i.i = sext <2 x i32> %a to <2 x i64>
324 %vmovl.i2.i = sext <2 x i32> %b to <2 x i64>
325 %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i
; Widening unsigned subtract: both <8 x i8> inputs are zero-extended to
; <8 x i16> and the extended %b is subtracted from the extended %a.
; Expected instruction: usubl with an .8h destination and two .8b sources.
329 define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) {
330 ; CHECK-LABEL: test_vsubl_u8:
331 ; CHECK: usubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
333 %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
334 %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
335 %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i
; Widening unsigned subtract: both <4 x i16> inputs are zero-extended to
; <4 x i32> and the extended %b is subtracted from the extended %a.
; Expected instruction: usubl with a .4s destination and two .4h sources.
339 define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) {
340 ; CHECK-LABEL: test_vsubl_u16:
341 ; CHECK: usubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
343 %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
344 %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
345 %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i
; Widening unsigned subtract: both <2 x i32> inputs are zero-extended to
; <2 x i64> and the extended %b is subtracted from the extended %a.
; Expected instruction: usubl with a .2d destination and two .2s sources.
349 define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) {
350 ; CHECK-LABEL: test_vsubl_u32:
351 ; CHECK: usubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
353 %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
354 %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
355 %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i
; Widening signed subtract on the upper halves: each shufflevector selects
; lanes 8-15 of a <16 x i8> input; the halves are sign-extended to <8 x i16>
; and the %b half is subtracted from the %a half.
; Expected instruction: ssubl2 with an .8h destination and two .16b sources.
359 define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) {
360 ; CHECK-LABEL: test_vsubl_high_s8:
361 ; CHECK: ssubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
363 %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
364 %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
365 %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
366 %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16>
367 %sub.i = sub <8 x i16> %0, %1
; Widening signed subtract on the upper halves: each shufflevector selects
; lanes 4-7 of an <8 x i16> input; the halves are sign-extended to <4 x i32>
; and the %b half is subtracted from the %a half.
; Expected instruction: ssubl2 with a .4s destination and two .8h sources.
371 define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) {
372 ; CHECK-LABEL: test_vsubl_high_s16:
373 ; CHECK: ssubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
375 %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
376 %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
377 %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
378 %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32>
379 %sub.i = sub <4 x i32> %0, %1
; Widening signed subtract on the upper halves: each shufflevector selects
; lanes 2-3 of a <4 x i32> input; the halves are sign-extended to <2 x i64>
; and the %b half is subtracted from the %a half.
; Expected instruction: ssubl2 with a .2d destination and two .4s sources.
383 define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) {
384 ; CHECK-LABEL: test_vsubl_high_s32:
385 ; CHECK: ssubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
387 %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
388 %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
389 %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
390 %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64>
391 %sub.i = sub <2 x i64> %0, %1
; Widening unsigned subtract on the upper halves: each shufflevector selects
; lanes 8-15 of a <16 x i8> input; the halves are zero-extended to <8 x i16>
; and the %b half is subtracted from the %a half.
; Expected instruction: usubl2 with an .8h destination and two .16b sources.
395 define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) {
396 ; CHECK-LABEL: test_vsubl_high_u8:
397 ; CHECK: usubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
399 %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
400 %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
401 %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
402 %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
403 %sub.i = sub <8 x i16> %0, %1
; Widening unsigned subtract on the upper halves: each shufflevector selects
; lanes 4-7 of an <8 x i16> input; the halves are zero-extended to <4 x i32>
; and the %b half is subtracted from the %a half.
; Expected instruction: usubl2 with a .4s destination and two .8h sources.
407 define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) {
408 ; CHECK-LABEL: test_vsubl_high_u16:
409 ; CHECK: usubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
411 %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
412 %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
413 %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
414 %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
415 %sub.i = sub <4 x i32> %0, %1
; Widening unsigned subtract on the upper halves: each shufflevector selects
; lanes 2-3 of a <4 x i32> input; the halves are zero-extended to <2 x i64>
; and the %b half is subtracted from the %a half.
; Expected instruction: usubl2 with a .2d destination and two .4s sources.
419 define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) {
420 ; CHECK-LABEL: test_vsubl_high_u32:
421 ; CHECK: usubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
423 %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
424 %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
425 %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
426 %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
427 %sub.i = sub <2 x i64> %0, %1
; Wide signed subtract: the narrow <8 x i8> input %b is sign-extended to
; <8 x i16> and subtracted from the already-wide input %a.
; Expected instruction: ssubw with .8h destination, .8h and .8b sources.
431 define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) {
432 ; CHECK-LABEL: test_vsubw_s8:
433 ; CHECK: ssubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
435 %vmovl.i.i = sext <8 x i8> %b to <8 x i16>
436 %sub.i = sub <8 x i16> %a, %vmovl.i.i
; Wide signed subtract: the narrow <4 x i16> input %b is sign-extended to
; <4 x i32> and subtracted from the already-wide input %a.
; Expected instruction: ssubw with .4s destination, .4s and .4h sources.
440 define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) {
441 ; CHECK-LABEL: test_vsubw_s16:
442 ; CHECK: ssubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
444 %vmovl.i.i = sext <4 x i16> %b to <4 x i32>
445 %sub.i = sub <4 x i32> %a, %vmovl.i.i
; Wide signed subtract: the narrow <2 x i32> input %b is sign-extended to
; <2 x i64> and subtracted from the already-wide input %a.
; Expected instruction: ssubw with .2d destination, .2d and .2s sources.
449 define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) {
450 ; CHECK-LABEL: test_vsubw_s32:
451 ; CHECK: ssubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
453 %vmovl.i.i = sext <2 x i32> %b to <2 x i64>
454 %sub.i = sub <2 x i64> %a, %vmovl.i.i
; Wide unsigned subtract: the narrow <8 x i8> input %b is zero-extended to
; <8 x i16> and subtracted from the already-wide input %a.
; Expected instruction: usubw with .8h destination, .8h and .8b sources.
458 define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) {
459 ; CHECK-LABEL: test_vsubw_u8:
460 ; CHECK: usubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
462 %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
463 %sub.i = sub <8 x i16> %a, %vmovl.i.i
; Wide unsigned subtract: the narrow <4 x i16> input %b is zero-extended to
; <4 x i32> and subtracted from the already-wide input %a.
; Expected instruction: usubw with .4s destination, .4s and .4h sources.
467 define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) {
468 ; CHECK-LABEL: test_vsubw_u16:
469 ; CHECK: usubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
471 %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
472 %sub.i = sub <4 x i32> %a, %vmovl.i.i
; Wide unsigned subtract: the narrow <2 x i32> input %b is zero-extended to
; <2 x i64> and subtracted from the already-wide input %a.
; Expected instruction: usubw with .2d destination, .2d and .2s sources.
476 define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) {
477 ; CHECK-LABEL: test_vsubw_u32:
478 ; CHECK: usubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
480 %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
481 %sub.i = sub <2 x i64> %a, %vmovl.i.i
; Wide signed subtract using the upper half of %b: the shufflevector selects
; lanes 8-15 of the <16 x i8> input, which are sign-extended to <8 x i16> and
; subtracted from %a.
; Expected instruction: ssubw2 with .8h destination, .8h and .16b sources.
485 define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) {
486 ; CHECK-LABEL: test_vsubw_high_s8:
487 ; CHECK: ssubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
489 %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
490 %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
491 %sub.i = sub <8 x i16> %a, %0
; Wide signed subtract using the upper half of %b: the shufflevector selects
; lanes 4-7 of the <8 x i16> input, which are sign-extended to <4 x i32> and
; subtracted from %a.
; Expected instruction: ssubw2 with .4s destination, .4s and .8h sources.
495 define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) {
496 ; CHECK-LABEL: test_vsubw_high_s16:
497 ; CHECK: ssubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
499 %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
500 %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
501 %sub.i = sub <4 x i32> %a, %0
; Wide signed subtract using the upper half of %b: the shufflevector selects
; lanes 2-3 of the <4 x i32> input, which are sign-extended to <2 x i64> and
; subtracted from %a.
; Expected instruction: ssubw2 with .2d destination, .2d and .4s sources.
505 define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) {
506 ; CHECK-LABEL: test_vsubw_high_s32:
507 ; CHECK: ssubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
509 %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
510 %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
511 %sub.i = sub <2 x i64> %a, %0
; Wide unsigned subtract using the upper half of %b: the shufflevector selects
; lanes 8-15 of the <16 x i8> input, which are zero-extended to <8 x i16> and
; subtracted from %a.
; Expected instruction: usubw2 with .8h destination, .8h and .16b sources.
515 define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) {
516 ; CHECK-LABEL: test_vsubw_high_u8:
517 ; CHECK: usubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
519 %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
520 %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
521 %sub.i = sub <8 x i16> %a, %0
; Wide unsigned subtract using the upper half of %b: the shufflevector selects
; lanes 4-7 of the <8 x i16> input, which are zero-extended to <4 x i32> and
; subtracted from %a.
; Expected instruction: usubw2 with .4s destination, .4s and .8h sources.
525 define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) {
526 ; CHECK-LABEL: test_vsubw_high_u16:
527 ; CHECK: usubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
529 %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
530 %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
531 %sub.i = sub <4 x i32> %a, %0
; Wide unsigned subtract using the upper half of %b: the shufflevector selects
; lanes 2-3 of the <4 x i32> input, which are zero-extended to <2 x i64> and
; subtracted from %a.
; Expected instruction: usubw2 with .2d destination, .2d and .4s sources.
535 define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) {
536 ; CHECK-LABEL: test_vsubw_high_u32:
537 ; CHECK: usubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
539 %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
540 %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
541 %sub.i = sub <2 x i64> %a, %0
; Add and keep the high half: the <8 x i16> inputs are added, every lane is
; logically shifted right by 8 bits, and the result is truncated to <8 x i8>,
; which is returned.
; Expected instruction: addhn with an .8b destination and two .8h sources.
545 define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) {
546 ; CHECK-LABEL: test_vaddhn_s16:
547 ; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
549 %vaddhn.i = add <8 x i16> %a, %b
550 %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
551 %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8>
552 ret <8 x i8> %vaddhn2.i
; Add and keep the high half: the <4 x i32> inputs are added, every lane is
; logically shifted right by 16 bits, and the result is truncated to
; <4 x i16>, which is returned.
; Expected instruction: addhn with a .4h destination and two .4s sources.
555 define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) {
556 ; CHECK-LABEL: test_vaddhn_s32:
557 ; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
559 %vaddhn.i = add <4 x i32> %a, %b
560 %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
561 %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16>
562 ret <4 x i16> %vaddhn2.i
; Add and keep the high half: the <2 x i64> inputs are added, every lane is
; logically shifted right by 32 bits, and the result is truncated to
; <2 x i32>, which is returned.
; Expected instruction: addhn with a .2s destination and two .2d sources.
565 define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) {
566 ; CHECK-LABEL: test_vaddhn_s64:
567 ; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
569 %vaddhn.i = add <2 x i64> %a, %b
570 %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
571 %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32>
572 ret <2 x i32> %vaddhn2.i
; Add and keep the high half (unsigned-named variant): the <8 x i16> inputs
; are added, every lane is logically shifted right by 8 bits, and the result
; is truncated to <8 x i8>, which is returned. The instruction sequence is the
; same as in test_vaddhn_s16; only the function name differs.
; Expected instruction: addhn with an .8b destination and two .8h sources.
575 define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) {
576 ; CHECK-LABEL: test_vaddhn_u16:
577 ; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
579 %vaddhn.i = add <8 x i16> %a, %b
580 %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
581 %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8>
582 ret <8 x i8> %vaddhn2.i
; Add and keep the high half (unsigned-named variant): the <4 x i32> inputs
; are added, every lane is logically shifted right by 16 bits, and the result
; is truncated to <4 x i16>, which is returned. The instruction sequence is
; the same as in test_vaddhn_s32; only the function name differs.
; Expected instruction: addhn with a .4h destination and two .4s sources.
585 define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) {
586 ; CHECK-LABEL: test_vaddhn_u32:
587 ; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
589 %vaddhn.i = add <4 x i32> %a, %b
590 %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
591 %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16>
592 ret <4 x i16> %vaddhn2.i
; Add and keep the high half (unsigned-named variant): the <2 x i64> inputs
; are added, every lane is logically shifted right by 32 bits, and the result
; is truncated to <2 x i32>, which is returned. The instruction sequence is
; the same as in test_vaddhn_s64; only the function name differs.
; Expected instruction: addhn with a .2s destination and two .2d sources.
595 define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) {
596 ; CHECK-LABEL: test_vaddhn_u64:
597 ; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
599 %vaddhn.i = add <2 x i64> %a, %b
600 %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
601 %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32>
602 ret <2 x i32> %vaddhn2.i
; Add, keep the high half, and append to %r: the <8 x i16> inputs are added,
; shifted right (logical) by 8 and truncated to <8 x i8>. %r and that narrowed
; value are each reinterpreted as <1 x i64> and concatenated (%r in lane 0,
; the narrowed value in lane 1); the <2 x i64> is then bitcast to <16 x i8>.
; Expected instruction: addhn2 with a .16b destination and two .8h sources.
605 define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
606 ; CHECK-LABEL: test_vaddhn_high_s16:
607 ; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
609 %vaddhn.i.i = add <8 x i16> %a, %b
610 %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
611 %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8>
612 %0 = bitcast <8 x i8> %r to <1 x i64>
613 %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64>
614 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
615 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
; Add, keep the high half, and append to %r: the <4 x i32> inputs are added,
; shifted right (logical) by 16 and truncated to <4 x i16>. %r and that
; narrowed value are each reinterpreted as <1 x i64> and concatenated (%r in
; lane 0, the narrowed value in lane 1); the <2 x i64> is then bitcast to
; <8 x i16>.
; Expected instruction: addhn2 with an .8h destination and two .4s sources.
619 define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
620 ; CHECK-LABEL: test_vaddhn_high_s32:
621 ; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
623 %vaddhn.i.i = add <4 x i32> %a, %b
624 %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
625 %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16>
626 %0 = bitcast <4 x i16> %r to <1 x i64>
627 %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64>
628 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
629 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
; Add, keep the high half, and append to %r: the <2 x i64> inputs are added,
; shifted right (logical) by 32 and truncated to <2 x i32>. %r and that
; narrowed value are each reinterpreted as <1 x i64> and concatenated (%r in
; lane 0, the narrowed value in lane 1); the <2 x i64> is then bitcast to
; <4 x i32>.
; Expected instruction: addhn2 with a .4s destination and two .2d sources.
633 define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
634 ; CHECK-LABEL: test_vaddhn_high_s64:
635 ; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
637 %vaddhn.i.i = add <2 x i64> %a, %b
638 %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
639 %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32>
640 %0 = bitcast <2 x i32> %r to <1 x i64>
641 %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64>
642 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
643 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
; Unsigned-named variant of the add / keep-high-half / append-to-%r test: the
; <8 x i16> inputs are added, shifted right (logical) by 8 and truncated to
; <8 x i8>. %r and that narrowed value are each reinterpreted as <1 x i64>
; and concatenated (%r in lane 0); the <2 x i64> is then bitcast to <16 x i8>.
; Expected instruction: addhn2 with a .16b destination and two .8h sources.
647 define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
648 ; CHECK-LABEL: test_vaddhn_high_u16:
649 ; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
651 %vaddhn.i.i = add <8 x i16> %a, %b
652 %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
653 %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8>
654 %0 = bitcast <8 x i8> %r to <1 x i64>
655 %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64>
656 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
657 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
; Unsigned-named variant of the add / keep-high-half / append-to-%r test: the
; <4 x i32> inputs are added, shifted right (logical) by 16 and truncated to
; <4 x i16>. %r and that narrowed value are each reinterpreted as <1 x i64>
; and concatenated (%r in lane 0); the <2 x i64> is then bitcast to <8 x i16>.
; Expected instruction: addhn2 with an .8h destination and two .4s sources.
661 define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
662 ; CHECK-LABEL: test_vaddhn_high_u32:
663 ; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
665 %vaddhn.i.i = add <4 x i32> %a, %b
666 %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
667 %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16>
668 %0 = bitcast <4 x i16> %r to <1 x i64>
669 %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64>
670 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
671 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
; Unsigned-named variant of the add / keep-high-half / append-to-%r test: the
; <2 x i64> inputs are added, shifted right (logical) by 32 and truncated to
; <2 x i32>. %r and that narrowed value are each reinterpreted as <1 x i64>
; and concatenated (%r in lane 0); the <2 x i64> is then bitcast to <4 x i32>.
; Expected instruction: addhn2 with a .4s destination and two .2d sources.
675 define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
676 ; CHECK-LABEL: test_vaddhn_high_u64:
677 ; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
679 %vaddhn.i.i = add <2 x i64> %a, %b
680 %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
681 %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32>
682 %0 = bitcast <2 x i32> %r to <1 x i64>
683 %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64>
684 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
685 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
; Passes the two <8 x i16> inputs to the llvm.aarch64.neon.raddhn.v8i8
; intrinsic and returns its <8 x i8> result.
; Expected instruction: raddhn with an .8b destination and two .8h sources.
689 define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) {
690 ; CHECK-LABEL: test_vraddhn_s16:
691 ; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
693 %vraddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
694 ret <8 x i8> %vraddhn2.i
; Passes the two <4 x i32> inputs to the llvm.aarch64.neon.raddhn.v4i16
; intrinsic and returns its <4 x i16> result.
; Expected instruction: raddhn with a .4h destination and two .4s sources.
697 define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) {
698 ; CHECK-LABEL: test_vraddhn_s32:
699 ; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
701 %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
702 ret <4 x i16> %vraddhn2.i
; Passes the two <2 x i64> inputs to the llvm.aarch64.neon.raddhn.v2i32
; intrinsic and returns its <2 x i32> result.
; Expected instruction: raddhn with a .2s destination and two .2d sources.
705 define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) {
706 ; CHECK-LABEL: test_vraddhn_s64:
707 ; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
709 %vraddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
710 ret <2 x i32> %vraddhn2.i
; Unsigned-named variant: passes the two <8 x i16> inputs to the
; llvm.aarch64.neon.raddhn.v8i8 intrinsic and returns its <8 x i8> result.
; The body matches test_vraddhn_s16; only the function name differs.
; Expected instruction: raddhn with an .8b destination and two .8h sources.
713 define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) {
714 ; CHECK-LABEL: test_vraddhn_u16:
715 ; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
717 %vraddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
718 ret <8 x i8> %vraddhn2.i
; Unsigned-named variant: passes the two <4 x i32> inputs to the
; llvm.aarch64.neon.raddhn.v4i16 intrinsic and returns its <4 x i16> result.
; The body matches test_vraddhn_s32; only the function name differs.
; Expected instruction: raddhn with a .4h destination and two .4s sources.
721 define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) {
722 ; CHECK-LABEL: test_vraddhn_u32:
723 ; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
725 %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
726 ret <4 x i16> %vraddhn2.i
; Unsigned-named variant: passes the two <2 x i64> inputs to the
; llvm.aarch64.neon.raddhn.v2i32 intrinsic and returns its <2 x i32> result.
; The body matches test_vraddhn_s64; only the function name differs.
; Expected instruction: raddhn with a .2s destination and two .2d sources.
729 define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) {
730 ; CHECK-LABEL: test_vraddhn_u64:
731 ; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
733 %vraddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
734 ret <2 x i32> %vraddhn2.i
; Calls llvm.aarch64.neon.raddhn.v8i8 on the <8 x i16> inputs, then
; reinterprets %r and the intrinsic result as <1 x i64> values and
; concatenates them (%r in lane 0, the intrinsic result in lane 1); the
; <2 x i64> is bitcast to <16 x i8>.
; Expected instruction: raddhn2 with a .16b destination and two .8h sources.
737 define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
738 ; CHECK-LABEL: test_vraddhn_high_s16:
739 ; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
741 %vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
742 %0 = bitcast <8 x i8> %r to <1 x i64>
743 %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64>
744 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
745 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
; Calls llvm.aarch64.neon.raddhn.v4i16 on the <4 x i32> inputs, then
; reinterprets %r and the intrinsic result as <1 x i64> values and
; concatenates them (%r in lane 0, the intrinsic result in lane 1); the
; <2 x i64> is bitcast to <8 x i16>.
; Expected instruction: raddhn2 with an .8h destination and two .4s sources.
749 define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
750 ; CHECK-LABEL: test_vraddhn_high_s32:
751 ; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
753 %vraddhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
754 %0 = bitcast <4 x i16> %r to <1 x i64>
755 %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64>
756 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
757 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
; Calls llvm.aarch64.neon.raddhn.v2i32 on the <2 x i64> inputs, then
; reinterprets %r and the intrinsic result as <1 x i64> values and
; concatenates them (%r in lane 0, the intrinsic result in lane 1); the
; <2 x i64> is bitcast to <4 x i32>.
; Expected instruction: raddhn2 with a .4s destination and two .2d sources.
761 define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
762 ; CHECK-LABEL: test_vraddhn_high_s64:
763 ; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
765 %vraddhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
766 %0 = bitcast <2 x i32> %r to <1 x i64>
767 %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64>
768 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
769 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
; Unsigned-named variant: calls llvm.aarch64.neon.raddhn.v8i8 on the
; <8 x i16> inputs, then reinterprets %r and the intrinsic result as
; <1 x i64> values and concatenates them (%r in lane 0); the <2 x i64> is
; bitcast to <16 x i8>.
; Expected instruction: raddhn2 with a .16b destination and two .8h sources.
773 define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
774 ; CHECK-LABEL: test_vraddhn_high_u16:
775 ; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
777 %vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
778 %0 = bitcast <8 x i8> %r to <1 x i64>
779 %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64>
780 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
781 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
; Unsigned-named variant: calls llvm.aarch64.neon.raddhn.v4i16 on the
; <4 x i32> inputs, then reinterprets %r and the intrinsic result as
; <1 x i64> values and concatenates them (%r in lane 0); the <2 x i64> is
; bitcast to <8 x i16>.
; Expected instruction: raddhn2 with an .8h destination and two .4s sources.
785 define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
786 ; CHECK-LABEL: test_vraddhn_high_u32:
787 ; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
789 %vraddhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
790 %0 = bitcast <4 x i16> %r to <1 x i64>
791 %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64>
792 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
793 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
; Unsigned-named variant: calls llvm.aarch64.neon.raddhn.v2i32 on the
; <2 x i64> inputs, then reinterprets %r and the intrinsic result as
; <1 x i64> values and concatenates them (%r in lane 0); the <2 x i64> is
; bitcast to <4 x i32>.
; Expected instruction: raddhn2 with a .4s destination and two .2d sources.
797 define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
798 ; CHECK-LABEL: test_vraddhn_high_u64:
799 ; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
801 %vraddhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
802 %0 = bitcast <2 x i32> %r to <1 x i64>
803 %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64>
804 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
805 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
; Subtract and keep the high half: %b is subtracted from %a (<8 x i16>),
; every lane is logically shifted right by 8 bits, and the result is
; truncated to <8 x i8>, which is returned.
; Expected instruction: subhn with an .8b destination and two .8h sources.
809 define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
810 ; CHECK-LABEL: test_vsubhn_s16:
811 ; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
813 %vsubhn.i = sub <8 x i16> %a, %b
814 %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
815 %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8>
816 ret <8 x i8> %vsubhn2.i
; Subtract and keep the high half: %b is subtracted from %a (<4 x i32>),
; every lane is logically shifted right by 16 bits, and the result is
; truncated to <4 x i16>, which is returned.
; Expected instruction: subhn with a .4h destination and two .4s sources.
819 define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
820 ; CHECK-LABEL: test_vsubhn_s32:
821 ; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
823 %vsubhn.i = sub <4 x i32> %a, %b
824 %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
825 %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16>
826 ret <4 x i16> %vsubhn2.i
; Subtract and keep the high half: %b is subtracted from %a (<2 x i64>),
; every lane is logically shifted right by 32 bits, and the result is
; truncated to <2 x i32>, which is returned.
; Expected instruction: subhn with a .2s destination and two .2d sources.
829 define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
830 ; CHECK-LABEL: test_vsubhn_s64:
831 ; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
833 %vsubhn.i = sub <2 x i64> %a, %b
834 %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
835 %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32>
836 ret <2 x i32> %vsubhn2.i
; Subtract and keep the high half (unsigned-named variant): %b is subtracted
; from %a (<8 x i16>), every lane is logically shifted right by 8 bits, and
; the result is truncated to <8 x i8>, which is returned. The instruction
; sequence is the same as in test_vsubhn_s16; only the function name differs.
; Expected instruction: subhn with an .8b destination and two .8h sources.
839 define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
840 ; CHECK-LABEL: test_vsubhn_u16:
841 ; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
843 %vsubhn.i = sub <8 x i16> %a, %b
844 %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
845 %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8>
846 ret <8 x i8> %vsubhn2.i
; Computes %a - %b lane-wise on <4 x i32>, logically shifts each lane right by
; 16 and truncates to <4 x i16>. The instructions are the same as in
; test_vsubhn_s32; only the function name differs. The FileCheck pattern below
; expects a subhn with operand arrangements .4h, .4s, .4s.
849 define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
850 ; CHECK-LABEL: test_vsubhn_u32:
851 ; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
853 %vsubhn.i = sub <4 x i32> %a, %b
854 %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
855 %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16>
856 ret <4 x i16> %vsubhn2.i
; Computes %a - %b lane-wise on <2 x i64>, logically shifts each lane right by
; 32 and truncates to <2 x i32>. The instructions are the same as in
; test_vsubhn_s64; only the function name differs. The FileCheck pattern below
; expects a subhn with operand arrangements .2s, .2d, .2d.
859 define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
860 ; CHECK-LABEL: test_vsubhn_u64:
861 ; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
863 %vsubhn.i = sub <2 x i64> %a, %b
864 %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
865 %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32>
866 ret <2 x i32> %vsubhn2.i
; Narrows %a - %b to <8 x i8> (sub, lshr by 8, trunc), then places %r and the
; narrowed value as lanes 0 and 1 of a <2 x i64> (via <1 x i64> bitcasts and a
; shufflevector with mask <0, 1>) and bitcasts to <16 x i8>. The FileCheck
; pattern below expects a subhn2 with operand arrangements .16b, .8h, .8h.
869 define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
870 ; CHECK-LABEL: test_vsubhn_high_s16:
871 ; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
873 %vsubhn.i.i = sub <8 x i16> %a, %b
874 %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
875 %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8>
876 %0 = bitcast <8 x i8> %r to <1 x i64>
877 %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64>
878 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
879 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
; Narrows %a - %b to <4 x i16> (sub, lshr by 16, trunc), then places %r and the
; narrowed value as lanes 0 and 1 of a <2 x i64> (via <1 x i64> bitcasts and a
; shufflevector with mask <0, 1>) and bitcasts to <8 x i16>. The FileCheck
; pattern below expects a subhn2 with operand arrangements .8h, .4s, .4s.
883 define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
884 ; CHECK-LABEL: test_vsubhn_high_s32:
885 ; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
887 %vsubhn.i.i = sub <4 x i32> %a, %b
888 %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
889 %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16>
890 %0 = bitcast <4 x i16> %r to <1 x i64>
891 %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64>
892 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
893 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
; Narrows %a - %b to <2 x i32> (sub, lshr by 32, trunc), then places %r and the
; narrowed value as lanes 0 and 1 of a <2 x i64> (via <1 x i64> bitcasts and a
; shufflevector with mask <0, 1>) and bitcasts to <4 x i32>. The FileCheck
; pattern below expects a subhn2 with operand arrangements .4s, .2d, .2d.
897 define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
898 ; CHECK-LABEL: test_vsubhn_high_s64:
899 ; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
901 %vsubhn.i.i = sub <2 x i64> %a, %b
902 %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
903 %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32>
904 %0 = bitcast <2 x i32> %r to <1 x i64>
905 %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64>
906 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
907 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
; Narrows %a - %b to <8 x i8> (sub, lshr by 8, trunc), then places %r and the
; narrowed value as lanes 0 and 1 of a <2 x i64> and bitcasts to <16 x i8>.
; The visible instructions match test_vsubhn_high_s16. The FileCheck pattern
; below expects a subhn2 with operand arrangements .16b, .8h, .8h.
911 define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
912 ; CHECK-LABEL: test_vsubhn_high_u16:
913 ; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
915 %vsubhn.i.i = sub <8 x i16> %a, %b
916 %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
917 %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8>
918 %0 = bitcast <8 x i8> %r to <1 x i64>
919 %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64>
920 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
921 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
; Narrows %a - %b to <4 x i16> (sub, lshr by 16, trunc), then places %r and the
; narrowed value as lanes 0 and 1 of a <2 x i64> and bitcasts to <8 x i16>.
; The visible instructions match test_vsubhn_high_s32. The FileCheck pattern
; below expects a subhn2 with operand arrangements .8h, .4s, .4s.
925 define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
926 ; CHECK-LABEL: test_vsubhn_high_u32:
927 ; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
929 %vsubhn.i.i = sub <4 x i32> %a, %b
930 %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
931 %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16>
932 %0 = bitcast <4 x i16> %r to <1 x i64>
933 %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64>
934 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
935 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
; Narrows %a - %b to <2 x i32> (sub, lshr by 32, trunc), then places %r and the
; narrowed value as lanes 0 and 1 of a <2 x i64> and bitcasts to <4 x i32>.
; The visible instructions match test_vsubhn_high_s64. The FileCheck pattern
; below expects a subhn2 with operand arrangements .4s, .2d, .2d.
939 define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
940 ; CHECK-LABEL: test_vsubhn_high_u64:
941 ; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
943 %vsubhn.i.i = sub <2 x i64> %a, %b
944 %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
945 %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32>
946 %0 = bitcast <2 x i32> %r to <1 x i64>
947 %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64>
948 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
949 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
; Returns the result of the rsubhn.v8i8 intrinsic applied to %a and %b.
; The FileCheck pattern below expects an rsubhn with operand arrangements
; .8b, .8h, .8h.
953 define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
954 ; CHECK-LABEL: test_vrsubhn_s16:
955 ; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
957 %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
958 ret <8 x i8> %vrsubhn2.i
; Returns the result of the rsubhn.v4i16 intrinsic applied to %a and %b.
; The FileCheck pattern below expects an rsubhn with operand arrangements
; .4h, .4s, .4s.
961 define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
962 ; CHECK-LABEL: test_vrsubhn_s32:
963 ; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
965 %vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
966 ret <4 x i16> %vrsubhn2.i
; Returns the result of the rsubhn.v2i32 intrinsic applied to %a and %b.
; The FileCheck pattern below expects an rsubhn with operand arrangements
; .2s, .2d, .2d.
969 define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
970 ; CHECK-LABEL: test_vrsubhn_s64:
971 ; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
973 %vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
974 ret <2 x i32> %vrsubhn2.i
; Returns the result of the rsubhn.v8i8 intrinsic applied to %a and %b; the
; instructions are the same as in test_vrsubhn_s16. The FileCheck pattern
; below expects an rsubhn with operand arrangements .8b, .8h, .8h.
977 define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
978 ; CHECK-LABEL: test_vrsubhn_u16:
979 ; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
981 %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
982 ret <8 x i8> %vrsubhn2.i
; Returns the result of the rsubhn.v4i16 intrinsic applied to %a and %b; the
; instructions are the same as in test_vrsubhn_s32. The FileCheck pattern
; below expects an rsubhn with operand arrangements .4h, .4s, .4s.
985 define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
986 ; CHECK-LABEL: test_vrsubhn_u32:
987 ; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
989 %vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
990 ret <4 x i16> %vrsubhn2.i
; Returns the result of the rsubhn.v2i32 intrinsic applied to %a and %b; the
; instructions are the same as in test_vrsubhn_s64. The FileCheck pattern
; below expects an rsubhn with operand arrangements .2s, .2d, .2d.
993 define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
994 ; CHECK-LABEL: test_vrsubhn_u64:
995 ; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
997 %vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
998 ret <2 x i32> %vrsubhn2.i
; Calls the rsubhn.v8i8 intrinsic on %a and %b, then places %r and that result
; as lanes 0 and 1 of a <2 x i64> (via <1 x i64> bitcasts and a shufflevector
; with mask <0, 1>) and bitcasts to <16 x i8>. The FileCheck pattern below
; expects an rsubhn2 with operand arrangements .16b, .8h, .8h.
1001 define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
1002 ; CHECK-LABEL: test_vrsubhn_high_s16:
1003 ; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
1005 %vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
1006 %0 = bitcast <8 x i8> %r to <1 x i64>
1007 %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64>
1008 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
1009 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
; Calls the rsubhn.v4i16 intrinsic on %a and %b, then places %r and that result
; as lanes 0 and 1 of a <2 x i64> (via <1 x i64> bitcasts and a shufflevector
; with mask <0, 1>) and bitcasts to <8 x i16>. The FileCheck pattern below
; expects an rsubhn2 with operand arrangements .8h, .4s, .4s.
1013 define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
1014 ; CHECK-LABEL: test_vrsubhn_high_s32:
1015 ; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
1017 %vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
1018 %0 = bitcast <4 x i16> %r to <1 x i64>
1019 %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64>
1020 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
1021 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
; Calls the rsubhn.v2i32 intrinsic on %a and %b, then places %r and that result
; as lanes 0 and 1 of a <2 x i64> (via <1 x i64> bitcasts and a shufflevector
; with mask <0, 1>) and bitcasts to <4 x i32>. The FileCheck pattern below
; expects an rsubhn2 with operand arrangements .4s, .2d, .2d.
1025 define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
1026 ; CHECK-LABEL: test_vrsubhn_high_s64:
1027 ; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
1029 %vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
1030 %0 = bitcast <2 x i32> %r to <1 x i64>
1031 %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64>
1032 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
1033 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
; Calls the rsubhn.v8i8 intrinsic on %a and %b, then places %r and that result
; as lanes 0 and 1 of a <2 x i64> and bitcasts to <16 x i8>. The visible
; instructions match test_vrsubhn_high_s16. The FileCheck pattern below
; expects an rsubhn2 with operand arrangements .16b, .8h, .8h.
1037 define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
1038 ; CHECK-LABEL: test_vrsubhn_high_u16:
1039 ; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
1041 %vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
1042 %0 = bitcast <8 x i8> %r to <1 x i64>
1043 %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64>
1044 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
1045 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
; Calls the rsubhn.v4i16 intrinsic on %a and %b, then places %r and that result
; as lanes 0 and 1 of a <2 x i64> and bitcasts to <8 x i16>. The visible
; instructions match test_vrsubhn_high_s32. The FileCheck pattern below
; expects an rsubhn2 with operand arrangements .8h, .4s, .4s.
1049 define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
1050 ; CHECK-LABEL: test_vrsubhn_high_u32:
1051 ; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
1053 %vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
1054 %0 = bitcast <4 x i16> %r to <1 x i64>
1055 %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64>
1056 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
1057 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
; Calls the rsubhn.v2i32 intrinsic on %a and %b, then places %r and that result
; as lanes 0 and 1 of a <2 x i64> and bitcasts to <4 x i32>. The visible
; instructions match test_vrsubhn_high_s64. The FileCheck pattern below
; expects an rsubhn2 with operand arrangements .4s, .2d, .2d.
1061 define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
1062 ; CHECK-LABEL: test_vrsubhn_high_u64:
1063 ; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
1065 %vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
1066 %0 = bitcast <2 x i32> %r to <1 x i64>
1067 %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64>
1068 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
1069 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
; Calls the sabd.v8i8 intrinsic on %a and %b and zero-extends the result to
; <8 x i16>. The FileCheck pattern below expects a sabdl with operand
; arrangements .8h, .8b, .8b.
1073 define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) {
1074 ; CHECK-LABEL: test_vabdl_s8:
1075 ; CHECK: sabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
1077 %vabd.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b)
1078 %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16>
1079 ret <8 x i16> %vmovl.i.i
; Calls the sabd.v4i16 intrinsic on %a and %b and zero-extends the result to
; <4 x i32>. The FileCheck pattern below expects a sabdl with operand
; arrangements .4s, .4h, .4h.
1082 define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) {
1083 ; CHECK-LABEL: test_vabdl_s16:
1084 ; CHECK: sabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
1086 %vabd2.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b)
1087 %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32>
1088 ret <4 x i32> %vmovl.i.i
; Calls the sabd.v2i32 intrinsic on %a and %b and zero-extends the result to
; <2 x i64>. The FileCheck pattern below expects a sabdl with operand
; arrangements .2d, .2s, .2s.
1091 define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) {
1092 ; CHECK-LABEL: test_vabdl_s32:
1093 ; CHECK: sabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
1095 %vabd2.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b)
1096 %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64>
1097 ret <2 x i64> %vmovl.i.i
; Calls the uabd.v8i8 intrinsic on %a and %b and zero-extends the result to
; <8 x i16>. The FileCheck pattern below expects a uabdl with operand
; arrangements .8h, .8b, .8b.
1100 define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) {
1101 ; CHECK-LABEL: test_vabdl_u8:
1102 ; CHECK: uabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
1104 %vabd.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b)
1105 %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16>
1106 ret <8 x i16> %vmovl.i.i
; Calls the uabd.v4i16 intrinsic on %a and %b and zero-extends the result to
; <4 x i32>. The FileCheck pattern below expects a uabdl with operand
; arrangements .4s, .4h, .4h.
1109 define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) {
1110 ; CHECK-LABEL: test_vabdl_u16:
1111 ; CHECK: uabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
1113 %vabd2.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b)
1114 %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32>
1115 ret <4 x i32> %vmovl.i.i
; Calls the uabd.v2i32 intrinsic on %a and %b and zero-extends the result to
; <2 x i64>. The FileCheck pattern below expects a uabdl with operand
; arrangements .2d, .2s, .2s.
1118 define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) {
1119 ; CHECK-LABEL: test_vabdl_u32:
1120 ; CHECK: uabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
1122 %vabd2.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b)
1123 %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64>
1124 ret <2 x i64> %vmovl.i.i
; Calls the sabd.v8i8 intrinsic on %b and %c, zero-extends the result to
; <8 x i16> and adds the accumulator %a. The FileCheck pattern below expects a
; sabal with operand arrangements .8h, .8b, .8b.
1127 define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
1128 ; CHECK-LABEL: test_vabal_s8:
1129 ; CHECK: sabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
1131 %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c)
1132 %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
1133 %add.i = add <8 x i16> %vmovl.i.i.i, %a
1134 ret <8 x i16> %add.i
; Calls the sabd.v4i16 intrinsic on %b and %c, zero-extends the result to
; <4 x i32> and adds the accumulator %a. The FileCheck pattern below expects a
; sabal with operand arrangements .4s, .4h, .4h.
1137 define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
1138 ; CHECK-LABEL: test_vabal_s16:
1139 ; CHECK: sabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
1141 %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c)
1142 %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
1143 %add.i = add <4 x i32> %vmovl.i.i.i, %a
1144 ret <4 x i32> %add.i
; Calls the sabd.v2i32 intrinsic on %b and %c, zero-extends the result to
; <2 x i64> and adds the accumulator %a. The FileCheck pattern below expects a
; sabal with operand arrangements .2d, .2s, .2s.
1147 define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
1148 ; CHECK-LABEL: test_vabal_s32:
1149 ; CHECK: sabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
1151 %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c)
1152 %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
1153 %add.i = add <2 x i64> %vmovl.i.i.i, %a
1154 ret <2 x i64> %add.i
; Calls the uabd.v8i8 intrinsic on %b and %c, zero-extends the result to
; <8 x i16> and adds the accumulator %a. The FileCheck pattern below expects a
; uabal with operand arrangements .8h, .8b, .8b.
1157 define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
1158 ; CHECK-LABEL: test_vabal_u8:
1159 ; CHECK: uabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
1161 %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c)
1162 %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
1163 %add.i = add <8 x i16> %vmovl.i.i.i, %a
1164 ret <8 x i16> %add.i
; Calls the uabd.v4i16 intrinsic on %b and %c, zero-extends the result to
; <4 x i32> and adds the accumulator %a. The FileCheck pattern below expects a
; uabal with operand arrangements .4s, .4h, .4h.
1167 define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
1168 ; CHECK-LABEL: test_vabal_u16:
1169 ; CHECK: uabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
1171 %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c)
1172 %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
1173 %add.i = add <4 x i32> %vmovl.i.i.i, %a
1174 ret <4 x i32> %add.i
; Calls the uabd.v2i32 intrinsic on %b and %c, zero-extends the result to
; <2 x i64> and adds the accumulator %a. The FileCheck pattern below expects a
; uabal with operand arrangements .2d, .2s, .2s.
1177 define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
1178 ; CHECK-LABEL: test_vabal_u32:
1179 ; CHECK: uabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
1181 %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c)
1182 %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
1183 %add.i = add <2 x i64> %vmovl.i.i.i, %a
1184 ret <2 x i64> %add.i
; Extracts lanes 8 through 15 of %a and %b with shufflevector, calls the
; sabd.v8i8 intrinsic on those halves and zero-extends the result to
; <8 x i16>. The FileCheck pattern below expects a sabdl2 with operand
; arrangements .8h, .16b, .16b.
1187 define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) {
1188 ; CHECK-LABEL: test_vabdl_high_s8:
1189 ; CHECK: sabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
1191 %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1192 %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1193 %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
1194 %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
1195 ret <8 x i16> %vmovl.i.i.i
; Extracts lanes 4 through 7 of %a and %b with shufflevector, calls the
; sabd.v4i16 intrinsic on those halves and zero-extends the result to
; <4 x i32>. The FileCheck pattern below expects a sabdl2 with operand
; arrangements .4s, .8h, .8h.
1198 define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) {
1199 ; CHECK-LABEL: test_vabdl_high_s16:
1200 ; CHECK: sabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
1202 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1203 %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1204 %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
1205 %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
1206 ret <4 x i32> %vmovl.i.i.i
; Extracts lanes 2 and 3 of %a and %b with shufflevector, calls the
; sabd.v2i32 intrinsic on those halves and zero-extends the result to
; <2 x i64>. The FileCheck pattern below expects a sabdl2 with operand
; arrangements .2d, .4s, .4s.
1209 define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) {
1210 ; CHECK-LABEL: test_vabdl_high_s32:
1211 ; CHECK: sabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
1213 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1214 %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1215 %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
1216 %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
1217 ret <2 x i64> %vmovl.i.i.i
; Extracts lanes 8 through 15 of %a and %b with shufflevector, calls the
; uabd.v8i8 intrinsic on those halves and zero-extends the result to
; <8 x i16>. The FileCheck pattern below expects a uabdl2 with operand
; arrangements .8h, .16b, .16b.
1220 define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) {
1221 ; CHECK-LABEL: test_vabdl_high_u8:
1222 ; CHECK: uabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
1224 %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1225 %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1226 %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
1227 %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
1228 ret <8 x i16> %vmovl.i.i.i
; Extracts lanes 4 through 7 of %a and %b with shufflevector, calls the
; uabd.v4i16 intrinsic on those halves and zero-extends the result to
; <4 x i32>. The FileCheck pattern below expects a uabdl2 with operand
; arrangements .4s, .8h, .8h.
1231 define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) {
1232 ; CHECK-LABEL: test_vabdl_high_u16:
1233 ; CHECK: uabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
1235 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1236 %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1237 %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
1238 %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
1239 ret <4 x i32> %vmovl.i.i.i
; Extracts lanes 2 and 3 of %a and %b with shufflevector, calls the
; uabd.v2i32 intrinsic on those halves and zero-extends the result to
; <2 x i64>. The FileCheck pattern below expects a uabdl2 with operand
; arrangements .2d, .4s, .4s.
1242 define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) {
1243 ; CHECK-LABEL: test_vabdl_high_u32:
1244 ; CHECK: uabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
1246 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1247 %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1248 %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
1249 %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
1250 ret <2 x i64> %vmovl.i.i.i
; Extracts lanes 8 through 15 of %b and %c with shufflevector, calls the
; sabd.v8i8 intrinsic on those halves, zero-extends the result to <8 x i16>
; and adds the accumulator %a. The FileCheck pattern below expects a sabal2
; with operand arrangements .8h, .16b, .16b.
1253 define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
1254 ; CHECK-LABEL: test_vabal_high_s8:
1255 ; CHECK: sabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
1257 %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1258 %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1259 %vabd.i.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
1260 %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16>
1261 %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a
1262 ret <8 x i16> %add.i.i
; Extracts lanes 4 through 7 of %b and %c with shufflevector, calls the
; sabd.v4i16 intrinsic on those halves, zero-extends the result to <4 x i32>
; and adds the accumulator %a. The FileCheck pattern below expects a sabal2
; with operand arrangements .4s, .8h, .8h.
1265 define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
1266 ; CHECK-LABEL: test_vabal_high_s16:
1267 ; CHECK: sabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
1269 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1270 %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1271 %vabd2.i.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
1272 %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32>
1273 %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a
1274 ret <4 x i32> %add.i.i
; Extracts lanes 2 and 3 of %b and %c with shufflevector, calls the
; sabd.v2i32 intrinsic on those halves, zero-extends the result to <2 x i64>
; and adds the accumulator %a. The FileCheck pattern below expects a sabal2
; with operand arrangements .2d, .4s, .4s.
1277 define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
1278 ; CHECK-LABEL: test_vabal_high_s32:
1279 ; CHECK: sabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
1281 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1282 %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1283 %vabd2.i.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
1284 %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64>
1285 %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a
1286 ret <2 x i64> %add.i.i
; Extracts lanes 8 through 15 of %b and %c with shufflevector, calls the
; uabd.v8i8 intrinsic on those halves, zero-extends the result to <8 x i16>
; and adds the accumulator %a. The FileCheck pattern below expects a uabal2
; with operand arrangements .8h, .16b, .16b.
1289 define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
1290 ; CHECK-LABEL: test_vabal_high_u8:
1291 ; CHECK: uabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
1293 %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1294 %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1295 %vabd.i.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
1296 %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16>
1297 %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a
1298 ret <8 x i16> %add.i.i
; Extracts lanes 4 through 7 of %b and %c with shufflevector, calls the
; uabd.v4i16 intrinsic on those halves, zero-extends the result to <4 x i32>
; and adds the accumulator %a. The FileCheck pattern below expects a uabal2
; with operand arrangements .4s, .8h, .8h.
1301 define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
1302 ; CHECK-LABEL: test_vabal_high_u16:
1303 ; CHECK: uabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
1305 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1306 %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1307 %vabd2.i.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
1308 %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32>
1309 %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a
1310 ret <4 x i32> %add.i.i
; Extracts lanes 2 and 3 of %b and %c with shufflevector, calls the
; uabd.v2i32 intrinsic on those halves, zero-extends the result to <2 x i64>
; and adds the accumulator %a. The FileCheck pattern below expects a uabal2
; with operand arrangements .2d, .4s, .4s.
1313 define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
1314 ; CHECK-LABEL: test_vabal_high_u32:
1315 ; CHECK: uabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
1317 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1318 %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1319 %vabd2.i.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
1320 %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64>
1321 %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a
1322 ret <2 x i64> %add.i.i
; Returns the result of the smull.v8i16 intrinsic applied to %a and %b.
; The FileCheck pattern below expects an smull with operand arrangements
; .8h, .8b, .8b.
1325 define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) {
1326 ; CHECK-LABEL: test_vmull_s8:
1327 ; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
1329 %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b)
1330 ret <8 x i16> %vmull.i
; Returns the result of the smull.v4i32 intrinsic applied to %a and %b.
; The FileCheck pattern below expects an smull with operand arrangements
; .4s, .4h, .4h.
1333 define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) {
1334 ; CHECK-LABEL: test_vmull_s16:
1335 ; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
1337 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b)
1338 ret <4 x i32> %vmull2.i
; Returns the result of the smull.v2i64 intrinsic applied to %a and %b.
; The FileCheck pattern below expects an smull with operand arrangements
; .2d, .2s, .2s.
1341 define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) {
1342 ; CHECK-LABEL: test_vmull_s32:
1343 ; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
1345 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b)
1346 ret <2 x i64> %vmull2.i
; Returns the result of the umull.v8i16 intrinsic applied to %a and %b.
; The FileCheck pattern below expects a umull with operand arrangements
; .8h, .8b, .8b.
1349 define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) {
1350 ; CHECK-LABEL: test_vmull_u8:
1351 ; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
1353 %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b)
1354 ret <8 x i16> %vmull.i
; Returns the result of the umull.v4i32 intrinsic applied to %a and %b.
; The FileCheck pattern below expects a umull with operand arrangements
; .4s, .4h, .4h.
1357 define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) {
1358 ; CHECK-LABEL: test_vmull_u16:
1359 ; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
1361 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b)
1362 ret <4 x i32> %vmull2.i
; Returns the result of the umull.v2i64 intrinsic applied to %a and %b.
; The FileCheck pattern below expects a umull with operand arrangements
; .2d, .2s, .2s.
1365 define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) {
1366 ; CHECK-LABEL: test_vmull_u32:
1367 ; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
1369 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b)
1370 ret <2 x i64> %vmull2.i
; Extracts lanes 8 through 15 of %a and %b with shufflevector and returns the
; smull.v8i16 intrinsic applied to those halves. The FileCheck pattern below
; expects an smull2 with operand arrangements .8h, .16b, .16b.
1373 define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) {
1374 ; CHECK-LABEL: test_vmull_high_s8:
1375 ; CHECK: smull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
1377 %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1378 %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1379 %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
1380 ret <8 x i16> %vmull.i.i
; Extracts lanes 4 through 7 of %a and %b with shufflevector and returns the
; smull.v4i32 intrinsic applied to those halves. The FileCheck pattern below
; expects an smull2 with operand arrangements .4s, .8h, .8h.
1383 define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) {
1384 ; CHECK-LABEL: test_vmull_high_s16:
1385 ; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
1387 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1388 %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1389 %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
1390 ret <4 x i32> %vmull2.i.i
; Extracts lanes 2 and 3 of %a and %b with shufflevector and returns the
; smull.v2i64 intrinsic applied to those halves. The FileCheck pattern below
; expects an smull2 with operand arrangements .2d, .4s, .4s.
1393 define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) {
1394 ; CHECK-LABEL: test_vmull_high_s32:
1395 ; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
1397 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1398 %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1399 %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
1400 ret <2 x i64> %vmull2.i.i
; Extracts lanes 8 through 15 of %a and %b with shufflevector and returns the
; umull.v8i16 intrinsic applied to those halves. The FileCheck pattern below
; expects a umull2 with operand arrangements .8h, .16b, .16b.
1403 define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) {
1404 ; CHECK-LABEL: test_vmull_high_u8:
1405 ; CHECK: umull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
1407 %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1408 %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1409 %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
1410 ret <8 x i16> %vmull.i.i
; Extracts lanes 4 through 7 of %a and %b with shufflevector and returns the
; umull.v4i32 intrinsic applied to those halves. The FileCheck pattern below
; expects a umull2 with operand arrangements .4s, .8h, .8h.
1413 define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) {
1414 ; CHECK-LABEL: test_vmull_high_u16:
1415 ; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
1417 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1418 %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1419 %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
1420 ret <4 x i32> %vmull2.i.i
; Extracts lanes 2 and 3 of %a and %b with shufflevector and returns the
; umull.v2i64 intrinsic applied to those halves. The FileCheck pattern below
; expects a umull2 with operand arrangements .2d, .4s, .4s.
1423 define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) {
1424 ; CHECK-LABEL: test_vmull_high_u32:
1425 ; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
1427 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1428 %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1429 %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
1430 ret <2 x i64> %vmull2.i.i
; Calls the smull.v8i16 intrinsic on %b and %c and adds the accumulator %a to
; the product. The FileCheck pattern below expects an smlal with operand
; arrangements .8h, .8b, .8b.
1433 define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
1434 ; CHECK-LABEL: test_vmlal_s8:
1435 ; CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
1437 %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
1438 %add.i = add <8 x i16> %vmull.i.i, %a
1439 ret <8 x i16> %add.i
; Calls the smull.v4i32 intrinsic on %b and %c and adds the accumulator %a to
; the product. The FileCheck pattern below expects an smlal with operand
; arrangements .4s, .4h, .4h.
1442 define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
1443 ; CHECK-LABEL: test_vmlal_s16:
1444 ; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
1446 %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
1447 %add.i = add <4 x i32> %vmull2.i.i, %a
1448 ret <4 x i32> %add.i
; Calls the smull.v2i64 intrinsic on %b and %c and adds the accumulator %a to
; the product. The FileCheck pattern below expects an smlal with operand
; arrangements .2d, .2s, .2s.
1451 define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
1452 ; CHECK-LABEL: test_vmlal_s32:
1453 ; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
1455 %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
1456 %add.i = add <2 x i64> %vmull2.i.i, %a
1457 ret <2 x i64> %add.i
; Calls the umull.v8i16 intrinsic on %b and %c and adds the accumulator %a to
; the product. The FileCheck pattern below expects a umlal with operand
; arrangements .8h, .8b, .8b.
1460 define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
1461 ; CHECK-LABEL: test_vmlal_u8:
1462 ; CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
1464 %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
1465 %add.i = add <8 x i16> %vmull.i.i, %a
1466 ret <8 x i16> %add.i
; Calls the umull.v4i32 intrinsic on %b and %c and adds the accumulator %a to
; the product. The FileCheck pattern below expects a umlal with operand
; arrangements .4s, .4h, .4h.
1469 define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
1470 ; CHECK-LABEL: test_vmlal_u16:
1471 ; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
1473 %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
1474 %add.i = add <4 x i32> %vmull2.i.i, %a
1475 ret <4 x i32> %add.i
; Calls the umull.v2i64 intrinsic on %b and %c and adds the accumulator %a to
; the product. The FileCheck pattern below expects a umlal with operand
; arrangements .2d, .2s, .2s.
1478 define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
1479 ; CHECK-LABEL: test_vmlal_u32:
1480 ; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
1482 %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
1483 %add.i = add <2 x i64> %vmull2.i.i, %a
1484 ret <2 x i64> %add.i
; Extracts lanes 8 through 15 of %b and %c with shufflevector, calls the
; smull.v8i16 intrinsic on those halves and adds the accumulator %a to the
; product. The FileCheck pattern below expects an smlal2 with operand
; arrangements .8h, .16b, .16b.
1487 define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
1488 ; CHECK-LABEL: test_vmlal_high_s8:
1489 ; CHECK: smlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
1491 %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1492 %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1493 %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
1494 %add.i.i = add <8 x i16> %vmull.i.i.i, %a
1495 ret <8 x i16> %add.i.i
; Extracts lanes 4 through 7 of %b and %c with shufflevector, calls the
; smull.v4i32 intrinsic on those halves and adds the accumulator %a to the
; product. The FileCheck pattern below expects an smlal2 with operand
; arrangements .4s, .8h, .8h.
1498 define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
1499 ; CHECK-LABEL: test_vmlal_high_s16:
1500 ; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
1502 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1503 %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1504 %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
1505 %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
1506 ret <4 x i32> %add.i.i
; Extracts lanes 2 and 3 of %b and %c with shufflevector, calls the
; smull.v2i64 intrinsic on those halves and adds the accumulator %a to the
; product. The FileCheck pattern below expects an smlal2 with operand
; arrangements .2d, .4s, .4s.
1509 define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
1510 ; CHECK-LABEL: test_vmlal_high_s32:
1511 ; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
1513 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1514 %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1515 %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
1516 %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
1517 ret <2 x i64> %add.i.i
; Extracts lanes 8 through 15 of %b and %c with shufflevector, calls the
; umull.v8i16 intrinsic on those halves and adds the accumulator %a to the
; product. The FileCheck pattern below expects a umlal2 with operand
; arrangements .8h, .16b, .16b.
1520 define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
1521 ; CHECK-LABEL: test_vmlal_high_u8:
1522 ; CHECK: umlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
1524 %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1525 %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1526 %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
1527 %add.i.i = add <8 x i16> %vmull.i.i.i, %a
1528 ret <8 x i16> %add.i.i
; Extracts lanes 4 through 7 of %b and %c with shufflevector, calls the
; umull.v4i32 intrinsic on those halves and adds the accumulator %a to the
; product. The FileCheck pattern below expects a umlal2 with operand
; arrangements .4s, .8h, .8h.
1531 define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
1532 ; CHECK-LABEL: test_vmlal_high_u16:
1533 ; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
1535 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1536 %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1537 %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
1538 %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
1539 ret <4 x i32> %add.i.i
; Multiply-accumulate on the upper halves of two <4 x i32> vectors.
; Lanes 2-3 of %b and %c are extracted, multiplied by the umull intrinsic into
; a <2 x i64> product, and that product is added to the accumulator %a.
; FileCheck expects the whole sequence to appear as one umlal2 (.2d <- .4s, .4s).
1542 define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
1543 ; CHECK-LABEL: test_vmlal_high_u32:
1544 ; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
1546 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1547 %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1548 %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
1549 %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
1550 ret <2 x i64> %add.i.i
; Multiply-subtract on <8 x i8> operands.
; %b and %c are multiplied by the smull intrinsic into an <8 x i16> product,
; which is then subtracted from the accumulator %a (%a is the minuend).
; FileCheck expects the pair to appear as one smlsl (.8h <- .8b, .8b).
1553 define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
1554 ; CHECK-LABEL: test_vmlsl_s8:
1555 ; CHECK: smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
1557 %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
1558 %sub.i = sub <8 x i16> %a, %vmull.i.i
1559 ret <8 x i16> %sub.i
; Multiply-subtract on <4 x i16> operands.
; %b and %c are multiplied by the smull intrinsic into a <4 x i32> product,
; which is then subtracted from the accumulator %a (%a is the minuend).
; FileCheck expects the pair to appear as one smlsl (.4s <- .4h, .4h).
1562 define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
1563 ; CHECK-LABEL: test_vmlsl_s16:
1564 ; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
1566 %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
1567 %sub.i = sub <4 x i32> %a, %vmull2.i.i
1568 ret <4 x i32> %sub.i
; Multiply-subtract on <2 x i32> operands.
; %b and %c are multiplied by the smull intrinsic into a <2 x i64> product,
; which is then subtracted from the accumulator %a (%a is the minuend).
; FileCheck expects the pair to appear as one smlsl (.2d <- .2s, .2s).
1571 define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
1572 ; CHECK-LABEL: test_vmlsl_s32:
1573 ; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
1575 %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
1576 %sub.i = sub <2 x i64> %a, %vmull2.i.i
1577 ret <2 x i64> %sub.i
; Multiply-subtract on <8 x i8> operands.
; %b and %c are multiplied by the umull intrinsic into an <8 x i16> product,
; which is then subtracted from the accumulator %a (%a is the minuend).
; FileCheck expects the pair to appear as one umlsl (.8h <- .8b, .8b).
1580 define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
1581 ; CHECK-LABEL: test_vmlsl_u8:
1582 ; CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
1584 %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
1585 %sub.i = sub <8 x i16> %a, %vmull.i.i
1586 ret <8 x i16> %sub.i
; Multiply-subtract on <4 x i16> operands.
; %b and %c are multiplied by the umull intrinsic into a <4 x i32> product,
; which is then subtracted from the accumulator %a (%a is the minuend).
; FileCheck expects the pair to appear as one umlsl (.4s <- .4h, .4h).
1589 define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
1590 ; CHECK-LABEL: test_vmlsl_u16:
1591 ; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
1593 %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
1594 %sub.i = sub <4 x i32> %a, %vmull2.i.i
1595 ret <4 x i32> %sub.i
; Multiply-subtract on <2 x i32> operands.
; %b and %c are multiplied by the umull intrinsic into a <2 x i64> product,
; which is then subtracted from the accumulator %a (%a is the minuend).
; FileCheck expects the pair to appear as one umlsl (.2d <- .2s, .2s).
1598 define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
1599 ; CHECK-LABEL: test_vmlsl_u32:
1600 ; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
1602 %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
1603 %sub.i = sub <2 x i64> %a, %vmull2.i.i
1604 ret <2 x i64> %sub.i
; Multiply-subtract on the upper halves of two <16 x i8> vectors.
; Lanes 8-15 of %b and %c are extracted, multiplied by the smull intrinsic into
; an <8 x i16> product, and that product is subtracted from the accumulator %a.
; FileCheck expects the whole sequence to appear as one smlsl2 (.8h <- .16b, .16b).
1607 define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
1608 ; CHECK-LABEL: test_vmlsl_high_s8:
1609 ; CHECK: smlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
1611 %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1612 %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1613 %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
1614 %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i
1615 ret <8 x i16> %sub.i.i
; Multiply-subtract on the upper halves of two <8 x i16> vectors.
; Lanes 4-7 of %b and %c are extracted, multiplied by the smull intrinsic into
; a <4 x i32> product, and that product is subtracted from the accumulator %a.
; FileCheck expects the whole sequence to appear as one smlsl2 (.4s <- .8h, .8h).
1618 define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
1619 ; CHECK-LABEL: test_vmlsl_high_s16:
1620 ; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
1622 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1623 %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1624 %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
1625 %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
1626 ret <4 x i32> %sub.i.i
; Multiply-subtract on the upper halves of two <4 x i32> vectors.
; Lanes 2-3 of %b and %c are extracted, multiplied by the smull intrinsic into
; a <2 x i64> product, and that product is subtracted from the accumulator %a.
; FileCheck expects the whole sequence to appear as one smlsl2 (.2d <- .4s, .4s).
1629 define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
1630 ; CHECK-LABEL: test_vmlsl_high_s32:
1631 ; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
1633 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1634 %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1635 %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
1636 %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
1637 ret <2 x i64> %sub.i.i
; Multiply-subtract on the upper halves of two <16 x i8> vectors.
; Lanes 8-15 of %b and %c are extracted, multiplied by the umull intrinsic into
; an <8 x i16> product, and that product is subtracted from the accumulator %a.
; FileCheck expects the whole sequence to appear as one umlsl2 (.8h <- .16b, .16b).
1640 define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
1641 ; CHECK-LABEL: test_vmlsl_high_u8:
1642 ; CHECK: umlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
1644 %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1645 %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1646 %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
1647 %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i
1648 ret <8 x i16> %sub.i.i
; Multiply-subtract on the upper halves of two <8 x i16> vectors.
; Lanes 4-7 of %b and %c are extracted, multiplied by the umull intrinsic into
; a <4 x i32> product, and that product is subtracted from the accumulator %a.
; FileCheck expects the whole sequence to appear as one umlsl2 (.4s <- .8h, .8h).
1651 define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
1652 ; CHECK-LABEL: test_vmlsl_high_u16:
1653 ; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
1655 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1656 %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1657 %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
1658 %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
1659 ret <4 x i32> %sub.i.i
; Multiply-subtract on the upper halves of two <4 x i32> vectors.
; Lanes 2-3 of %b and %c are extracted, multiplied by the umull intrinsic into
; a <2 x i64> product, and that product is subtracted from the accumulator %a.
; FileCheck expects the whole sequence to appear as one umlsl2 (.2d <- .4s, .4s).
1662 define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
1663 ; CHECK-LABEL: test_vmlsl_high_u32:
1664 ; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
1666 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1667 %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1668 %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
1669 %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
1670 ret <2 x i64> %sub.i.i
; Direct call of the sqdmull intrinsic on <4 x i16> operands, returning the
; <4 x i32> result unchanged.
; FileCheck expects a single sqdmull (.4s <- .4h, .4h).
1673 define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) {
1674 ; CHECK-LABEL: test_vqdmull_s16:
1675 ; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
1677 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
1678 ret <4 x i32> %vqdmull2.i
; Direct call of the sqdmull intrinsic on <2 x i32> operands, returning the
; <2 x i64> result unchanged.
; FileCheck expects a single sqdmull (.2d <- .2s, .2s).
1681 define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) {
1682 ; CHECK-LABEL: test_vqdmull_s32:
1683 ; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
1685 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
1686 ret <2 x i64> %vqdmull2.i
; sqdmull of %b and %c (<4 x i16>) followed by an sqadd intrinsic call that
; combines the accumulator %a (first operand) with the <4 x i32> product.
; FileCheck expects the two intrinsic calls to appear as one sqdmlal
; (.4s <- .4h, .4h).
1689 define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
1690 ; CHECK-LABEL: test_vqdmlal_s16:
1691 ; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
1693 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
1694 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
1695 ret <4 x i32> %vqdmlal4.i
; sqdmull of %b and %c (<2 x i32>) followed by an sqadd intrinsic call that
; combines the accumulator %a (first operand) with the <2 x i64> product.
; FileCheck expects the two intrinsic calls to appear as one sqdmlal
; (.2d <- .2s, .2s).
1698 define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
1699 ; CHECK-LABEL: test_vqdmlal_s32:
1700 ; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
1702 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
1703 %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
1704 ret <2 x i64> %vqdmlal4.i
; sqdmull of %b and %c (<4 x i16>) followed by an sqsub intrinsic call whose
; first operand is the accumulator %a and whose second is the <4 x i32> product.
; FileCheck expects the two intrinsic calls to appear as one sqdmlsl
; (.4s <- .4h, .4h).
1707 define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
1708 ; CHECK-LABEL: test_vqdmlsl_s16:
1709 ; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
1711 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
1712 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
1713 ret <4 x i32> %vqdmlsl4.i
; sqdmull of %b and %c (<2 x i32>) followed by an sqsub intrinsic call whose
; first operand is the accumulator %a and whose second is the <2 x i64> product.
; FileCheck expects the two intrinsic calls to appear as one sqdmlsl
; (.2d <- .2s, .2s).
1716 define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
1717 ; CHECK-LABEL: test_vqdmlsl_s32:
1718 ; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
1720 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
1721 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
1722 ret <2 x i64> %vqdmlsl4.i
; sqdmull on the upper halves of two <8 x i16> vectors.
; Lanes 4-7 of %a and %b are extracted and passed to the sqdmull intrinsic; the
; <4 x i32> result is returned unchanged.
; FileCheck expects the shuffles and the call to appear as one sqdmull2
; (.4s <- .8h, .8h).
1725 define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) {
1726 ; CHECK-LABEL: test_vqdmull_high_s16:
1727 ; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
1729 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1730 %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1731 %vqdmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
1732 ret <4 x i32> %vqdmull2.i.i
; sqdmull on the upper halves of two <4 x i32> vectors.
; Lanes 2-3 of %a and %b are extracted and passed to the sqdmull intrinsic; the
; <2 x i64> result is returned unchanged.
; FileCheck expects the shuffles and the call to appear as one sqdmull2
; (.2d <- .4s, .4s).
1735 define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) {
1736 ; CHECK-LABEL: test_vqdmull_high_s32:
1737 ; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
1739 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1740 %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1741 %vqdmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
1742 ret <2 x i64> %vqdmull2.i.i
; sqdmull on the upper halves (lanes 4-7) of %b and %c, followed by an sqadd
; intrinsic call that combines the accumulator %a with the <4 x i32> product.
; FileCheck expects the whole sequence to appear as one sqdmlal2
; (.4s <- .8h, .8h).
1745 define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
1746 ; CHECK-LABEL: test_vqdmlal_high_s16:
1747 ; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
1749 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1750 %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1751 %vqdmlal2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
1752 %vqdmlal4.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i.i)
1753 ret <4 x i32> %vqdmlal4.i.i
; sqdmull on the upper halves (lanes 2-3) of %b and %c, followed by an sqadd
; intrinsic call that combines the accumulator %a with the <2 x i64> product.
; FileCheck expects the whole sequence to appear as one sqdmlal2
; (.2d <- .4s, .4s).
1756 define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
1757 ; CHECK-LABEL: test_vqdmlal_high_s32:
1758 ; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
1760 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1761 %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1762 %vqdmlal2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
1763 %vqdmlal4.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i.i)
1764 ret <2 x i64> %vqdmlal4.i.i
; sqdmull on the upper halves (lanes 4-7) of %b and %c, followed by an sqsub
; intrinsic call whose first operand is the accumulator %a and whose second is
; the <4 x i32> product.
; FileCheck expects the whole sequence to appear as one sqdmlsl2
; (.4s <- .8h, .8h).
1767 define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
1768 ; CHECK-LABEL: test_vqdmlsl_high_s16:
1769 ; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
1771 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1772 %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1773 %vqdmlsl2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
1774 %vqdmlsl4.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i.i)
1775 ret <4 x i32> %vqdmlsl4.i.i
; sqdmull on the upper halves (lanes 2-3) of %b and %c, followed by an sqsub
; intrinsic call whose first operand is the accumulator %a and whose second is
; the <2 x i64> product.
; FileCheck expects the whole sequence to appear as one sqdmlsl2
; (.2d <- .4s, .4s).
1778 define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
1779 ; CHECK-LABEL: test_vqdmlsl_high_s32:
1780 ; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
1782 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1783 %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1784 %vqdmlsl2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
1785 %vqdmlsl4.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i.i)
1786 ret <2 x i64> %vqdmlsl4.i.i
; Direct call of the pmull.v8i16 intrinsic on <8 x i8> operands, returning the
; <8 x i16> result unchanged.
; FileCheck expects a single pmull (.8h <- .8b, .8b).
1789 define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) {
1790 ; CHECK-LABEL: test_vmull_p8:
1791 ; CHECK: pmull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
1793 %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b)
1794 ret <8 x i16> %vmull.i
; pmull on the upper halves of two <16 x i8> vectors.
; Lanes 8-15 of %a and %b are extracted and passed to the pmull.v8i16
; intrinsic; the <8 x i16> result is returned unchanged.
; FileCheck expects the shuffles and the call to appear as one pmull2
; (.8h <- .16b, .16b).
1797 define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) {
1798 ; CHECK-LABEL: test_vmull_high_p8:
1799 ; CHECK: pmull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
1801 %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1802 %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1803 %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
1804 ret <8 x i16> %vmull.i.i
; Call of the pmull64 intrinsic on two scalar i64 arguments; the <16 x i8>
; result is bitcast to i128, the function's declared return type.
; FileCheck expects a single pmull (.1q <- .1d, .1d).
; The label directive ends in ':' like every other test in this file, so it
; anchors on the function's label line rather than on any mention of the symbol.
1807 define i128 @test_vmull_p64(i64 %a, i64 %b) #4 {
1808 ; CHECK-LABEL: test_vmull_p64:
1809 ; CHECK: pmull {{v[0-9]+}}.1q, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d
1811 %vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b)
1812 %vmull3.i = bitcast <16 x i8> %vmull2.i to i128
; pmull64 on the upper lanes of two <2 x i64> vectors.
; Element 1 of %a and of %b is extracted and passed to the pmull64 intrinsic;
; the <16 x i8> result is bitcast to i128 and returned.
; FileCheck expects the extracts and the call to appear as one pmull2
; (.1q <- .2d, .2d).
; The label directive ends in ':' like every other test in this file, so it
; anchors on the function's label line rather than on any mention of the symbol.
1816 define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 {
1817 ; CHECK-LABEL: test_vmull_high_p64:
1818 ; CHECK: pmull2 {{v[0-9]+}}.1q, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
1820 %0 = extractelement <2 x i64> %a, i32 1
1821 %1 = extractelement <2 x i64> %b, i32 1
1822 %vmull2.i.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %0, i64 %1) #1
1823 %vmull3.i.i = bitcast <16 x i8> %vmull2.i.i to i128
1824 ret i128 %vmull3.i.i
; Declaration of the pmull64 intrinsic called by test_vmull_p64 and
; test_vmull_high_p64: takes two scalar i64 operands and returns <16 x i8>.
1827 declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5