1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s --check-prefix=CHECK
3 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast -mcpu=exynos-m3 | FileCheck %s --check-prefix=CHECK
5 declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>)
7 declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>)
9 declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>)
11 declare double @llvm.aarch64.neon.fmulx.f64(double, double)
13 declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>)
14 declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v4i32.v2i32(<4 x i32>, <2 x i32>, i32)
15 declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v4i32.v4i32(<4 x i32>, <4 x i32>, i32)
17 declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>)
18 declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v2i32.v2i32(<2 x i32>, <2 x i32>, i32)
19 declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v2i32.v4i32(<2 x i32>, <4 x i32>, i32)
21 declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>)
22 declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v8i16.v4i16(<8 x i16>, <4 x i16>, i32)
23 declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v8i16.v8i16(<8 x i16>, <8 x i16>, i32)
25 declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>)
26 declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v4i16.v4i16(<4 x i16>, <4 x i16>, i32)
27 declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v4i16.v8i16(<4 x i16>, <8 x i16>, i32)
29 declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>)
30 declare <4 x i32> @llvm.aarch64.neon.sqdmulh.lane.v4i32.v2i32(<4 x i32>, <2 x i32>, i32)
31 declare <4 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v4i32.v4i32(<4 x i32>, <4 x i32>, i32)
33 declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>)
34 declare <2 x i32> @llvm.aarch64.neon.sqdmulh.lane.v2i32.v2i32(<2 x i32>, <2 x i32>, i32)
35 declare <2 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v2i32.v4i32(<2 x i32>, <4 x i32>, i32)
37 declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>)
38 declare <8 x i16> @llvm.aarch64.neon.sqdmulh.lane.v8i16.v4i16(<8 x i16>, <4 x i16>, i32)
39 declare <8 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v8i16.v8i16(<8 x i16>, <8 x i16>, i32)
41 declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>)
42 declare <4 x i16> @llvm.aarch64.neon.sqdmulh.lane.v4i16.v4i16(<4 x i16>, <4 x i16>, i32)
43 declare <4 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v4i16.v8i16(<4 x i16>, <8 x i16>, i32)
45 declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
47 declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
49 declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
51 declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
53 declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
55 declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
57 declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
59 declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
61 declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
63 declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
; Multiply-accumulate by lane: splat element 3 of the <4 x i16> %v, multiply by %b, add %a.
; The asserted output is a single by-element `mla v0.4h, v1.4h, v2.h[3]`.
65 define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
66 ; CHECK-LABEL: test_vmla_lane_s16:
67 ; CHECK: // %bb.0: // %entry
68 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
69 ; CHECK-NEXT: mla v0.4h, v1.4h, v2.h[3]
72 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
73 %mul = mul <4 x i16> %shuffle, %b
74 %add = add <4 x i16> %mul, %a
; Multiply-accumulate by lane, 8 x i16 result: element 3 of the <4 x i16> %v is splatted
; to eight lanes, multiplied by %b and added to %a. Asserted output: `mla v0.8h, v1.8h, v2.h[3]`.
78 define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
79 ; CHECK-LABEL: test_vmlaq_lane_s16:
80 ; CHECK: // %bb.0: // %entry
81 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
82 ; CHECK-NEXT: mla v0.8h, v1.8h, v2.h[3]
85 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
86 %mul = mul <8 x i16> %shuffle, %b
87 %add = add <8 x i16> %mul, %a
; Multiply-accumulate by lane: splat element 1 of the <2 x i32> %v, multiply by %b, add %a.
; Asserted output: `mla v0.2s, v1.2s, v2.s[1]`.
91 define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
92 ; CHECK-LABEL: test_vmla_lane_s32:
93 ; CHECK: // %bb.0: // %entry
94 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
95 ; CHECK-NEXT: mla v0.2s, v1.2s, v2.s[1]
98 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
99 %mul = mul <2 x i32> %shuffle, %b
100 %add = add <2 x i32> %mul, %a
; Multiply-accumulate by lane, 4 x i32 result: element 1 of the <2 x i32> %v is splatted
; to four lanes, multiplied by %b and added to %a. Asserted output: `mla v0.4s, v1.4s, v2.s[1]`.
104 define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
105 ; CHECK-LABEL: test_vmlaq_lane_s32:
106 ; CHECK: // %bb.0: // %entry
107 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
108 ; CHECK-NEXT: mla v0.4s, v1.4s, v2.s[1]
111 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
112 %mul = mul <4 x i32> %shuffle, %b
113 %add = add <4 x i32> %mul, %a
; Multiply-accumulate by lane from an <8 x i16> source: element 7 of %v is splatted to
; four lanes, multiplied by %b and added to %a. Asserted output: `mla v0.4h, v1.4h, v2.h[7]`.
117 define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
118 ; CHECK-LABEL: test_vmla_laneq_s16:
119 ; CHECK: // %bb.0: // %entry
120 ; CHECK-NEXT: mla v0.4h, v1.4h, v2.h[7]
123 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
124 %mul = mul <4 x i16> %shuffle, %b
125 %add = add <4 x i16> %mul, %a
; Multiply-accumulate by lane, all operands <8 x i16>: splat element 7 of %v, multiply by %b,
; add %a. Asserted output: `mla v0.8h, v1.8h, v2.h[7]`.
129 define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
130 ; CHECK-LABEL: test_vmlaq_laneq_s16:
131 ; CHECK: // %bb.0: // %entry
132 ; CHECK-NEXT: mla v0.8h, v1.8h, v2.h[7]
135 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
136 %mul = mul <8 x i16> %shuffle, %b
137 %add = add <8 x i16> %mul, %a
; Multiply-accumulate by lane from a <4 x i32> source: element 3 of %v is splatted to two
; lanes, multiplied by %b and added to %a. Asserted output: `mla v0.2s, v1.2s, v2.s[3]`.
141 define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
142 ; CHECK-LABEL: test_vmla_laneq_s32:
143 ; CHECK: // %bb.0: // %entry
144 ; CHECK-NEXT: mla v0.2s, v1.2s, v2.s[3]
147 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
148 %mul = mul <2 x i32> %shuffle, %b
149 %add = add <2 x i32> %mul, %a
; Multiply-accumulate by lane, all operands <4 x i32>: splat element 3 of %v, multiply by %b,
; add %a. Asserted output: `mla v0.4s, v1.4s, v2.s[3]`.
153 define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
154 ; CHECK-LABEL: test_vmlaq_laneq_s32:
155 ; CHECK: // %bb.0: // %entry
156 ; CHECK-NEXT: mla v0.4s, v1.4s, v2.s[3]
159 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
160 %mul = mul <4 x i32> %shuffle, %b
161 %add = add <4 x i32> %mul, %a
; Multiply-subtract by lane: splat element 3 of the <4 x i16> %v, multiply by %b, and
; subtract the product from %a. Asserted output: `mls v0.4h, v1.4h, v2.h[3]`.
165 define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
166 ; CHECK-LABEL: test_vmls_lane_s16:
167 ; CHECK: // %bb.0: // %entry
168 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
169 ; CHECK-NEXT: mls v0.4h, v1.4h, v2.h[3]
172 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
173 %mul = mul <4 x i16> %shuffle, %b
174 %sub = sub <4 x i16> %a, %mul
; Multiply-subtract by lane, 8 x i16 result: element 3 of the <4 x i16> %v is splatted to
; eight lanes; the product with %b is subtracted from %a. Asserted output: `mls v0.8h, v1.8h, v2.h[3]`.
178 define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
179 ; CHECK-LABEL: test_vmlsq_lane_s16:
180 ; CHECK: // %bb.0: // %entry
181 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
182 ; CHECK-NEXT: mls v0.8h, v1.8h, v2.h[3]
185 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
186 %mul = mul <8 x i16> %shuffle, %b
187 %sub = sub <8 x i16> %a, %mul
; Multiply-subtract by lane: splat element 1 of the <2 x i32> %v, multiply by %b, and
; subtract the product from %a. Asserted output: `mls v0.2s, v1.2s, v2.s[1]`.
191 define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
192 ; CHECK-LABEL: test_vmls_lane_s32:
193 ; CHECK: // %bb.0: // %entry
194 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
195 ; CHECK-NEXT: mls v0.2s, v1.2s, v2.s[1]
198 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
199 %mul = mul <2 x i32> %shuffle, %b
200 %sub = sub <2 x i32> %a, %mul
; Multiply-subtract by lane, 4 x i32 result: element 1 of the <2 x i32> %v is splatted to
; four lanes; the product with %b is subtracted from %a. Asserted output: `mls v0.4s, v1.4s, v2.s[1]`.
204 define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
205 ; CHECK-LABEL: test_vmlsq_lane_s32:
206 ; CHECK: // %bb.0: // %entry
207 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
208 ; CHECK-NEXT: mls v0.4s, v1.4s, v2.s[1]
211 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
212 %mul = mul <4 x i32> %shuffle, %b
213 %sub = sub <4 x i32> %a, %mul
; Multiply-subtract by lane from an <8 x i16> source: element 7 of %v is splatted to four
; lanes; the product with %b is subtracted from %a. Asserted output: `mls v0.4h, v1.4h, v2.h[7]`.
217 define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
218 ; CHECK-LABEL: test_vmls_laneq_s16:
219 ; CHECK: // %bb.0: // %entry
220 ; CHECK-NEXT: mls v0.4h, v1.4h, v2.h[7]
223 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
224 %mul = mul <4 x i16> %shuffle, %b
225 %sub = sub <4 x i16> %a, %mul
; Multiply-subtract by lane, all operands <8 x i16>: splat element 7 of %v, multiply by %b,
; subtract from %a. Asserted output: `mls v0.8h, v1.8h, v2.h[7]`.
229 define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
230 ; CHECK-LABEL: test_vmlsq_laneq_s16:
231 ; CHECK: // %bb.0: // %entry
232 ; CHECK-NEXT: mls v0.8h, v1.8h, v2.h[7]
235 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
236 %mul = mul <8 x i16> %shuffle, %b
237 %sub = sub <8 x i16> %a, %mul
; Multiply-subtract by lane from a <4 x i32> source: element 3 of %v is splatted to two
; lanes; the product with %b is subtracted from %a. Asserted output: `mls v0.2s, v1.2s, v2.s[3]`.
241 define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
242 ; CHECK-LABEL: test_vmls_laneq_s32:
243 ; CHECK: // %bb.0: // %entry
244 ; CHECK-NEXT: mls v0.2s, v1.2s, v2.s[3]
247 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
248 %mul = mul <2 x i32> %shuffle, %b
249 %sub = sub <2 x i32> %a, %mul
; Multiply-subtract by lane, all operands <4 x i32>: splat element 3 of %v, multiply by %b,
; subtract from %a. Asserted output: `mls v0.4s, v1.4s, v2.s[3]`.
253 define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
254 ; CHECK-LABEL: test_vmlsq_laneq_s32:
255 ; CHECK: // %bb.0: // %entry
256 ; CHECK-NEXT: mls v0.4s, v1.4s, v2.s[3]
259 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
260 %mul = mul <4 x i32> %shuffle, %b
261 %sub = sub <4 x i32> %a, %mul
; Multiply by lane: splat element 3 of the <4 x i16> %v and multiply by %a.
; Asserted output: `mul v0.4h, v0.4h, v1.h[3]`.
265 define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) {
266 ; CHECK-LABEL: test_vmul_lane_s16:
267 ; CHECK: // %bb.0: // %entry
268 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
269 ; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[3]
272 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
273 %mul = mul <4 x i16> %shuffle, %a
; Multiply by lane, 8 x i16 result: element 3 of the <4 x i16> %v is splatted to eight
; lanes and multiplied by %a. Asserted output: `mul v0.8h, v0.8h, v1.h[3]`.
277 define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
278 ; CHECK-LABEL: test_vmulq_lane_s16:
279 ; CHECK: // %bb.0: // %entry
280 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
281 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[3]
284 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
285 %mul = mul <8 x i16> %shuffle, %a
; Multiply by lane: splat element 1 of the <2 x i32> %v and multiply by %a.
; Asserted output: `mul v0.2s, v0.2s, v1.s[1]`.
289 define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) {
290 ; CHECK-LABEL: test_vmul_lane_s32:
291 ; CHECK: // %bb.0: // %entry
292 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
293 ; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[1]
296 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
297 %mul = mul <2 x i32> %shuffle, %a
; Multiply by lane, 4 x i32 result: element 1 of the <2 x i32> %v is splatted to four
; lanes and multiplied by %a. Asserted output: `mul v0.4s, v0.4s, v1.s[1]`.
301 define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
302 ; CHECK-LABEL: test_vmulq_lane_s32:
303 ; CHECK: // %bb.0: // %entry
304 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
305 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[1]
308 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
309 %mul = mul <4 x i32> %shuffle, %a
; Unsigned-named multiply by lane: splat element 3 of the <4 x i16> %v and multiply by %a.
; The IR uses the plain integer `mul`; asserted output: `mul v0.4h, v0.4h, v1.h[3]`.
313 define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) {
314 ; CHECK-LABEL: test_vmul_lane_u16:
315 ; CHECK: // %bb.0: // %entry
316 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
317 ; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[3]
320 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
321 %mul = mul <4 x i16> %shuffle, %a
; Unsigned-named multiply by lane, 8 x i16 result: element 3 of the <4 x i16> %v is splatted
; to eight lanes and multiplied by %a. Asserted output: `mul v0.8h, v0.8h, v1.h[3]`.
325 define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) {
326 ; CHECK-LABEL: test_vmulq_lane_u16:
327 ; CHECK: // %bb.0: // %entry
328 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
329 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[3]
332 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
333 %mul = mul <8 x i16> %shuffle, %a
; Unsigned-named multiply by lane: splat element 1 of the <2 x i32> %v and multiply by %a.
; Asserted output: `mul v0.2s, v0.2s, v1.s[1]`.
337 define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) {
338 ; CHECK-LABEL: test_vmul_lane_u32:
339 ; CHECK: // %bb.0: // %entry
340 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
341 ; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[1]
344 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
345 %mul = mul <2 x i32> %shuffle, %a
; Unsigned-named multiply by lane, 4 x i32 result: element 1 of the <2 x i32> %v is splatted
; to four lanes and multiplied by %a. Asserted output: `mul v0.4s, v0.4s, v1.s[1]`.
349 define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) {
350 ; CHECK-LABEL: test_vmulq_lane_u32:
351 ; CHECK: // %bb.0: // %entry
352 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
353 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[1]
356 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
357 %mul = mul <4 x i32> %shuffle, %a
; Multiply by lane from an <8 x i16> source: element 7 of %v is splatted to four lanes and
; multiplied by %a. Asserted output: `mul v0.4h, v0.4h, v1.h[7]`.
361 define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
362 ; CHECK-LABEL: test_vmul_laneq_s16:
363 ; CHECK: // %bb.0: // %entry
364 ; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[7]
367 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
368 %mul = mul <4 x i16> %shuffle, %a
; Multiply by lane, all operands <8 x i16>: splat element 7 of %v and multiply by %a.
; Asserted output: `mul v0.8h, v0.8h, v1.h[7]`.
372 define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
373 ; CHECK-LABEL: test_vmulq_laneq_s16:
374 ; CHECK: // %bb.0: // %entry
375 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[7]
378 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
379 %mul = mul <8 x i16> %shuffle, %a
; Multiply by lane from a <4 x i32> source: element 3 of %v is splatted to two lanes and
; multiplied by %a. Asserted output: `mul v0.2s, v0.2s, v1.s[3]`.
383 define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
384 ; CHECK-LABEL: test_vmul_laneq_s32:
385 ; CHECK: // %bb.0: // %entry
386 ; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[3]
389 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
390 %mul = mul <2 x i32> %shuffle, %a
; Multiply by lane, all operands <4 x i32>: splat element 3 of %v and multiply by %a.
; Asserted output: `mul v0.4s, v0.4s, v1.s[3]`.
394 define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
395 ; CHECK-LABEL: test_vmulq_laneq_s32:
396 ; CHECK: // %bb.0: // %entry
397 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[3]
400 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
401 %mul = mul <4 x i32> %shuffle, %a
; Unsigned-named multiply by lane from an <8 x i16> source: element 7 of %v is splatted to
; four lanes and multiplied by %a. Asserted output: `mul v0.4h, v0.4h, v1.h[7]`.
405 define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
406 ; CHECK-LABEL: test_vmul_laneq_u16:
407 ; CHECK: // %bb.0: // %entry
408 ; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[7]
411 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
412 %mul = mul <4 x i16> %shuffle, %a
; Unsigned-named multiply by lane, all operands <8 x i16>: splat element 7 of %v and
; multiply by %a. Asserted output: `mul v0.8h, v0.8h, v1.h[7]`.
416 define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
417 ; CHECK-LABEL: test_vmulq_laneq_u16:
418 ; CHECK: // %bb.0: // %entry
419 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[7]
422 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
423 %mul = mul <8 x i16> %shuffle, %a
; Unsigned-named multiply by lane from a <4 x i32> source: element 3 of %v is splatted to
; two lanes and multiplied by %a. Asserted output: `mul v0.2s, v0.2s, v1.s[3]`.
427 define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
428 ; CHECK-LABEL: test_vmul_laneq_u32:
429 ; CHECK: // %bb.0: // %entry
430 ; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[3]
433 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
434 %mul = mul <2 x i32> %shuffle, %a
; Unsigned-named multiply by lane, all operands <4 x i32>: splat element 3 of %v and
; multiply by %a. Asserted output: `mul v0.4s, v0.4s, v1.s[3]`.
438 define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
439 ; CHECK-LABEL: test_vmulq_laneq_u32:
440 ; CHECK: // %bb.0: // %entry
441 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[3]
444 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
445 %mul = mul <4 x i32> %shuffle, %a
; Fused multiply-add by lane: element 1 of the <2 x float> %v is splatted and passed with
; %b and %a to @llvm.fma.v2f32. Asserted output: `fmla v0.2s, v1.2s, v2.s[1]`.
449 define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
450 ; CHECK-LABEL: test_vfma_lane_f32:
451 ; CHECK: // %bb.0: // %entry
452 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
453 ; CHECK-NEXT: fmla v0.2s, v1.2s, v2.s[1]
456 %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
457 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
461 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
; Fused multiply-add by lane, 4 x float result: element 1 of the <2 x float> %v is splatted
; to four lanes and fed to @llvm.fma.v4f32 with %b and %a. Asserted output: `fmla v0.4s, v1.4s, v2.s[1]`.
463 define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
464 ; CHECK-LABEL: test_vfmaq_lane_f32:
465 ; CHECK: // %bb.0: // %entry
466 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
467 ; CHECK-NEXT: fmla v0.4s, v1.4s, v2.s[1]
470 %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
471 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
475 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
; Fused multiply-add by lane from a <4 x float> source: element 3 of %v is splatted to two
; lanes and fed to @llvm.fma.v2f32 with %b and %a. Asserted output: `fmla v0.2s, v1.2s, v2.s[3]`.
477 define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
478 ; CHECK-LABEL: test_vfma_laneq_f32:
479 ; CHECK: // %bb.0: // %entry
480 ; CHECK-NEXT: fmla v0.2s, v1.2s, v2.s[3]
483 %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
484 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
; Fused multiply-add by lane, all operands <4 x float>: splat element 3 of %v and call
; @llvm.fma.v4f32 with %b and %a. Asserted output: `fmla v0.4s, v1.4s, v2.s[3]`.
488 define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
489 ; CHECK-LABEL: test_vfmaq_laneq_f32:
490 ; CHECK: // %bb.0: // %entry
491 ; CHECK-NEXT: fmla v0.4s, v1.4s, v2.s[3]
494 %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
495 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
; Fused multiply-subtract by lane: %v is negated as `fsub -0.0, %v`, element 1 of the result
; is splatted and fed to @llvm.fma.v2f32. The negation is expected to fold into
; `fmls v0.2s, v1.2s, v2.s[1]`.
499 define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
500 ; CHECK-LABEL: test_vfms_lane_f32:
501 ; CHECK: // %bb.0: // %entry
502 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
503 ; CHECK-NEXT: fmls v0.2s, v1.2s, v2.s[1]
506 %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
507 %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> <i32 1, i32 1>
508 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
; Fused multiply-subtract by lane, 4 x float result: the <2 x float> %v is negated as
; `fsub -0.0, %v`, element 1 is splatted to four lanes and fed to @llvm.fma.v4f32.
; Asserted output: `fmls v0.4s, v1.4s, v2.s[1]`.
512 define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
513 ; CHECK-LABEL: test_vfmsq_lane_f32:
514 ; CHECK: // %bb.0: // %entry
515 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
516 ; CHECK-NEXT: fmls v0.4s, v1.4s, v2.s[1]
519 %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
520 %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
521 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
; Fused multiply-subtract by lane from a <4 x float> source: %v is negated as `fsub -0.0, %v`,
; element 3 is splatted to two lanes and fed to @llvm.fma.v2f32.
; Asserted output: `fmls v0.2s, v1.2s, v2.s[3]`.
525 define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
526 ; CHECK-LABEL: test_vfms_laneq_f32:
527 ; CHECK: // %bb.0: // %entry
528 ; CHECK-NEXT: fmls v0.2s, v1.2s, v2.s[3]
531 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
532 %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> <i32 3, i32 3>
533 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
; Fused multiply-subtract by lane, all operands <4 x float>: %v is negated as `fsub -0.0, %v`,
; element 3 is splatted and fed to @llvm.fma.v4f32. Asserted output: `fmls v0.4s, v1.4s, v2.s[3]`.
537 define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
538 ; CHECK-LABEL: test_vfmsq_laneq_f32:
539 ; CHECK: // %bb.0: // %entry
540 ; CHECK-NEXT: fmls v0.4s, v1.4s, v2.s[3]
543 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
544 %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
545 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
; Fused multiply-add by lane from a <1 x double> source: the single element of %v is splatted
; to two lanes (zeroinitializer mask) and fed to @llvm.fma.v2f64.
; Asserted output: `fmla v0.2d, v1.2d, v2.d[0]`.
549 define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
550 ; CHECK-LABEL: test_vfmaq_lane_f64:
551 ; CHECK: // %bb.0: // %entry
552 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
553 ; CHECK-NEXT: fmla v0.2d, v1.2d, v2.d[0]
556 %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
557 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
561 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
; Fused multiply-add by lane, all operands <2 x double>: splat element 1 of %v and call
; @llvm.fma.v2f64 with %b and %a. Asserted output: `fmla v0.2d, v1.2d, v2.d[1]`.
563 define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
564 ; CHECK-LABEL: test_vfmaq_laneq_f64:
565 ; CHECK: // %bb.0: // %entry
566 ; CHECK-NEXT: fmla v0.2d, v1.2d, v2.d[1]
569 %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
570 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
; Fused multiply-subtract by lane from a <1 x double> source: %v is negated as `fsub -0.0, %v`,
; its single element is splatted to two lanes and fed to @llvm.fma.v2f64.
; Asserted output: `fmls v0.2d, v1.2d, v2.d[0]`.
574 define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
575 ; CHECK-LABEL: test_vfmsq_lane_f64:
576 ; CHECK: // %bb.0: // %entry
577 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
578 ; CHECK-NEXT: fmls v0.2d, v1.2d, v2.d[0]
581 %sub = fsub <1 x double> <double -0.000000e+00>, %v
582 %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer
583 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
; Fused multiply-subtract by lane, all operands <2 x double>: %v is negated as `fsub -0.0, %v`,
; element 1 is splatted and fed to @llvm.fma.v2f64. Asserted output: `fmls v0.2d, v1.2d, v2.d[1]`.
587 define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
588 ; CHECK-LABEL: test_vfmsq_laneq_f64:
589 ; CHECK: // %bb.0: // %entry
590 ; CHECK-NEXT: fmls v0.2d, v1.2d, v2.d[1]
593 %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
594 %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> <i32 1, i32 1>
595 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
; Scalar fused multiply-add by lane: element 3 is extracted from the <4 x float> %v and fed
; to @llvm.fma.f32 with scalar %b and %a. Asserted output: `fmla s0, s1, v2.s[3]`.
599 define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) {
600 ; CHECK-LABEL: test_vfmas_laneq_f32:
601 ; CHECK: // %bb.0: // %entry
602 ; CHECK-NEXT: fmla s0, s1, v2.s[3]
605 %extract = extractelement <4 x float> %v, i32 3
606 %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
610 declare float @llvm.fma.f32(float, float, float)
; Scalar fused multiply-subtract with a <1 x double> source: element 0 is extracted, negated
; as `fsub -0.0, x`, and fed to @llvm.fma.f64. The asserted output is the scalar
; `fmsub d0, d1, d2, d0` rather than a by-element form.
612 define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) {
613 ; CHECK-LABEL: test_vfmsd_lane_f64:
614 ; CHECK: // %bb.0: // %entry
615 ; CHECK-NEXT: fmsub d0, d1, d2, d0
618 %extract.rhs = extractelement <1 x double> %v, i32 0
619 %extract = fsub double -0.000000e+00, %extract.rhs
620 %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
624 declare double @llvm.fma.f64(double, double, double)
; Scalar fused multiply-subtract by lane: element 1 is extracted from the <2 x float> %v,
; negated as `fsub -0.0, x`, and fed to @llvm.fma.f32. Asserted output: `fmls s0, s1, v2.s[1]`.
626 define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %v) {
627 ; CHECK-LABEL: test_vfmss_lane_f32:
628 ; CHECK: // %bb.0: // %entry
629 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
630 ; CHECK-NEXT: fmls s0, s1, v2.s[1]
633 %extract.rhs = extractelement <2 x float> %v, i32 1
634 %extract = fsub float -0.000000e+00, %extract.rhs
635 %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
; Scalar fused multiply-subtract by lane: element 3 is extracted from the <4 x float> %v,
; negated as `fsub -0.0, x`, and fed to @llvm.fma.f32. Asserted output: `fmls s0, s1, v2.s[3]`.
639 define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) {
640 ; CHECK-LABEL: test_vfmss_laneq_f32:
641 ; CHECK: // %bb.0: // %entry
642 ; CHECK-NEXT: fmls s0, s1, v2.s[3]
645 %extract.rhs = extractelement <4 x float> %v, i32 3
646 %extract = fsub float -0.000000e+00, %extract.rhs
647 %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
; Scalar fused multiply-subtract by lane: element 1 is extracted from the <2 x double> %v,
; negated as `fsub -0.0, x`, and fed to @llvm.fma.f64. Asserted output: `fmls d0, d1, v2.d[1]`.
651 define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) {
652 ; CHECK-LABEL: test_vfmsd_laneq_f64:
653 ; CHECK: // %bb.0: // %entry
654 ; CHECK-NEXT: fmls d0, d1, v2.d[1]
657 %extract.rhs = extractelement <2 x double> %v, i32 1
658 %extract = fsub double -0.000000e+00, %extract.rhs
659 %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
; Variant where the negation is applied to the whole <1 x double> vector before element 0
; is extracted; the result is fed to @llvm.fma.f64. Asserted output: scalar `fmsub d0, d1, d2, d0`.
663 define double @test_vfmsd_lane_f64_0(double %a, double %b, <1 x double> %v) {
664 ; CHECK-LABEL: test_vfmsd_lane_f64_0:
665 ; CHECK: // %bb.0: // %entry
666 ; CHECK-NEXT: fmsub d0, d1, d2, d0
669 %tmp0 = fsub <1 x double> <double -0.000000e+00>, %v
670 %tmp1 = extractelement <1 x double> %tmp0, i32 0
671 %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a)
; Variant where the negation is applied to the whole <2 x float> vector before element 1
; is extracted; the result is fed to @llvm.fma.f32. Asserted output: `fmls s0, s1, v2.s[1]`.
675 define float @test_vfmss_lane_f32_0(float %a, float %b, <2 x float> %v) {
676 ; CHECK-LABEL: test_vfmss_lane_f32_0:
677 ; CHECK: // %bb.0: // %entry
678 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
679 ; CHECK-NEXT: fmls s0, s1, v2.s[1]
682 %tmp0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
683 %tmp1 = extractelement <2 x float> %tmp0, i32 1
684 %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a)
; Variant where the negation is applied to the whole <4 x float> vector before element 3
; is extracted; the result is fed to @llvm.fma.f32. Asserted output: `fmls s0, s1, v2.s[3]`.
688 define float @test_vfmss_laneq_f32_0(float %a, float %b, <4 x float> %v) {
689 ; CHECK-LABEL: test_vfmss_laneq_f32_0:
690 ; CHECK: // %bb.0: // %entry
691 ; CHECK-NEXT: fmls s0, s1, v2.s[3]
694 %tmp0 = fsub <4 x float><float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
695 %tmp1 = extractelement <4 x float> %tmp0, i32 3
696 %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a)
; Variant where the negation is applied to the whole <2 x double> vector before element 1
; is extracted; the result is fed to @llvm.fma.f64. Asserted output: `fmls d0, d1, v2.d[1]`.
700 define double @test_vfmsd_laneq_f64_0(double %a, double %b, <2 x double> %v) {
701 ; CHECK-LABEL: test_vfmsd_laneq_f64_0:
702 ; CHECK: // %bb.0: // %entry
703 ; CHECK-NEXT: fmls d0, d1, v2.d[1]
706 %tmp0 = fsub <2 x double><double -0.000000e+00, double -0.000000e+00>, %v
707 %tmp1 = extractelement <2 x double> %tmp0, i32 1
708 %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a)
; Signed widening multiply-accumulate by lane: element 3 of the <4 x i16> %v is splatted and
; passed with %b to the smull.v4i32 intrinsic; the <4 x i32> product is added to %a.
; Asserted output: `smlal v0.4s, v1.4h, v2.h[3]`.
712 define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
713 ; CHECK-LABEL: test_vmlal_lane_s16:
714 ; CHECK: // %bb.0: // %entry
715 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
716 ; CHECK-NEXT: smlal v0.4s, v1.4h, v2.h[3]
719 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
720 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
721 %add = add <4 x i32> %vmull2.i, %a
; Signed widening multiply-accumulate by lane: element 1 of the <2 x i32> %v is splatted and
; passed with %b to the smull.v2i64 intrinsic; the <2 x i64> product is added to %a.
; Asserted output: `smlal v0.2d, v1.2s, v2.s[1]`.
725 define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
726 ; CHECK-LABEL: test_vmlal_lane_s32:
727 ; CHECK: // %bb.0: // %entry
728 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
729 ; CHECK-NEXT: smlal v0.2d, v1.2s, v2.s[1]
732 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
733 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
734 %add = add <2 x i64> %vmull2.i, %a
; Signed widening multiply-accumulate by lane from an <8 x i16> source: element 7 of %v is
; splatted to four lanes and passed with %b to smull.v4i32; the product is added to %a.
; Asserted output: `smlal v0.4s, v1.4h, v2.h[7]`.
738 define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
739 ; CHECK-LABEL: test_vmlal_laneq_s16:
740 ; CHECK: // %bb.0: // %entry
741 ; CHECK-NEXT: smlal v0.4s, v1.4h, v2.h[7]
744 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
745 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
746 %add = add <4 x i32> %vmull2.i, %a
; Signed widening multiply-accumulate by lane from a <4 x i32> source: element 3 of %v is
; splatted to two lanes and passed with %b to smull.v2i64; the product is added to %a.
; Asserted output: `smlal v0.2d, v1.2s, v2.s[3]`.
750 define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
751 ; CHECK-LABEL: test_vmlal_laneq_s32:
752 ; CHECK: // %bb.0: // %entry
753 ; CHECK-NEXT: smlal v0.2d, v1.2s, v2.s[3]
756 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
757 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
758 %add = add <2 x i64> %vmull2.i, %a
; High-half signed widening multiply-accumulate by lane: %shuffle.i takes elements 4..7 of the
; <8 x i16> %b, %shuffle splats element 3 of the <4 x i16> %v; their smull.v4i32 product is
; added to %a. Asserted output: `smlal2 v0.4s, v1.8h, v2.h[3]`.
762 define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
763 ; CHECK-LABEL: test_vmlal_high_lane_s16:
764 ; CHECK: // %bb.0: // %entry
765 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
766 ; CHECK-NEXT: smlal2 v0.4s, v1.8h, v2.h[3]
769 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
770 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
771 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
772 %add = add <4 x i32> %vmull2.i, %a
; High-half signed widening multiply-accumulate by lane: %shuffle.i takes elements 2..3 of the
; <4 x i32> %b, %shuffle splats element 1 of the <2 x i32> %v; their smull.v2i64 product is
; added to %a. Asserted output: `smlal2 v0.2d, v1.4s, v2.s[1]`.
776 define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
777 ; CHECK-LABEL: test_vmlal_high_lane_s32:
778 ; CHECK: // %bb.0: // %entry
779 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
780 ; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.s[1]
783 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
784 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
785 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
786 %add = add <2 x i64> %vmull2.i, %a
; High-half signed widening multiply-accumulate by lane from an <8 x i16> source: %shuffle.i
; takes elements 4..7 of %b, %shuffle splats element 7 of %v; their smull.v4i32 product is
; added to %a. Asserted output: `smlal2 v0.4s, v1.8h, v2.h[7]`.
790 define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
791 ; CHECK-LABEL: test_vmlal_high_laneq_s16:
792 ; CHECK: // %bb.0: // %entry
793 ; CHECK-NEXT: smlal2 v0.4s, v1.8h, v2.h[7]
796 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
797 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
798 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
799 %add = add <4 x i32> %vmull2.i, %a
; High-half signed widening multiply-accumulate by lane from a <4 x i32> source: %shuffle.i
; takes elements 2..3 of %b, %shuffle splats element 3 of %v; their smull.v2i64 product is
; added to %a. Asserted output: `smlal2 v0.2d, v1.4s, v2.s[3]`.
803 define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
804 ; CHECK-LABEL: test_vmlal_high_laneq_s32:
805 ; CHECK: // %bb.0: // %entry
806 ; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.s[3]
809 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
810 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
811 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
812 %add = add <2 x i64> %vmull2.i, %a
; Signed widening multiply-subtract by lane: element 3 of the <4 x i16> %v is splatted and
; passed with %b to smull.v4i32; the <4 x i32> product is subtracted from %a.
; Asserted output: `smlsl v0.4s, v1.4h, v2.h[3]`.
816 define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
817 ; CHECK-LABEL: test_vmlsl_lane_s16:
818 ; CHECK: // %bb.0: // %entry
819 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
820 ; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.h[3]
823 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
824 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
825 %sub = sub <4 x i32> %a, %vmull2.i
; Signed widening multiply-subtract by lane: element 1 of the <2 x i32> %v is splatted and
; passed with %b to smull.v2i64; the <2 x i64> product is subtracted from %a.
; Asserted output: `smlsl v0.2d, v1.2s, v2.s[1]`.
829 define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
830 ; CHECK-LABEL: test_vmlsl_lane_s32:
831 ; CHECK: // %bb.0: // %entry
832 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
833 ; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.s[1]
836 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
837 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
838 %sub = sub <2 x i64> %a, %vmull2.i
; Signed widening multiply-subtract by lane from an <8 x i16> source: element 7 of %v is
; splatted to four lanes and passed with %b to smull.v4i32; the product is subtracted from %a.
; Asserted output: `smlsl v0.4s, v1.4h, v2.h[7]`.
842 define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
843 ; CHECK-LABEL: test_vmlsl_laneq_s16:
844 ; CHECK: // %bb.0: // %entry
845 ; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.h[7]
848 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
849 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
850 %sub = sub <4 x i32> %a, %vmull2.i
; Signed widening multiply-subtract by lane from a <4 x i32> source: element 3 of %v is
; splatted to two lanes and passed with %b to smull.v2i64; the product is subtracted from %a.
; Asserted output: `smlsl v0.2d, v1.2s, v2.s[3]`.
854 define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
855 ; CHECK-LABEL: test_vmlsl_laneq_s32:
856 ; CHECK: // %bb.0: // %entry
857 ; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.s[3]
860 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
861 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
862 %sub = sub <2 x i64> %a, %vmull2.i
; High-half signed widening multiply-subtract by lane: %shuffle.i takes elements 4..7 of the
; <8 x i16> %b, %shuffle splats element 3 of the <4 x i16> %v; their smull.v4i32 product is
; subtracted from %a. Asserted output: `smlsl2 v0.4s, v1.8h, v2.h[3]`.
866 define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
867 ; CHECK-LABEL: test_vmlsl_high_lane_s16:
868 ; CHECK: // %bb.0: // %entry
869 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
870 ; CHECK-NEXT: smlsl2 v0.4s, v1.8h, v2.h[3]
873 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
874 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
875 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
876 %sub = sub <4 x i32> %a, %vmull2.i
; High-half signed widening multiply-subtract by lane: %shuffle.i takes elements 2..3 of the
; <4 x i32> %b, %shuffle splats element 1 of the <2 x i32> %v; their smull.v2i64 product is
; subtracted from %a. Asserted output: `smlsl2 v0.2d, v1.4s, v2.s[1]`.
880 define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
881 ; CHECK-LABEL: test_vmlsl_high_lane_s32:
882 ; CHECK: // %bb.0: // %entry
883 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
884 ; CHECK-NEXT: smlsl2 v0.2d, v1.4s, v2.s[1]
887 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
888 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
889 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
890 %sub = sub <2 x i64> %a, %vmull2.i
; Takes elements 4-7 of %b (%shuffle.i) and a splat of lane 7 of the <8 x i16> %v, multiplies them
; via smull and subtracts from %a; the assertions expect the high-half form smlsl2 on v2.h[7].
894 define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
895 ; CHECK-LABEL: test_vmlsl_high_laneq_s16:
896 ; CHECK: // %bb.0: // %entry
897 ; CHECK-NEXT: smlsl2 v0.4s, v1.8h, v2.h[7]
900 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
901 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
902 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
903 %sub = sub <4 x i32> %a, %vmull2.i
; Takes elements 2-3 of %b (%shuffle.i) and a splat of lane 3 of the <4 x i32> %v, multiplies them
; via smull and subtracts from %a; the assertions expect the high-half form smlsl2 on v2.s[3].
907 define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
908 ; CHECK-LABEL: test_vmlsl_high_laneq_s32:
909 ; CHECK: // %bb.0: // %entry
910 ; CHECK-NEXT: smlsl2 v0.2d, v1.4s, v2.s[3]
913 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
914 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
915 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
916 %sub = sub <2 x i64> %a, %vmull2.i
; Splats lane 3 of the <4 x i16> %v, multiplies it with %b through the umull intrinsic and adds
; the product to %a; the assertions expect one by-element umlal on v2.h[3].
920 define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
921 ; CHECK-LABEL: test_vmlal_lane_u16:
922 ; CHECK: // %bb.0: // %entry
923 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
924 ; CHECK-NEXT: umlal v0.4s, v1.4h, v2.h[3]
927 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
928 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
929 %add = add <4 x i32> %vmull2.i, %a
; Splats lane 1 of the <2 x i32> %v, multiplies it with %b through the umull intrinsic and adds
; the product to %a; the assertions expect one by-element umlal on v2.s[1].
933 define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
934 ; CHECK-LABEL: test_vmlal_lane_u32:
935 ; CHECK: // %bb.0: // %entry
936 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
937 ; CHECK-NEXT: umlal v0.2d, v1.2s, v2.s[1]
940 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
941 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
942 %add = add <2 x i64> %vmull2.i, %a
; Splats lane 7 of the <8 x i16> %v, multiplies it with %b through the umull intrinsic and adds
; the product to %a; the assertions expect one by-element umlal on v2.h[7].
946 define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
947 ; CHECK-LABEL: test_vmlal_laneq_u16:
948 ; CHECK: // %bb.0: // %entry
949 ; CHECK-NEXT: umlal v0.4s, v1.4h, v2.h[7]
952 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
953 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
954 %add = add <4 x i32> %vmull2.i, %a
; Splats lane 3 of the <4 x i32> %v, multiplies it with %b through the umull intrinsic and adds
; the product to %a; the assertions expect one by-element umlal on v2.s[3].
958 define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
959 ; CHECK-LABEL: test_vmlal_laneq_u32:
960 ; CHECK: // %bb.0: // %entry
961 ; CHECK-NEXT: umlal v0.2d, v1.2s, v2.s[3]
964 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
965 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
966 %add = add <2 x i64> %vmull2.i, %a
; Takes elements 4-7 of %b (%shuffle.i) and a splat of lane 3 of the <4 x i16> %v, multiplies them
; via umull and adds to %a; the assertions expect the high-half form umlal2 on v2.h[3].
970 define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
971 ; CHECK-LABEL: test_vmlal_high_lane_u16:
972 ; CHECK: // %bb.0: // %entry
973 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
974 ; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.h[3]
977 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
978 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
979 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
980 %add = add <4 x i32> %vmull2.i, %a
; Takes elements 2-3 of %b (%shuffle.i) and a splat of lane 1 of the <2 x i32> %v, multiplies them
; via umull and adds to %a; the assertions expect the high-half form umlal2 on v2.s[1].
984 define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
985 ; CHECK-LABEL: test_vmlal_high_lane_u32:
986 ; CHECK: // %bb.0: // %entry
987 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
988 ; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.s[1]
991 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
992 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
993 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
994 %add = add <2 x i64> %vmull2.i, %a
; Takes elements 4-7 of %b (%shuffle.i) and a splat of lane 7 of the <8 x i16> %v, multiplies them
; via umull and adds to %a; the assertions expect the high-half form umlal2 on v2.h[7].
998 define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
999 ; CHECK-LABEL: test_vmlal_high_laneq_u16:
1000 ; CHECK: // %bb.0: // %entry
1001 ; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.h[7]
1004 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1005 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1006 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1007 %add = add <4 x i32> %vmull2.i, %a
; Takes elements 2-3 of %b (%shuffle.i) and a splat of lane 3 of the <4 x i32> %v, multiplies them
; via umull and adds to %a; the assertions expect the high-half form umlal2 on v2.s[3].
1011 define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
1012 ; CHECK-LABEL: test_vmlal_high_laneq_u32:
1013 ; CHECK: // %bb.0: // %entry
1014 ; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.s[3]
1017 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1018 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
1019 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1020 %add = add <2 x i64> %vmull2.i, %a
; Splats lane 3 of the <4 x i16> %v, multiplies it with %b through the umull intrinsic and
; subtracts the product from %a; the assertions expect one by-element umlsl on v2.h[3].
1024 define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
1025 ; CHECK-LABEL: test_vmlsl_lane_u16:
1026 ; CHECK: // %bb.0: // %entry
1027 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1028 ; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.h[3]
1031 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1032 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
1033 %sub = sub <4 x i32> %a, %vmull2.i
; Splats lane 1 of the <2 x i32> %v, multiplies it with %b through the umull intrinsic and
; subtracts the product from %a; the assertions expect one by-element umlsl on v2.s[1].
1037 define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
1038 ; CHECK-LABEL: test_vmlsl_lane_u32:
1039 ; CHECK: // %bb.0: // %entry
1040 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1041 ; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.s[1]
1044 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1045 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
1046 %sub = sub <2 x i64> %a, %vmull2.i
; Splats lane 7 of the <8 x i16> %v, multiplies it with %b through the umull intrinsic and
; subtracts the product from %a; the assertions expect one by-element umlsl on v2.h[7].
1050 define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
1051 ; CHECK-LABEL: test_vmlsl_laneq_u16:
1052 ; CHECK: // %bb.0: // %entry
1053 ; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.h[7]
1056 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1057 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
1058 %sub = sub <4 x i32> %a, %vmull2.i
; Splats lane 3 of the <4 x i32> %v, multiplies it with %b through the umull intrinsic and
; subtracts the product from %a; the assertions expect one by-element umlsl on v2.s[3].
1062 define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
1063 ; CHECK-LABEL: test_vmlsl_laneq_u32:
1064 ; CHECK: // %bb.0: // %entry
1065 ; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.s[3]
1068 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
1069 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
1070 %sub = sub <2 x i64> %a, %vmull2.i
; Takes elements 4-7 of %b (%shuffle.i) and a splat of lane 3 of the <4 x i16> %v, multiplies them
; via umull and subtracts from %a; the assertions expect the high-half form umlsl2 on v2.h[3].
1074 define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
1075 ; CHECK-LABEL: test_vmlsl_high_lane_u16:
1076 ; CHECK: // %bb.0: // %entry
1077 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1078 ; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.h[3]
1081 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1082 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1083 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1084 %sub = sub <4 x i32> %a, %vmull2.i
; Takes elements 2-3 of %b (%shuffle.i) and a splat of lane 1 of the <2 x i32> %v, multiplies them
; via umull and subtracts from %a; the assertions expect the high-half form umlsl2 on v2.s[1].
1088 define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
1089 ; CHECK-LABEL: test_vmlsl_high_lane_u32:
1090 ; CHECK: // %bb.0: // %entry
1091 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1092 ; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.s[1]
1095 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1096 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1097 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1098 %sub = sub <2 x i64> %a, %vmull2.i
; Takes elements 4-7 of %b (%shuffle.i) and a splat of lane 7 of the <8 x i16> %v, multiplies them
; via umull and subtracts from %a; the assertions expect the high-half form umlsl2 on v2.h[7].
1102 define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
1103 ; CHECK-LABEL: test_vmlsl_high_laneq_u16:
1104 ; CHECK: // %bb.0: // %entry
1105 ; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.h[7]
1108 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1109 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1110 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1111 %sub = sub <4 x i32> %a, %vmull2.i
; Takes elements 2-3 of %b (%shuffle.i) and a splat of lane 3 of the <4 x i32> %v, multiplies them
; via umull and subtracts from %a; the assertions expect the high-half form umlsl2 on v2.s[3].
1115 define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
1116 ; CHECK-LABEL: test_vmlsl_high_laneq_u32:
1117 ; CHECK: // %bb.0: // %entry
1118 ; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.s[3]
1121 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1122 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
1123 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1124 %sub = sub <2 x i64> %a, %vmull2.i
; Splats lane 3 of the <4 x i16> %v and passes it with %a to the smull intrinsic; the assertions
; expect one by-element smull on v1.h[3] with no separate dup.
1128 define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
1129 ; CHECK-LABEL: test_vmull_lane_s16:
1130 ; CHECK: // %bb.0: // %entry
1131 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1132 ; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[3]
1135 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1136 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
1137 ret <4 x i32> %vmull2.i
; Splats lane 1 of the <2 x i32> %v and passes it with %a to the smull intrinsic; the assertions
; expect one by-element smull on v1.s[1] with no separate dup.
1140 define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
1141 ; CHECK-LABEL: test_vmull_lane_s32:
1142 ; CHECK: // %bb.0: // %entry
1143 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1144 ; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[1]
1147 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1148 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
1149 ret <2 x i64> %vmull2.i
; Splats lane 3 of the <4 x i16> %v and passes it with %a to the umull intrinsic; the assertions
; expect one by-element umull on v1.h[3] with no separate dup.
1152 define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) {
1153 ; CHECK-LABEL: test_vmull_lane_u16:
1154 ; CHECK: // %bb.0: // %entry
1155 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1156 ; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[3]
1159 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1160 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
1161 ret <4 x i32> %vmull2.i
; Splats lane 1 of the <2 x i32> %v and passes it with %a to the umull intrinsic; the assertions
; expect one by-element umull on v1.s[1] with no separate dup.
1164 define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) {
1165 ; CHECK-LABEL: test_vmull_lane_u32:
1166 ; CHECK: // %bb.0: // %entry
1167 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1168 ; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[1]
1171 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1172 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
1173 ret <2 x i64> %vmull2.i
; Multiplies elements 4-7 of %a (%shuffle.i) by a splat of lane 3 of the <4 x i16> %v via smull;
; the assertions expect the high-half by-element form smull2 on v1.h[3].
1176 define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
1177 ; CHECK-LABEL: test_vmull_high_lane_s16:
1178 ; CHECK: // %bb.0: // %entry
1179 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1180 ; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.h[3]
1183 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1184 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1185 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1186 ret <4 x i32> %vmull2.i
; Multiplies elements 2-3 of %a (%shuffle.i) by a splat of lane 1 of the <2 x i32> %v via smull;
; the assertions expect the high-half by-element form smull2 on v1.s[1].
1189 define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
1190 ; CHECK-LABEL: test_vmull_high_lane_s32:
1191 ; CHECK: // %bb.0: // %entry
1192 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1193 ; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.s[1]
1196 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1197 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1198 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1199 ret <2 x i64> %vmull2.i
; Multiplies elements 4-7 of %a (%shuffle.i) by a splat of lane 3 of the <4 x i16> %v via umull;
; the assertions expect the high-half by-element form umull2 on v1.h[3].
1202 define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) {
1203 ; CHECK-LABEL: test_vmull_high_lane_u16:
1204 ; CHECK: // %bb.0: // %entry
1205 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1206 ; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.h[3]
1209 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1210 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1211 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1212 ret <4 x i32> %vmull2.i
; Multiplies elements 2-3 of %a (%shuffle.i) by a splat of lane 1 of the <2 x i32> %v via umull;
; the assertions expect the high-half by-element form umull2 on v1.s[1].
1215 define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) {
1216 ; CHECK-LABEL: test_vmull_high_lane_u32:
1217 ; CHECK: // %bb.0: // %entry
1218 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1219 ; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.s[1]
1222 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1223 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1224 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1225 ret <2 x i64> %vmull2.i
; Splats lane 7 of the <8 x i16> %v and passes it with %a to the smull intrinsic; the assertions
; expect one by-element smull on v1.h[7].
1228 define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
1229 ; CHECK-LABEL: test_vmull_laneq_s16:
1230 ; CHECK: // %bb.0: // %entry
1231 ; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[7]
1234 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1235 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
1236 ret <4 x i32> %vmull2.i
; Splats lane 3 of the <4 x i32> %v and passes it with %a to the smull intrinsic; the assertions
; expect one by-element smull on v1.s[3].
1239 define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
1240 ; CHECK-LABEL: test_vmull_laneq_s32:
1241 ; CHECK: // %bb.0: // %entry
1242 ; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[3]
1245 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
1246 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
1247 ret <2 x i64> %vmull2.i
; Splats lane 7 of the <8 x i16> %v and passes it with %a to the umull intrinsic; the assertions
; expect one by-element umull on v1.h[7].
1250 define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
1251 ; CHECK-LABEL: test_vmull_laneq_u16:
1252 ; CHECK: // %bb.0: // %entry
1253 ; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[7]
1256 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1257 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
1258 ret <4 x i32> %vmull2.i
; Splats lane 3 of the <4 x i32> %v and passes it with %a to the umull intrinsic; the assertions
; expect one by-element umull on v1.s[3].
1261 define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
1262 ; CHECK-LABEL: test_vmull_laneq_u32:
1263 ; CHECK: // %bb.0: // %entry
1264 ; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[3]
1267 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
1268 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
1269 ret <2 x i64> %vmull2.i
; Multiplies elements 4-7 of %a (%shuffle.i) by a splat of lane 7 of the <8 x i16> %v via smull;
; the assertions expect the high-half by-element form smull2 on v1.h[7].
1272 define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
1273 ; CHECK-LABEL: test_vmull_high_laneq_s16:
1274 ; CHECK: // %bb.0: // %entry
1275 ; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.h[7]
1278 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1279 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1280 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1281 ret <4 x i32> %vmull2.i
; Multiplies elements 2-3 of %a (%shuffle.i) by a splat of lane 3 of the <4 x i32> %v via smull;
; the assertions expect the high-half by-element form smull2 on v1.s[3].
1284 define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
1285 ; CHECK-LABEL: test_vmull_high_laneq_s32:
1286 ; CHECK: // %bb.0: // %entry
1287 ; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.s[3]
1290 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1291 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
1292 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1293 ret <2 x i64> %vmull2.i
; Multiplies elements 4-7 of %a (%shuffle.i) by a splat of lane 7 of the <8 x i16> %v via umull;
; the assertions expect the high-half by-element form umull2 on v1.h[7].
1296 define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
1297 ; CHECK-LABEL: test_vmull_high_laneq_u16:
1298 ; CHECK: // %bb.0: // %entry
1299 ; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.h[7]
1302 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1303 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1304 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1305 ret <4 x i32> %vmull2.i
; Multiplies elements 2-3 of %a (%shuffle.i) by a splat of lane 3 of the <4 x i32> %v via umull;
; the assertions expect the high-half by-element form umull2 on v1.s[3].
1308 define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
1309 ; CHECK-LABEL: test_vmull_high_laneq_u32:
1310 ; CHECK: // %bb.0: // %entry
1311 ; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.s[3]
1314 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1315 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
1316 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1317 ret <2 x i64> %vmull2.i
; Feeds %b and a splat of lane 3 of the <4 x i16> %v to sqdmull, then combines the result with %a
; through the sqadd intrinsic; the assertions expect the pair to become one sqdmlal on v2.h[3].
1320 define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
1321 ; CHECK-LABEL: test_vqdmlal_lane_s16:
1322 ; CHECK: // %bb.0: // %entry
1323 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1324 ; CHECK-NEXT: sqdmlal v0.4s, v1.4h, v2.h[3]
1327 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1328 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
1329 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
1330 ret <4 x i32> %vqdmlal4.i
; Feeds %b and a splat of lane 1 of the <2 x i32> %v to sqdmull, then combines the result with %a
; through the sqadd intrinsic; the assertions expect the pair to become one sqdmlal on v2.s[1].
1333 define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
1334 ; CHECK-LABEL: test_vqdmlal_lane_s32:
1335 ; CHECK: // %bb.0: // %entry
1336 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1337 ; CHECK-NEXT: sqdmlal v0.2d, v1.2s, v2.s[1]
1340 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1341 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
1342 %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
1343 ret <2 x i64> %vqdmlal4.i
; Feeds elements 4-7 of %b (%shuffle.i) and a splat of lane 3 of the <4 x i16> %v to sqdmull, then
; sqadd with %a; the assertions expect the high-half by-element form sqdmlal2 on v2.h[3].
1346 define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
1347 ; CHECK-LABEL: test_vqdmlal_high_lane_s16:
1348 ; CHECK: // %bb.0: // %entry
1349 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1350 ; CHECK-NEXT: sqdmlal2 v0.4s, v1.8h, v2.h[3]
1353 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1354 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1355 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1356 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
1357 ret <4 x i32> %vqdmlal4.i
; Feeds elements 2-3 of %b (%shuffle.i) and a splat of lane 1 of the <2 x i32> %v to sqdmull, then
; sqadd with %a; the assertions expect the high-half by-element form sqdmlal2 on v2.s[1].
1360 define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
1361 ; CHECK-LABEL: test_vqdmlal_high_lane_s32:
1362 ; CHECK: // %bb.0: // %entry
1363 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1364 ; CHECK-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[1]
1367 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1368 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1369 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1370 %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
1371 ret <2 x i64> %vqdmlal4.i
; Feeds %b and a splat of lane 3 of the <4 x i16> %v to sqdmull, then combines the result with %a
; through the sqsub intrinsic; the assertions expect the pair to become one sqdmlsl on v2.h[3].
1374 define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
1375 ; CHECK-LABEL: test_vqdmlsl_lane_s16:
1376 ; CHECK: // %bb.0: // %entry
1377 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1378 ; CHECK-NEXT: sqdmlsl v0.4s, v1.4h, v2.h[3]
1381 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1382 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
1383 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
1384 ret <4 x i32> %vqdmlsl4.i
; Feeds %b and a splat of lane 1 of the <2 x i32> %v to sqdmull, then combines the result with %a
; through the sqsub intrinsic; the assertions expect the pair to become one sqdmlsl on v2.s[1].
1387 define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
1388 ; CHECK-LABEL: test_vqdmlsl_lane_s32:
1389 ; CHECK: // %bb.0: // %entry
1390 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1391 ; CHECK-NEXT: sqdmlsl v0.2d, v1.2s, v2.s[1]
1394 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1395 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
1396 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
1397 ret <2 x i64> %vqdmlsl4.i
; Feeds elements 4-7 of %b (%shuffle.i) and a splat of lane 3 of the <4 x i16> %v to sqdmull, then
; sqsub from %a; the assertions expect the high-half by-element form sqdmlsl2 on v2.h[3].
1400 define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
1401 ; CHECK-LABEL: test_vqdmlsl_high_lane_s16:
1402 ; CHECK: // %bb.0: // %entry
1403 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1404 ; CHECK-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.h[3]
1407 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1408 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1409 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1410 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
1411 ret <4 x i32> %vqdmlsl4.i
; Feeds elements 2-3 of %b (%shuffle.i) and a splat of lane 1 of the <2 x i32> %v to sqdmull, then
; sqsub from %a; the assertions expect the high-half by-element form sqdmlsl2 on v2.s[1].
1414 define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
1415 ; CHECK-LABEL: test_vqdmlsl_high_lane_s32:
1416 ; CHECK: // %bb.0: // %entry
1417 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1418 ; CHECK-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.s[1]
1421 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1422 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1423 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1424 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
1425 ret <2 x i64> %vqdmlsl4.i
; Splats lane 3 of the <4 x i16> %v and passes it with %a to the sqdmull intrinsic; the assertions
; expect one by-element sqdmull on v1.h[3].
1428 define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
1429 ; CHECK-LABEL: test_vqdmull_lane_s16:
1430 ; CHECK: // %bb.0: // %entry
1431 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1432 ; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.h[3]
1435 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1436 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
1437 ret <4 x i32> %vqdmull2.i
; Splats lane 1 of the <2 x i32> %v and passes it with %a to the sqdmull intrinsic; the assertions
; expect one by-element sqdmull on v1.s[1].
1440 define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
1441 ; CHECK-LABEL: test_vqdmull_lane_s32:
1442 ; CHECK: // %bb.0: // %entry
1443 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1444 ; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[1]
1447 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1448 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
1449 ret <2 x i64> %vqdmull2.i
; Splats lane 3 of the <8 x i16> %v and passes it with %a to the sqdmull intrinsic; the assertions
; expect one by-element sqdmull on v1.h[3]. Note that this laneq test selects a lane in the low half.
1452 define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
1453 ; CHECK-LABEL: test_vqdmull_laneq_s16:
1454 ; CHECK: // %bb.0: // %entry
1455 ; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.h[3]
1458 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1459 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
1460 ret <4 x i32> %vqdmull2.i
; Splats lane 3 of the <4 x i32> %v and passes it with %a to the sqdmull intrinsic; the assertions
; expect one by-element sqdmull on v1.s[3].
1463 define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
1464 ; CHECK-LABEL: test_vqdmull_laneq_s32:
1465 ; CHECK: // %bb.0: // %entry
1466 ; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[3]
1469 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
1470 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
1471 ret <2 x i64> %vqdmull2.i
; Passes elements 4-7 of %a (%shuffle.i) and a splat of lane 3 of the <4 x i16> %v to sqdmull;
; the assertions expect the high-half by-element form sqdmull2 on v1.h[3].
1474 define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
1475 ; CHECK-LABEL: test_vqdmull_high_lane_s16:
1476 ; CHECK: // %bb.0: // %entry
1477 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1478 ; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[3]
1481 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1482 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1483 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1484 ret <4 x i32> %vqdmull2.i
; Passes elements 2-3 of %a (%shuffle.i) and a splat of lane 1 of the <2 x i32> %v to sqdmull;
; the assertions expect the high-half by-element form sqdmull2 on v1.s[1].
1487 define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
1488 ; CHECK-LABEL: test_vqdmull_high_lane_s32:
1489 ; CHECK: // %bb.0: // %entry
1490 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1491 ; CHECK-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[1]
1494 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1495 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1496 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1497 ret <2 x i64> %vqdmull2.i
; Passes elements 4-7 of %a (%shuffle.i) and a splat of lane 7 of the <8 x i16> %v to sqdmull;
; the assertions expect the high-half by-element form sqdmull2 on v1.h[7].
1500 define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
1501 ; CHECK-LABEL: test_vqdmull_high_laneq_s16:
1502 ; CHECK: // %bb.0: // %entry
1503 ; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[7]
1506 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1507 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1508 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1509 ret <4 x i32> %vqdmull2.i
; Passes elements 2-3 of %a (%shuffle.i) and a splat of lane 3 of the <4 x i32> %v to sqdmull;
; the assertions expect the high-half by-element form sqdmull2 on v1.s[3].
1512 define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
1513 ; CHECK-LABEL: test_vqdmull_high_laneq_s32:
1514 ; CHECK: // %bb.0: // %entry
1515 ; CHECK-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[3]
1518 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1519 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
1520 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1521 ret <2 x i64> %vqdmull2.i
; Splats lane 3 of the <4 x i16> %v and passes it with %a to the plain sqdmulh intrinsic; the
; assertions expect the shuffle to fold into one by-element sqdmulh on v1.h[3].
1524 define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
1525 ; CHECK-LABEL: test_vqdmulh_lane_s16:
1526 ; CHECK: // %bb.0: // %entry
1527 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1528 ; CHECK-NEXT: sqdmulh v0.4h, v0.4h, v1.h[3]
1531 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1532 %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
1533 ret <4 x i16> %vqdmulh2.i
; Calls the sqdmulh.lane intrinsic directly with a <4 x i16> %v and lane index 3 (no shufflevector);
; the assertions expect a by-element sqdmulh on v1.h[3].
1536 define <4 x i16> @test_vqdmulh_lane_s16_intrinsic(<4 x i16> %a, <4 x i16> %v) {
1537 ; CHECK-LABEL: test_vqdmulh_lane_s16_intrinsic:
1538 ; CHECK: // %bb.0: // %entry
1539 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1540 ; CHECK-NEXT: sqdmulh v0.4h, v0.4h, v1.h[3]
1543 %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.lane.v4i16.v4i16(<4 x i16> %a, <4 x i16> %v, i32 3)
1544 ret <4 x i16> %vqdmulh2.i
; Calls the sqdmulh.laneq intrinsic with an <8 x i16> %v and lane index 3, a lane in the low half;
; the assertions expect a by-element sqdmulh on v1.h[3].
1547 define <4 x i16> @test_vqdmulh_laneq_s16_intrinsic_lo(<4 x i16> %a, <8 x i16> %v) {
1548 ; CHECK-LABEL: test_vqdmulh_laneq_s16_intrinsic_lo:
1549 ; CHECK: // %bb.0: // %entry
1550 ; CHECK-NEXT: sqdmulh v0.4h, v0.4h, v1.h[3]
1553 %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v4i16.v8i16(<4 x i16> %a, <8 x i16> %v, i32 3)
1554 ret <4 x i16> %vqdmulh2.i
; Calls the sqdmulh.laneq intrinsic with an <8 x i16> %v and lane index 7, a lane in the high half;
; the assertions expect a by-element sqdmulh on v1.h[7].
1557 define <4 x i16> @test_vqdmulh_laneq_s16_intrinsic_hi(<4 x i16> %a, <8 x i16> %v) {
1558 ; CHECK-LABEL: test_vqdmulh_laneq_s16_intrinsic_hi:
1559 ; CHECK: // %bb.0: // %entry
1560 ; CHECK-NEXT: sqdmulh v0.4h, v0.4h, v1.h[7]
1563 %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v4i16.v8i16(<4 x i16> %a, <8 x i16> %v, i32 7)
1564 ret <4 x i16> %vqdmulh2.i
; Splats lane 3 of the <4 x i16> %v to eight elements and passes it with %a to the plain sqdmulh
; intrinsic; the assertions expect one 128-bit by-element sqdmulh on v1.h[3].
1567 define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
1568 ; CHECK-LABEL: test_vqdmulhq_lane_s16:
1569 ; CHECK: // %bb.0: // %entry
1570 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1571 ; CHECK-NEXT: sqdmulh v0.8h, v0.8h, v1.h[3]
1574 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
1575 %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
1576 ret <8 x i16> %vqdmulh2.i
; Calls the sqdmulh.lane intrinsic directly with an <8 x i16> %a, a <4 x i16> %v and lane index 3;
; the assertions expect a 128-bit by-element sqdmulh on v1.h[3].
1579 define <8 x i16> @test_vqdmulhq_lane_s16_intrinsic(<8 x i16> %a, <4 x i16> %v) {
1580 ; CHECK-LABEL: test_vqdmulhq_lane_s16_intrinsic:
1581 ; CHECK: // %bb.0: // %entry
1582 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1583 ; CHECK-NEXT: sqdmulh v0.8h, v0.8h, v1.h[3]
1586 %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.lane.v8i16.v4i16(<8 x i16> %a, <4 x i16> %v, i32 3)
1587 ret <8 x i16> %vqdmulh2.i
; Calls the sqdmulh.laneq intrinsic with <8 x i16> operands and lane index 3, a lane in the low half;
; the assertions expect a 128-bit by-element sqdmulh on v1.h[3].
1590 define <8 x i16> @test_vqdmulhq_laneq_s16_intrinsic_lo(<8 x i16> %a, <8 x i16> %v) {
1591 ; CHECK-LABEL: test_vqdmulhq_laneq_s16_intrinsic_lo:
1592 ; CHECK: // %bb.0: // %entry
1593 ; CHECK-NEXT: sqdmulh v0.8h, v0.8h, v1.h[3]
1596 %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v8i16.v8i16(<8 x i16> %a, <8 x i16> %v, i32 3)
1597 ret <8 x i16> %vqdmulh2.i
; Calls the sqdmulh.laneq intrinsic with <8 x i16> operands and lane index 7, a lane in the high half;
; the assertions expect a 128-bit by-element sqdmulh on v1.h[7].
1600 define <8 x i16> @test_vqdmulhq_laneq_s16_intrinsic_hi(<8 x i16> %a, <8 x i16> %v) {
1601 ; CHECK-LABEL: test_vqdmulhq_laneq_s16_intrinsic_hi:
1602 ; CHECK: // %bb.0: // %entry
1603 ; CHECK-NEXT: sqdmulh v0.8h, v0.8h, v1.h[7]
1606 %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v8i16.v8i16(<8 x i16> %a, <8 x i16> %v, i32 7)
1607 ret <8 x i16> %vqdmulh2.i
; Splats lane 1 of the <2 x i32> %v and passes it with %a to the plain sqdmulh intrinsic; the
; assertions expect the shuffle to fold into one by-element sqdmulh on v1.s[1].
1610 define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
1611 ; CHECK-LABEL: test_vqdmulh_lane_s32:
1612 ; CHECK: // %bb.0: // %entry
1613 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1614 ; CHECK-NEXT: sqdmulh v0.2s, v0.2s, v1.s[1]
1617 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1618 %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
1619 ret <2 x i32> %vqdmulh2.i
; Calls the sqdmulh.lane intrinsic directly with a <2 x i32> %v and lane index 1 (no shufflevector);
; the assertions expect a by-element sqdmulh on v1.s[1].
1622 define <2 x i32> @test_vqdmulh_lane_s32_intrinsic(<2 x i32> %a, <2 x i32> %v) {
1623 ; CHECK-LABEL: test_vqdmulh_lane_s32_intrinsic:
1624 ; CHECK: // %bb.0: // %entry
1625 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1626 ; CHECK-NEXT: sqdmulh v0.2s, v0.2s, v1.s[1]
1629 %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.lane.v2i32.v2i32(<2 x i32> %a, <2 x i32> %v, i32 1)
1630 ret <2 x i32> %vqdmulh2.i
; Calls the sqdmulh.laneq intrinsic with a <4 x i32> %v and lane index 1, a lane in the low half;
; the assertions expect a by-element sqdmulh on v1.s[1].
1633 define <2 x i32> @test_vqdmulh_laneq_s32_intrinsic_lo(<2 x i32> %a, <4 x i32> %v) {
1634 ; CHECK-LABEL: test_vqdmulh_laneq_s32_intrinsic_lo:
1635 ; CHECK: // %bb.0: // %entry
1636 ; CHECK-NEXT: sqdmulh v0.2s, v0.2s, v1.s[1]
1639 %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v2i32.v4i32(<2 x i32> %a, <4 x i32> %v, i32 1)
1640 ret <2 x i32> %vqdmulh2.i
; Calls the sqdmulh.laneq intrinsic with a <2 x i32> multiplicand and lane 3
; (the last lane) of a <4 x i32> lane source. The expected assembly reads v1.s[3].
1643 define <2 x i32> @test_vqdmulh_laneq_s32_intrinsic_hi(<2 x i32> %a, <4 x i32> %v) {
1644 ; CHECK-LABEL: test_vqdmulh_laneq_s32_intrinsic_hi:
1645 ; CHECK: // %bb.0: // %entry
1646 ; CHECK-NEXT: sqdmulh v0.2s, v0.2s, v1.s[3]
1649 %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v2i32.v4i32(<2 x i32> %a, <4 x i32> %v, i32 3)
1650 ret <2 x i32> %vqdmulh2.i
; Widening splat: lane 1 of the <2 x i32> %v is broadcast to <4 x i32> and
; passed to the plain sqdmulh intrinsic. The expected assembly uses the
; by-element form with v1.s[1].
1653 define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
1654 ; CHECK-LABEL: test_vqdmulhq_lane_s32:
1655 ; CHECK: // %bb.0: // %entry
1656 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1657 ; CHECK-NEXT: sqdmulh v0.4s, v0.4s, v1.s[1]
1660 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1661 %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
1662 ret <4 x i32> %vqdmulh2.i
; Calls the sqdmulh.lane intrinsic with a <4 x i32> multiplicand and lane 1
; of a <2 x i32> lane source. The expected assembly reads v1.s[1].
1665 define <4 x i32> @test_vqdmulhq_lane_s32_intrinsic(<4 x i32> %a, <2 x i32> %v) {
1666 ; CHECK-LABEL: test_vqdmulhq_lane_s32_intrinsic:
1667 ; CHECK: // %bb.0: // %entry
1668 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1669 ; CHECK-NEXT: sqdmulh v0.4s, v0.4s, v1.s[1]
1672 %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.lane.v4i32.v2i32(<4 x i32> %a, <2 x i32> %v, i32 1)
1673 ret <4 x i32> %vqdmulh2.i
; Calls the sqdmulh.laneq intrinsic on <4 x i32> operands with lane index 1.
; The expected assembly reads v1.s[1].
1676 define <4 x i32> @test_vqdmulhq_laneq_s32_intrinsic_lo(<4 x i32> %a, <4 x i32> %v) {
1677 ; CHECK-LABEL: test_vqdmulhq_laneq_s32_intrinsic_lo:
1678 ; CHECK: // %bb.0: // %entry
1679 ; CHECK-NEXT: sqdmulh v0.4s, v0.4s, v1.s[1]
1682 %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v4i32.v4i32(<4 x i32> %a, <4 x i32> %v, i32 1)
1683 ret <4 x i32> %vqdmulh2.i
; Calls the sqdmulh.laneq intrinsic on <4 x i32> operands with lane index 3
; (the last lane of %v). The expected assembly reads v1.s[3].
1686 define <4 x i32> @test_vqdmulhq_laneq_s32_intrinsic_hi(<4 x i32> %a, <4 x i32> %v) {
1687 ; CHECK-LABEL: test_vqdmulhq_laneq_s32_intrinsic_hi:
1688 ; CHECK: // %bb.0: // %entry
1689 ; CHECK-NEXT: sqdmulh v0.4s, v0.4s, v1.s[3]
1692 %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v4i32.v4i32(<4 x i32> %a, <4 x i32> %v, i32 3)
1693 ret <4 x i32> %vqdmulh2.i
; Splats lane 3 of the <4 x i16> %v and passes it to the plain sqrdmulh
; intrinsic. The expected assembly folds the splat into the by-element
; operand v1.h[3].
1696 define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
1697 ; CHECK-LABEL: test_vqrdmulh_lane_s16:
1698 ; CHECK: // %bb.0: // %entry
1699 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1700 ; CHECK-NEXT: sqrdmulh v0.4h, v0.4h, v1.h[3]
1703 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1704 %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
1705 ret <4 x i16> %vqrdmulh2.i
; Calls the sqrdmulh.lane intrinsic directly on <4 x i16> operands with lane
; index 3. The expected assembly reads v1.h[3].
1708 define <4 x i16> @test_vqrdmulh_lane_s16_intrinsic(<4 x i16> %a, <4 x i16> %v) {
1709 ; CHECK-LABEL: test_vqrdmulh_lane_s16_intrinsic:
1710 ; CHECK: // %bb.0: // %entry
1711 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1712 ; CHECK-NEXT: sqrdmulh v0.4h, v0.4h, v1.h[3]
1715 %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v4i16.v4i16(<4 x i16> %a, <4 x i16> %v, i32 3)
1716 ret <4 x i16> %vqrdmulh2.i
; Calls the sqrdmulh.laneq intrinsic with a <4 x i16> multiplicand and lane 3
; of an <8 x i16> lane source. The expected assembly reads v1.h[3].
1719 define <4 x i16> @test_vqrdmulh_laneq_s16_intrinsic_lo(<4 x i16> %a, <8 x i16> %v) {
1720 ; CHECK-LABEL: test_vqrdmulh_laneq_s16_intrinsic_lo:
1721 ; CHECK: // %bb.0: // %entry
1722 ; CHECK-NEXT: sqrdmulh v0.4h, v0.4h, v1.h[3]
1725 %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v4i16.v8i16(<4 x i16> %a, <8 x i16> %v, i32 3)
1726 ret <4 x i16> %vqrdmulh2.i
; Calls the sqrdmulh.laneq intrinsic with a <4 x i16> multiplicand and lane 7
; (the last lane) of an <8 x i16> lane source. The expected assembly reads v1.h[7].
1729 define <4 x i16> @test_vqrdmulh_laneq_s16_intrinsic_hi(<4 x i16> %a, <8 x i16> %v) {
1730 ; CHECK-LABEL: test_vqrdmulh_laneq_s16_intrinsic_hi:
1731 ; CHECK: // %bb.0: // %entry
1732 ; CHECK-NEXT: sqrdmulh v0.4h, v0.4h, v1.h[7]
1735 %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v4i16.v8i16(<4 x i16> %a, <8 x i16> %v, i32 7)
1736 ret <4 x i16> %vqrdmulh2.i
; Widening splat: lane 3 of the <4 x i16> %v is broadcast to <8 x i16> and
; passed to the plain sqrdmulh intrinsic. The expected assembly uses the
; by-element form with v1.h[3].
1739 define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
1740 ; CHECK-LABEL: test_vqrdmulhq_lane_s16:
1741 ; CHECK: // %bb.0: // %entry
1742 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1743 ; CHECK-NEXT: sqrdmulh v0.8h, v0.8h, v1.h[3]
1746 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
1747 %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
1748 ret <8 x i16> %vqrdmulh2.i
; Calls the sqrdmulh.lane intrinsic with an <8 x i16> multiplicand and lane 3
; of a <4 x i16> lane source. The expected assembly reads v1.h[3].
1751 define <8 x i16> @test_vqrdmulhq_lane_s16_intrinsic(<8 x i16> %a, <4 x i16> %v) {
1752 ; CHECK-LABEL: test_vqrdmulhq_lane_s16_intrinsic:
1753 ; CHECK: // %bb.0: // %entry
1754 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1755 ; CHECK-NEXT: sqrdmulh v0.8h, v0.8h, v1.h[3]
1758 %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v8i16.v4i16(<8 x i16> %a, <4 x i16> %v, i32 3)
1759 ret <8 x i16> %vqrdmulh2.i
; Calls the sqrdmulh.laneq intrinsic on <8 x i16> operands with lane index 3.
; The expected assembly reads v1.h[3].
1762 define <8 x i16> @test_vqrdmulhq_laneq_s16_intrinsic_lo(<8 x i16> %a, <8 x i16> %v) {
1763 ; CHECK-LABEL: test_vqrdmulhq_laneq_s16_intrinsic_lo:
1764 ; CHECK: // %bb.0: // %entry
1765 ; CHECK-NEXT: sqrdmulh v0.8h, v0.8h, v1.h[3]
1768 %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v8i16.v8i16(<8 x i16> %a, <8 x i16> %v, i32 3)
1769 ret <8 x i16> %vqrdmulh2.i
; Calls the sqrdmulh.laneq intrinsic on <8 x i16> operands with lane index 7
; (the last lane of %v). The expected assembly reads v1.h[7].
1772 define <8 x i16> @test_vqrdmulhq_laneq_s16_intrinsic_hi(<8 x i16> %a, <8 x i16> %v) {
1773 ; CHECK-LABEL: test_vqrdmulhq_laneq_s16_intrinsic_hi:
1774 ; CHECK: // %bb.0: // %entry
1775 ; CHECK-NEXT: sqrdmulh v0.8h, v0.8h, v1.h[7]
1778 %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v8i16.v8i16(<8 x i16> %a, <8 x i16> %v, i32 7)
1779 ret <8 x i16> %vqrdmulh2.i
; Splats lane 1 of the <2 x i32> %v and passes it to the plain sqrdmulh
; intrinsic. The expected assembly folds the splat into the by-element
; operand v1.s[1].
1782 define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
1783 ; CHECK-LABEL: test_vqrdmulh_lane_s32:
1784 ; CHECK: // %bb.0: // %entry
1785 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1786 ; CHECK-NEXT: sqrdmulh v0.2s, v0.2s, v1.s[1]
1789 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1790 %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
1791 ret <2 x i32> %vqrdmulh2.i
; Calls the sqrdmulh.lane intrinsic directly on <2 x i32> operands with lane
; index 1. The expected assembly reads v1.s[1].
1794 define <2 x i32> @test_vqrdmulh_lane_s32_intrinsic(<2 x i32> %a, <2 x i32> %v) {
1795 ; CHECK-LABEL: test_vqrdmulh_lane_s32_intrinsic:
1796 ; CHECK: // %bb.0: // %entry
1797 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1798 ; CHECK-NEXT: sqrdmulh v0.2s, v0.2s, v1.s[1]
1801 %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v2i32.v2i32(<2 x i32> %a, <2 x i32> %v, i32 1)
1802 ret <2 x i32> %vqrdmulh2.i
; Calls the sqrdmulh.laneq intrinsic with a <2 x i32> multiplicand and lane 1
; of a <4 x i32> lane source. The expected assembly reads v1.s[1].
1805 define <2 x i32> @test_vqrdmulh_laneq_s32_intrinsic_lo(<2 x i32> %a, <4 x i32> %v) {
1806 ; CHECK-LABEL: test_vqrdmulh_laneq_s32_intrinsic_lo:
1807 ; CHECK: // %bb.0: // %entry
1808 ; CHECK-NEXT: sqrdmulh v0.2s, v0.2s, v1.s[1]
1811 %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v2i32.v4i32(<2 x i32> %a, <4 x i32> %v, i32 1)
1812 ret <2 x i32> %vqrdmulh2.i
; Calls the sqrdmulh.laneq intrinsic with a <2 x i32> multiplicand and lane 3
; (the last lane) of a <4 x i32> lane source. The expected assembly reads v1.s[3].
1815 define <2 x i32> @test_vqrdmulh_laneq_s32_intrinsic_hi(<2 x i32> %a, <4 x i32> %v) {
1816 ; CHECK-LABEL: test_vqrdmulh_laneq_s32_intrinsic_hi:
1817 ; CHECK: // %bb.0: // %entry
1818 ; CHECK-NEXT: sqrdmulh v0.2s, v0.2s, v1.s[3]
1821 %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v2i32.v4i32(<2 x i32> %a, <4 x i32> %v, i32 3)
1822 ret <2 x i32> %vqrdmulh2.i
; Widening splat: lane 1 of the <2 x i32> %v is broadcast to <4 x i32> and
; passed to the plain sqrdmulh intrinsic. The expected assembly uses the
; by-element form with v1.s[1].
1825 define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
1826 ; CHECK-LABEL: test_vqrdmulhq_lane_s32:
1827 ; CHECK: // %bb.0: // %entry
1828 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1829 ; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.s[1]
1832 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1833 %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
1834 ret <4 x i32> %vqrdmulh2.i
; Calls the sqrdmulh.lane intrinsic with a <4 x i32> multiplicand and lane 1
; of a <2 x i32> lane source. The expected assembly reads v1.s[1].
1837 define <4 x i32> @test_vqrdmulhq_lane_s32_intrinsic(<4 x i32> %a, <2 x i32> %v) {
1838 ; CHECK-LABEL: test_vqrdmulhq_lane_s32_intrinsic:
1839 ; CHECK: // %bb.0: // %entry
1840 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1841 ; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.s[1]
1844 %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v4i32.v2i32(<4 x i32> %a, <2 x i32> %v, i32 1)
1845 ret <4 x i32> %vqrdmulh2.i
; Calls the sqrdmulh.laneq intrinsic on <4 x i32> operands with lane index 1.
; The expected assembly reads v1.s[1].
1848 define <4 x i32> @test_vqrdmulhq_laneq_s32_intrinsic_lo(<4 x i32> %a, <4 x i32> %v) {
1849 ; CHECK-LABEL: test_vqrdmulhq_laneq_s32_intrinsic_lo:
1850 ; CHECK: // %bb.0: // %entry
1851 ; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.s[1]
1854 %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v4i32.v4i32(<4 x i32> %a, <4 x i32> %v, i32 1)
1855 ret <4 x i32> %vqrdmulh2.i
; Calls the sqrdmulh.laneq intrinsic on <4 x i32> operands with lane index 3
; (the last lane of %v). The expected assembly reads v1.s[3].
1858 define <4 x i32> @test_vqrdmulhq_laneq_s32_intrinsic_hi(<4 x i32> %a, <4 x i32> %v) {
1859 ; CHECK-LABEL: test_vqrdmulhq_laneq_s32_intrinsic_hi:
1860 ; CHECK: // %bb.0: // %entry
1861 ; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.s[3]
1864 %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v4i32.v4i32(<4 x i32> %a, <4 x i32> %v, i32 3)
1865 ret <4 x i32> %vqrdmulh2.i
; Generic IR fmul of %a by a splat of lane 1 of the <2 x float> %v (the splat
; is the first fmul operand). The expected assembly is a by-element fmul
; reading v1.s[1].
1868 define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) {
1869 ; CHECK-LABEL: test_vmul_lane_f32:
1870 ; CHECK: // %bb.0: // %entry
1871 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1872 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[1]
1875 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
1876 %mul = fmul <2 x float> %shuffle, %a
1877 ret <2 x float> %mul
; Single-element double case: %a is round-tripped through <8 x i8> to a
; scalar double, multiplied by element 0 of %v, and re-inserted into a
; <1 x double>. The expected assembly is a plain scalar fmul on d0 and d1.
1880 define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) {
1881 ; CHECK-LABEL: test_vmul_lane_f64:
1882 ; CHECK: // %bb.0: // %entry
1883 ; CHECK-NEXT: fmul d0, d0, d1
1886 %0 = bitcast <1 x double> %a to <8 x i8>
1887 %1 = bitcast <8 x i8> %0 to double
1888 %extract = extractelement <1 x double> %v, i32 0
1889 %2 = fmul double %1, %extract
1890 %3 = insertelement <1 x double> undef, double %2, i32 0
; Widening splat: lane 1 of the <2 x float> %v is broadcast to <4 x float>
; and multiplied with %a. The expected assembly is a by-element fmul reading
; v1.s[1].
1894 define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) {
1895 ; CHECK-LABEL: test_vmulq_lane_f32:
1896 ; CHECK: // %bb.0: // %entry
1897 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1898 ; CHECK-NEXT: fmul v0.4s, v0.4s, v1.s[1]
1901 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1902 %mul = fmul <4 x float> %shuffle, %a
1903 ret <4 x float> %mul
; Broadcasts the only element of the <1 x double> %v to <2 x double> (zero
; mask) and multiplies with %a. The expected assembly is a by-element fmul
; reading v1.d[0].
1906 define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) {
1907 ; CHECK-LABEL: test_vmulq_lane_f64:
1908 ; CHECK: // %bb.0: // %entry
1909 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1910 ; CHECK-NEXT: fmul v0.2d, v0.2d, v1.d[0]
1913 %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
1914 %mul = fmul <2 x double> %shuffle, %a
1915 ret <2 x double> %mul
; Narrowing splat: lane 3 of the <4 x float> %v is broadcast to <2 x float>
; and multiplied with %a. The expected assembly is a by-element fmul reading
; v1.s[3].
1918 define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) {
1919 ; CHECK-LABEL: test_vmul_laneq_f32:
1920 ; CHECK: // %bb.0: // %entry
1921 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[3]
1924 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
1925 %mul = fmul <2 x float> %shuffle, %a
1926 ret <2 x float> %mul
; Lane selection seen through a subvector extract and a bitcast: element 1 of
; the <2 x double> %v is reinterpreted as <2 x float> and its lane 1 is
; splatted. That is float lane 3 of the full 128-bit register, so the
; expected assembly is a by-element fmul reading v1.s[3].
1929 define <2 x float> @test_vmul_laneq3_f32_bitcast(<2 x float> %a, <2 x double> %v) {
1930 ; CHECK-LABEL: test_vmul_laneq3_f32_bitcast:
1932 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[3]
1934 %extract = shufflevector <2 x double> %v, <2 x double> undef, <1 x i32> <i32 1>
1935 %bc = bitcast <1 x double> %extract to <2 x float>
1936 %splat = shufflevector <2 x float> %bc, <2 x float> undef, <2 x i32> <i32 1, i32 1>
1937 %mul = fmul <2 x float> %splat, %a
1938 ret <2 x float> %mul
; Same extract-and-bitcast shape, but lane 0 of the reinterpreted
; <2 x float> is splatted. That is float lane 2 of the full 128-bit register,
; so the expected assembly is a by-element fmul reading v1.s[2].
1941 define <2 x float> @test_vmul_laneq2_f32_bitcast(<2 x float> %a, <2 x double> %v) {
1942 ; CHECK-LABEL: test_vmul_laneq2_f32_bitcast:
1944 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[2]
1946 %extract = shufflevector <2 x double> %v, <2 x double> undef, <1 x i32> <i32 1>
1947 %bc = bitcast <1 x double> %extract to <2 x float>
1948 %splat = shufflevector <2 x float> %bc, <2 x float> undef, <2 x i32> <i32 0, i32 0>
1949 %mul = fmul <2 x float> %splat, %a
1950 ret <2 x float> %mul
; Element 1 of the <2 x double> %v is reinterpreted as <4 x i16> and its
; lane 1 is splatted, which is i16 lane 5 of the full 128-bit register. The
; expected assembly is one dup from v1.h[5] followed by a vector add.
1953 define <4 x i16> @test_vadd_laneq5_i16_bitcast(<4 x i16> %a, <2 x double> %v) {
1954 ; CHECK-LABEL: test_vadd_laneq5_i16_bitcast:
1956 ; CHECK-NEXT: dup v1.4h, v1.h[5]
1957 ; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
1959 %extract = shufflevector <2 x double> %v, <2 x double> undef, <1 x i32> <i32 1>
1960 %bc = bitcast <1 x double> %extract to <4 x i16>
1961 %splat = shufflevector <4 x i16> %bc, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1962 %r = add <4 x i16> %splat, %a
1966 ; TODO: The pattern in LowerVECTOR_SHUFFLE does not match this case, so the expected output below still contains two dup instructions.
; Bytes 2..9 of the <16 x i8> %v are extracted, reinterpreted as <4 x i16>,
; and lane 1 of that is splatted. Those are source bytes 4 and 5, i.e. i16
; lane 2 of the full register. The expected assembly currently holds two dup
; instructions before the add.
1968 define <4 x i16> @test_vadd_lane2_i16_bitcast_bigger_aligned(<4 x i16> %a, <16 x i8> %v) {
1969 ; CHECK-LABEL: test_vadd_lane2_i16_bitcast_bigger_aligned:
1971 ; CHECK-NEXT: dup v1.4h, v1.h[2]
1972 ; CHECK-NEXT: dup v1.4h, v1.h[1]
1973 ; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
1975 %extract = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
1976 %bc = bitcast <8 x i8> %extract to <4 x i16>
1977 %splat = shufflevector <4 x i16> %bc, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1978 %r = add <4 x i16> %splat, %a
; Bytes 8..15 (the upper half) of the <16 x i8> %v are extracted,
; reinterpreted as <4 x i16>, and lane 1 of that is splatted. Those are
; source bytes 10 and 11, i.e. i16 lane 5 of the full register. The expected
; assembly is one dup from v1.h[5] followed by a vector add.
1982 define <4 x i16> @test_vadd_lane5_i16_bitcast_bigger_aligned(<4 x i16> %a, <16 x i8> %v) {
1983 ; CHECK-LABEL: test_vadd_lane5_i16_bitcast_bigger_aligned:
1985 ; CHECK-NEXT: dup v1.4h, v1.h[5]
1986 ; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
1988 %extract = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1989 %bc = bitcast <8 x i8> %extract to <4 x i16>
1990 %splat = shufflevector <4 x i16> %bc, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1991 %r = add <4 x i16> %splat, %a
1995 ; Negative test - can't dup bytes {3,4} of the source: viewed as v8i16 they straddle lanes 1 and 2.
; Bytes 1..8 of the <16 x i8> %v are extracted at an odd byte offset,
; reinterpreted as <4 x i16>, and lane 1 of that is splatted. Those are
; source bytes 3 and 4. The expected assembly keeps a separate ext before the
; dup and the add.
1997 define <4 x i16> @test_vadd_lane_i16_bitcast_bigger_unaligned(<4 x i16> %a, <16 x i8> %v) {
1998 ; CHECK-LABEL: test_vadd_lane_i16_bitcast_bigger_unaligned:
2000 ; CHECK-NEXT: ext v1.8b, v1.8b, v0.8b, #1
2001 ; CHECK-NEXT: dup v1.4h, v1.h[1]
2002 ; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
2004 %extract = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
2005 %bc = bitcast <8 x i8> %extract to <4 x i16>
2006 %splat = shufflevector <4 x i16> %bc, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
2007 %r = add <4 x i16> %splat, %a
; %a is round-tripped through <8 x i8> to a scalar double and multiplied by
; element 1 of the <2 x double> %v. The expected assembly is the scalar
; by-element fmul reading v1.d[1].
2011 define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) {
2012 ; CHECK-LABEL: test_vmul_laneq_f64:
2013 ; CHECK: // %bb.0: // %entry
2014 ; CHECK-NEXT: fmul d0, d0, v1.d[1]
2017 %0 = bitcast <1 x double> %a to <8 x i8>
2018 %1 = bitcast <8 x i8> %0 to double
2019 %extract = extractelement <2 x double> %v, i32 1
2020 %2 = fmul double %1, %extract
2021 %3 = insertelement <1 x double> undef, double %2, i32 0
; Splats lane 3 of the <4 x float> %v and multiplies with %a. The expected
; assembly is a by-element fmul reading v1.s[3].
2025 define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) {
2026 ; CHECK-LABEL: test_vmulq_laneq_f32:
2027 ; CHECK: // %bb.0: // %entry
2028 ; CHECK-NEXT: fmul v0.4s, v0.4s, v1.s[3]
2031 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
2032 %mul = fmul <4 x float> %shuffle, %a
2033 ret <4 x float> %mul
; Splats lane 1 of the <2 x double> %v and multiplies with %a. The expected
; assembly is a by-element fmul reading v1.d[1].
2036 define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) {
2037 ; CHECK-LABEL: test_vmulq_laneq_f64:
2038 ; CHECK: // %bb.0: // %entry
2039 ; CHECK-NEXT: fmul v0.2d, v0.2d, v1.d[1]
2042 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
2043 %mul = fmul <2 x double> %shuffle, %a
2044 ret <2 x double> %mul
; Splats lane 1 of the <2 x float> %v and passes it as the second operand of
; the fmulx intrinsic. The expected assembly is a by-element fmulx reading
; v1.s[1].
2047 define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) {
2048 ; CHECK-LABEL: test_vmulx_lane_f32:
2049 ; CHECK: // %bb.0: // %entry
2050 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2051 ; CHECK-NEXT: fmulx v0.2s, v0.2s, v1.s[1]
2054 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
2055 %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
2056 ret <2 x float> %vmulx2.i
; Widening splat: lane 1 of the <2 x float> %v is broadcast to <4 x float>
; and passed to the fmulx intrinsic. The expected assembly is a by-element
; fmulx reading v1.s[1].
2059 define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) {
2060 ; CHECK-LABEL: test_vmulxq_lane_f32:
2061 ; CHECK: // %bb.0: // %entry
2062 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2063 ; CHECK-NEXT: fmulx v0.4s, v0.4s, v1.s[1]
2066 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
2067 %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
2068 ret <4 x float> %vmulx2.i
; Extracts the only element of both <1 x double> arguments, calls the scalar
; fmulx.f64 intrinsic, and re-inserts the result into a <1 x double>. The
; expected assembly is a scalar fmulx on d0 and d1.
2071 define <1 x double> @test_vmulx_lane_f64(<1 x double> %a, <1 x double> %v) {
2072 ; CHECK-LABEL: test_vmulx_lane_f64:
2073 ; CHECK: // %bb.0: // %entry
2074 ; CHECK-NEXT: fmulx d0, d0, d1
2077 %vget_lane = extractelement <1 x double> %a, i64 0
2078 %vget_lane3 = extractelement <1 x double> %v, i64 0
2079 %vmulxd_f64.i = tail call double @llvm.aarch64.neon.fmulx.f64(double %vget_lane, double %vget_lane3)
2080 %vset_lane = insertelement <1 x double> poison, double %vmulxd_f64.i, i64 0
2081 ret <1 x double> %vset_lane
; Broadcasts the only element of the <1 x double> %v to <2 x double> (zero
; mask) and passes it to the fmulx intrinsic. The expected assembly is a
; by-element fmulx reading v1.d[0].
2084 define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) {
2085 ; CHECK-LABEL: test_vmulxq_lane_f64:
2086 ; CHECK: // %bb.0: // %entry
2087 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2088 ; CHECK-NEXT: fmulx v0.2d, v0.2d, v1.d[0]
2091 %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
2092 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
2093 ret <2 x double> %vmulx2.i
; Narrowing splat: lane 3 of the <4 x float> %v is broadcast to <2 x float>
; and passed to the fmulx intrinsic. The expected assembly is a by-element
; fmulx reading v1.s[3].
2096 define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) {
2097 ; CHECK-LABEL: test_vmulx_laneq_f32:
2098 ; CHECK: // %bb.0: // %entry
2099 ; CHECK-NEXT: fmulx v0.2s, v0.2s, v1.s[3]
2102 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
2103 %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
2104 ret <2 x float> %vmulx2.i
; Splats lane 3 of the <4 x float> %v and passes it to the fmulx intrinsic.
; The expected assembly is a by-element fmulx reading v1.s[3].
2107 define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) {
2108 ; CHECK-LABEL: test_vmulxq_laneq_f32:
2109 ; CHECK: // %bb.0: // %entry
2110 ; CHECK-NEXT: fmulx v0.4s, v0.4s, v1.s[3]
2113 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
2114 %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
2115 ret <4 x float> %vmulx2.i
; Calls the scalar fmulx.f64 intrinsic on element 0 of %a and element 1 of
; the <2 x double> %v, then re-inserts the result into a <1 x double>. The
; expected assembly is the scalar by-element fmulx reading v1.d[1].
2118 define <1 x double> @test_vmulx_laneq_f64(<1 x double> %a, <2 x double> %v) {
2119 ; CHECK-LABEL: test_vmulx_laneq_f64:
2120 ; CHECK: // %bb.0: // %entry
2121 ; CHECK-NEXT: fmulx d0, d0, v1.d[1]
2124 %vget_lane = extractelement <1 x double> %a, i64 0
2125 %vgetq_lane = extractelement <2 x double> %v, i64 1
2126 %vmulxd_f64.i = tail call double @llvm.aarch64.neon.fmulx.f64(double %vget_lane, double %vgetq_lane)
2127 %vset_lane = insertelement <1 x double> poison, double %vmulxd_f64.i, i64 0
2128 ret <1 x double> %vset_lane
; Splats lane 1 of the <2 x double> %v and passes it to the fmulx intrinsic.
; The expected assembly is a by-element fmulx reading v1.d[1].
2131 define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) {
2132 ; CHECK-LABEL: test_vmulxq_laneq_f64:
2133 ; CHECK: // %bb.0: // %entry
2134 ; CHECK-NEXT: fmulx v0.2d, v0.2d, v1.d[1]
2137 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
2138 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
2139 ret <2 x double> %vmulx2.i
; Lane-0 multiply-accumulate: a zero-mask splat of the <4 x i16> %v is
; multiplied with %b and added to %a. The expected assembly is a single
; by-element mla reading v2.h[0].
2142 define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
2143 ; CHECK-LABEL: test_vmla_lane_s16_0:
2144 ; CHECK: // %bb.0: // %entry
2145 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2146 ; CHECK-NEXT: mla v0.4h, v1.4h, v2.h[0]
2149 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2150 %mul = mul <4 x i16> %shuffle, %b
2151 %add = add <4 x i16> %mul, %a
; Lane-0 multiply-accumulate with a widening splat: element 0 of the
; <4 x i16> %v is broadcast to <8 x i16>, multiplied with %b and added to %a.
; The expected assembly is a by-element mla reading v2.h[0].
2155 define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
2156 ; CHECK-LABEL: test_vmlaq_lane_s16_0:
2157 ; CHECK: // %bb.0: // %entry
2158 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2159 ; CHECK-NEXT: mla v0.8h, v1.8h, v2.h[0]
2162 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
2163 %mul = mul <8 x i16> %shuffle, %b
2164 %add = add <8 x i16> %mul, %a
; Lane-0 multiply-accumulate: a zero-mask splat of the <2 x i32> %v is
; multiplied with %b and added to %a. The expected assembly is a by-element
; mla reading v2.s[0].
2168 define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
2169 ; CHECK-LABEL: test_vmla_lane_s32_0:
2170 ; CHECK: // %bb.0: // %entry
2171 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2172 ; CHECK-NEXT: mla v0.2s, v1.2s, v2.s[0]
2175 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2176 %mul = mul <2 x i32> %shuffle, %b
2177 %add = add <2 x i32> %mul, %a
; Lane-0 multiply-accumulate with a widening splat: element 0 of the
; <2 x i32> %v is broadcast to <4 x i32>, multiplied with %b and added to %a.
; The expected assembly is a by-element mla reading v2.s[0].
2181 define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
2182 ; CHECK-LABEL: test_vmlaq_lane_s32_0:
2183 ; CHECK: // %bb.0: // %entry
2184 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2185 ; CHECK-NEXT: mla v0.4s, v1.4s, v2.s[0]
2188 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
2189 %mul = mul <4 x i32> %shuffle, %b
2190 %add = add <4 x i32> %mul, %a
; Lane-0 multiply-accumulate with a narrowing splat: element 0 of the
; <8 x i16> %v is broadcast to <4 x i16>, multiplied with %b and added to %a.
; The expected assembly is a by-element mla reading v2.h[0].
2194 define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
2195 ; CHECK-LABEL: test_vmla_laneq_s16_0:
2196 ; CHECK: // %bb.0: // %entry
2197 ; CHECK-NEXT: mla v0.4h, v1.4h, v2.h[0]
2200 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2201 %mul = mul <4 x i16> %shuffle, %b
2202 %add = add <4 x i16> %mul, %a
; Lane-0 multiply-accumulate on <8 x i16>: a zero-mask splat of %v is
; multiplied with %b and added to %a. The expected assembly is a by-element
; mla reading v2.h[0].
2206 define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
2207 ; CHECK-LABEL: test_vmlaq_laneq_s16_0:
2208 ; CHECK: // %bb.0: // %entry
2209 ; CHECK-NEXT: mla v0.8h, v1.8h, v2.h[0]
2212 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
2213 %mul = mul <8 x i16> %shuffle, %b
2214 %add = add <8 x i16> %mul, %a
; Lane-0 multiply-accumulate with a narrowing splat: element 0 of the
; <4 x i32> %v is broadcast to <2 x i32>, multiplied with %b and added to %a.
; The expected assembly is a by-element mla reading v2.s[0].
2218 define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
2219 ; CHECK-LABEL: test_vmla_laneq_s32_0:
2220 ; CHECK: // %bb.0: // %entry
2221 ; CHECK-NEXT: mla v0.2s, v1.2s, v2.s[0]
2224 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
2225 %mul = mul <2 x i32> %shuffle, %b
2226 %add = add <2 x i32> %mul, %a
; Lane-0 multiply-accumulate on <4 x i32>: a zero-mask splat of %v is
; multiplied with %b and added to %a. The expected assembly is a by-element
; mla reading v2.s[0].
2230 define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
2231 ; CHECK-LABEL: test_vmlaq_laneq_s32_0:
2232 ; CHECK: // %bb.0: // %entry
2233 ; CHECK-NEXT: mla v0.4s, v1.4s, v2.s[0]
2236 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
2237 %mul = mul <4 x i32> %shuffle, %b
2238 %add = add <4 x i32> %mul, %a
; Lane-0 multiply-subtract: a zero-mask splat of the <4 x i16> %v is
; multiplied with %b and the product is subtracted from %a. The expected
; assembly is a by-element mls reading v2.h[0].
2242 define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
2243 ; CHECK-LABEL: test_vmls_lane_s16_0:
2244 ; CHECK: // %bb.0: // %entry
2245 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2246 ; CHECK-NEXT: mls v0.4h, v1.4h, v2.h[0]
2249 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2250 %mul = mul <4 x i16> %shuffle, %b
2251 %sub = sub <4 x i16> %a, %mul
; Lane-0 multiply-subtract with a widening splat: element 0 of the
; <4 x i16> %v is broadcast to <8 x i16>, multiplied with %b, and the product
; is subtracted from %a. The expected assembly is a by-element mls reading
; v2.h[0].
2255 define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
2256 ; CHECK-LABEL: test_vmlsq_lane_s16_0:
2257 ; CHECK: // %bb.0: // %entry
2258 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2259 ; CHECK-NEXT: mls v0.8h, v1.8h, v2.h[0]
2262 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
2263 %mul = mul <8 x i16> %shuffle, %b
2264 %sub = sub <8 x i16> %a, %mul
; Lane-0 multiply-subtract: a zero-mask splat of the <2 x i32> %v is
; multiplied with %b and the product is subtracted from %a. The expected
; assembly is a by-element mls reading v2.s[0].
2268 define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
2269 ; CHECK-LABEL: test_vmls_lane_s32_0:
2270 ; CHECK: // %bb.0: // %entry
2271 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2272 ; CHECK-NEXT: mls v0.2s, v1.2s, v2.s[0]
2275 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2276 %mul = mul <2 x i32> %shuffle, %b
2277 %sub = sub <2 x i32> %a, %mul
; Lane-0 multiply-subtract with a widening splat: element 0 of the
; <2 x i32> %v is broadcast to <4 x i32>, multiplied with %b, and the product
; is subtracted from %a. The expected assembly is a by-element mls reading
; v2.s[0].
2281 define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
2282 ; CHECK-LABEL: test_vmlsq_lane_s32_0:
2283 ; CHECK: // %bb.0: // %entry
2284 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2285 ; CHECK-NEXT: mls v0.4s, v1.4s, v2.s[0]
2288 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
2289 %mul = mul <4 x i32> %shuffle, %b
2290 %sub = sub <4 x i32> %a, %mul
; Lane-0 multiply-subtract with a narrowing splat: element 0 of the
; <8 x i16> %v is broadcast to <4 x i16>, multiplied with %b, and the product
; is subtracted from %a. The expected assembly is a by-element mls reading
; v2.h[0].
2294 define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
2295 ; CHECK-LABEL: test_vmls_laneq_s16_0:
2296 ; CHECK: // %bb.0: // %entry
2297 ; CHECK-NEXT: mls v0.4h, v1.4h, v2.h[0]
2300 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2301 %mul = mul <4 x i16> %shuffle, %b
2302 %sub = sub <4 x i16> %a, %mul
; Lane-0 multiply-subtract on <8 x i16>: a zero-mask splat of %v is
; multiplied with %b and the product is subtracted from %a. The expected
; assembly is a by-element mls reading v2.h[0].
2306 define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
2307 ; CHECK-LABEL: test_vmlsq_laneq_s16_0:
2308 ; CHECK: // %bb.0: // %entry
2309 ; CHECK-NEXT: mls v0.8h, v1.8h, v2.h[0]
2312 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
2313 %mul = mul <8 x i16> %shuffle, %b
2314 %sub = sub <8 x i16> %a, %mul
; Lane-0 multiply-subtract with a narrowing splat: element 0 of the
; <4 x i32> %v is broadcast to <2 x i32>, multiplied with %b, and the product
; is subtracted from %a. The expected assembly is a by-element mls reading
; v2.s[0].
2318 define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
2319 ; CHECK-LABEL: test_vmls_laneq_s32_0:
2320 ; CHECK: // %bb.0: // %entry
2321 ; CHECK-NEXT: mls v0.2s, v1.2s, v2.s[0]
2324 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
2325 %mul = mul <2 x i32> %shuffle, %b
2326 %sub = sub <2 x i32> %a, %mul
; Lane-0 multiply-subtract on <4 x i32>: a zero-mask splat of %v is
; multiplied with %b and the product is subtracted from %a. The expected
; assembly is a by-element mls reading v2.s[0].
2330 define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
2331 ; CHECK-LABEL: test_vmlsq_laneq_s32_0:
2332 ; CHECK: // %bb.0: // %entry
2333 ; CHECK-NEXT: mls v0.4s, v1.4s, v2.s[0]
2336 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
2337 %mul = mul <4 x i32> %shuffle, %b
2338 %sub = sub <4 x i32> %a, %mul
; Lane-0 integer multiply: a zero-mask splat of the <4 x i16> %v is
; multiplied with %a. The expected assembly is a by-element mul reading
; v1.h[0].
2342 define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
2343 ; CHECK-LABEL: test_vmul_lane_s16_0:
2344 ; CHECK: // %bb.0: // %entry
2345 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2346 ; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[0]
2349 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2350 %mul = mul <4 x i16> %shuffle, %a
; Lane-0 integer multiply with a widening splat: element 0 of the
; <4 x i16> %v is broadcast to <8 x i16> and multiplied with %a. The expected
; assembly is a by-element mul reading v1.h[0].
2354 define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
2355 ; CHECK-LABEL: test_vmulq_lane_s16_0:
2356 ; CHECK: // %bb.0: // %entry
2357 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2358 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[0]
2361 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
2362 %mul = mul <8 x i16> %shuffle, %a
; Lane-0 integer multiply: a zero-mask splat of the <2 x i32> %v is
; multiplied with %a. The expected assembly is a by-element mul reading
; v1.s[0].
2366 define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
2367 ; CHECK-LABEL: test_vmul_lane_s32_0:
2368 ; CHECK: // %bb.0: // %entry
2369 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2370 ; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[0]
2373 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2374 %mul = mul <2 x i32> %shuffle, %a
; Lane-0 integer multiply with a widening splat: element 0 of the
; <2 x i32> %v is broadcast to <4 x i32> and multiplied with %a. The expected
; assembly is a by-element mul reading v1.s[0].
2378 define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
2379 ; CHECK-LABEL: test_vmulq_lane_s32_0:
2380 ; CHECK: // %bb.0: // %entry
2381 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2382 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[0]
2385 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
2386 %mul = mul <4 x i32> %shuffle, %a
; Variant named for unsigned 16-bit elements. The IR uses the same
; sign-agnostic mul on a lane-0 splat of the <4 x i16> %v, and the expected
; assembly is a by-element mul reading v1.h[0].
2390 define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
2391 ; CHECK-LABEL: test_vmul_lane_u16_0:
2392 ; CHECK: // %bb.0: // %entry
2393 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2394 ; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[0]
2397 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2398 %mul = mul <4 x i16> %shuffle, %a
; Variant named for unsigned 16-bit elements with a widening lane-0 splat
; (<4 x i16> to <8 x i16>) feeding a sign-agnostic mul. The expected assembly
; is a by-element mul reading v1.h[0].
2402 define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
2403 ; CHECK-LABEL: test_vmulq_lane_u16_0:
2404 ; CHECK: // %bb.0: // %entry
2405 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2406 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[0]
2409 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
2410 %mul = mul <8 x i16> %shuffle, %a
; Variant named for unsigned 32-bit elements. The IR uses the same
; sign-agnostic mul on a lane-0 splat of the <2 x i32> %v, and the expected
; assembly is a by-element mul reading v1.s[0].
2414 define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
2415 ; CHECK-LABEL: test_vmul_lane_u32_0:
2416 ; CHECK: // %bb.0: // %entry
2417 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2418 ; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[0]
2421 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2422 %mul = mul <2 x i32> %shuffle, %a
; Variant named for unsigned 32-bit elements with a widening lane-0 splat
; (<2 x i32> to <4 x i32>) feeding a sign-agnostic mul. The expected assembly
; is a by-element mul reading v1.s[0].
2426 define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
2427 ; CHECK-LABEL: test_vmulq_lane_u32_0:
2428 ; CHECK: // %bb.0: // %entry
2429 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2430 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[0]
2433 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
2434 %mul = mul <4 x i32> %shuffle, %a
; Lane-0 integer multiply with a narrowing splat: element 0 of the
; <8 x i16> %v is broadcast to <4 x i16> and multiplied with %a. The expected
; assembly is a by-element mul reading v1.h[0].
2438 define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
2439 ; CHECK-LABEL: test_vmul_laneq_s16_0:
2440 ; CHECK: // %bb.0: // %entry
2441 ; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[0]
2444 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2445 %mul = mul <4 x i16> %shuffle, %a
; Lane-0 integer multiply on <8 x i16>: a zero-mask splat of %v is multiplied
; with %a. The expected assembly is a by-element mul reading v1.h[0].
2449 define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
2450 ; CHECK-LABEL: test_vmulq_laneq_s16_0:
2451 ; CHECK: // %bb.0: // %entry
2452 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[0]
2455 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
2456 %mul = mul <8 x i16> %shuffle, %a
; Lane-0 integer multiply with a narrowing splat: element 0 of the
; <4 x i32> %v is broadcast to <2 x i32> and multiplied with %a. The expected
; assembly is a by-element mul reading v1.s[0].
2460 define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
2461 ; CHECK-LABEL: test_vmul_laneq_s32_0:
2462 ; CHECK: // %bb.0: // %entry
2463 ; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[0]
2466 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
2467 %mul = mul <2 x i32> %shuffle, %a
; Lane-0 integer multiply on <4 x i32>: a zero-mask splat of %v is multiplied
; with %a. The expected assembly is a by-element mul reading v1.s[0].
2471 define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
2472 ; CHECK-LABEL: test_vmulq_laneq_s32_0:
2473 ; CHECK: // %bb.0: // %entry
2474 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[0]
2477 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
2478 %mul = mul <4 x i32> %shuffle, %a
; Variant named for unsigned 16-bit elements with a narrowing lane-0 splat
; (<8 x i16> to <4 x i16>) feeding a sign-agnostic mul. The expected assembly
; is a by-element mul reading v1.h[0].
2482 define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
2483 ; CHECK-LABEL: test_vmul_laneq_u16_0:
2484 ; CHECK: // %bb.0: // %entry
2485 ; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[0]
2488 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2489 %mul = mul <4 x i16> %shuffle, %a
; Variant named for unsigned 16-bit elements on <8 x i16>: a lane-0 splat of
; %v feeds a sign-agnostic mul. The expected assembly is a by-element mul
; reading v1.h[0].
2493 define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
2494 ; CHECK-LABEL: test_vmulq_laneq_u16_0:
2495 ; CHECK: // %bb.0: // %entry
2496 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[0]
2499 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
2500 %mul = mul <8 x i16> %shuffle, %a
; Variant named for unsigned 32-bit elements with a narrowing lane-0 splat
; (<4 x i32> to <2 x i32>) feeding a sign-agnostic mul. The expected assembly
; is a by-element mul reading v1.s[0].
2504 define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
2505 ; CHECK-LABEL: test_vmul_laneq_u32_0:
2506 ; CHECK: // %bb.0: // %entry
2507 ; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[0]
2510 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
2511 %mul = mul <2 x i32> %shuffle, %a
; Variant named for unsigned 32-bit elements on <4 x i32>: a lane-0 splat of
; %v feeds a sign-agnostic mul. The expected assembly is a by-element mul
; reading v1.s[0].
2515 define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
2516 ; CHECK-LABEL: test_vmulq_laneq_u32_0:
2517 ; CHECK: // %bb.0: // %entry
2518 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[0]
2521 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
2522 %mul = mul <4 x i32> %shuffle, %a
; Lane-0 fused multiply-add: a zero-mask splat of the <2 x float> %v is the
; first multiplicand of llvm.fma, with %b as the other and %a as the addend.
; The expected assembly is a by-element fmla reading v2.s[0].
2526 define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
2527 ; CHECK-LABEL: test_vfma_lane_f32_0:
2528 ; CHECK: // %bb.0: // %entry
2529 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2530 ; CHECK-NEXT: fmla v0.2s, v1.2s, v2.s[0]
2533 %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
2534 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
; Lane-0 fused multiply-add with a widening splat: element 0 of the
; <2 x float> %v is broadcast to <4 x float> and passed to llvm.fma with %b
; and addend %a. The expected assembly is a by-element fmla reading v2.s[0].
2538 define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
2539 ; CHECK-LABEL: test_vfmaq_lane_f32_0:
2540 ; CHECK: // %bb.0: // %entry
2541 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2542 ; CHECK-NEXT: fmla v0.4s, v1.4s, v2.s[0]
2545 %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
2546 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
; llvm.fma of (lane 0 of the 128-bit %v broadcast to two floats) * %b + %a.
; The CHECK lines expect a single by-element `fmla` on v2.s[0].
2550 define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
2551 ; CHECK-LABEL: test_vfma_laneq_f32_0:
2552 ; CHECK: // %bb.0: // %entry
2553 ; CHECK-NEXT: fmla v0.2s, v1.2s, v2.s[0]
2556 %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
2557 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
; llvm.fma of (lane 0 of %v broadcast to four floats) * %b + %a.
; The CHECK lines expect a single by-element `fmla` on v2.s[0].
2561 define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
2562 ; CHECK-LABEL: test_vfmaq_laneq_f32_0:
2563 ; CHECK: // %bb.0: // %entry
2564 ; CHECK-NEXT: fmla v0.4s, v1.4s, v2.s[0]
2567 %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
2568 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
; %sub negates %v (fsub from -0.0); lane 0 of the negated vector is broadcast and passed to llvm.fma with %b and %a.
; The CHECK lines expect the negation to be absorbed into a by-element `fmls` on v2.s[0] (after the `// kill` annotation).
2572 define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
2573 ; CHECK-LABEL: test_vfms_lane_f32_0:
2574 ; CHECK: // %bb.0: // %entry
2575 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2576 ; CHECK-NEXT: fmls v0.2s, v1.2s, v2.s[0]
2579 %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
2580 %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer
2581 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
; %sub negates the 64-bit %v (fsub from -0.0); lane 0 of it is broadcast to four floats and passed to llvm.fma with %b and %a.
; The CHECK lines expect the negation to be absorbed into a by-element `fmls` on v2.s[0] (after the `// kill` annotation).
2585 define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
2586 ; CHECK-LABEL: test_vfmsq_lane_f32_0:
2587 ; CHECK: // %bb.0: // %entry
2588 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2589 ; CHECK-NEXT: fmls v0.4s, v1.4s, v2.s[0]
2592 %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
2593 %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> zeroinitializer
2594 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
; %sub negates the 128-bit %v (fsub from -0.0); lane 0 of it is broadcast to two floats and passed to llvm.fma with %b and %a.
; The CHECK lines expect the negation to be absorbed into a single by-element `fmls` on v2.s[0].
2598 define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
2599 ; CHECK-LABEL: test_vfms_laneq_f32_0:
2600 ; CHECK: // %bb.0: // %entry
2601 ; CHECK-NEXT: fmls v0.2s, v1.2s, v2.s[0]
2604 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
2605 %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> zeroinitializer
2606 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
; %sub negates %v (fsub from -0.0); lane 0 of it is broadcast to four floats and passed to llvm.fma with %b and %a.
; The CHECK lines expect the negation to be absorbed into a single by-element `fmls` on v2.s[0].
2610 define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
2611 ; CHECK-LABEL: test_vfmsq_laneq_f32_0:
2612 ; CHECK: // %bb.0: // %entry
2613 ; CHECK-NEXT: fmls v0.4s, v1.4s, v2.s[0]
2616 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
2617 %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> zeroinitializer
2618 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
; llvm.fma of (lane 0 of %v broadcast to two doubles) * %b + %a.
; The CHECK lines expect a single by-element `fmla` on v2.d[0].
2622 define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
2623 ; CHECK-LABEL: test_vfmaq_laneq_f64_0:
2624 ; CHECK: // %bb.0: // %entry
2625 ; CHECK-NEXT: fmla v0.2d, v1.2d, v2.d[0]
2628 %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
2629 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
; %sub negates %v (fsub from -0.0); lane 0 of it is broadcast to two doubles and passed to llvm.fma with %b and %a.
; The CHECK lines expect the negation to be absorbed into a single by-element `fmls` on v2.d[0].
2633 define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
2634 ; CHECK-LABEL: test_vfmsq_laneq_f64_0:
2635 ; CHECK: // %bb.0: // %entry
2636 ; CHECK-NEXT: fmls v0.2d, v1.2d, v2.d[0]
2639 %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
2640 %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer
2641 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
; smull intrinsic of %b by lane 0 of the 64-bit %v (broadcast via the zeroinitializer mask), then added to %a.
; The CHECK lines expect mul+add to fuse into a by-element `smlal` on v2.h[0] (after the `// kill` annotation).
2645 define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
2646 ; CHECK-LABEL: test_vmlal_lane_s16_0:
2647 ; CHECK: // %bb.0: // %entry
2648 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2649 ; CHECK-NEXT: smlal v0.4s, v1.4h, v2.h[0]
2652 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2653 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
2654 %add = add <4 x i32> %vmull2.i, %a
; smull intrinsic of %b by lane 0 of the 64-bit %v (broadcast via the zeroinitializer mask), then added to %a.
; The CHECK lines expect mul+add to fuse into a by-element `smlal` on v2.s[0] (after the `// kill` annotation).
2658 define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
2659 ; CHECK-LABEL: test_vmlal_lane_s32_0:
2660 ; CHECK: // %bb.0: // %entry
2661 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2662 ; CHECK-NEXT: smlal v0.2d, v1.2s, v2.s[0]
2665 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2666 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
2667 %add = add <2 x i64> %vmull2.i, %a
; smull intrinsic of %b by lane 0 of the 128-bit %v (broadcast via the zeroinitializer mask), then added to %a.
; The CHECK lines expect mul+add to fuse into a single by-element `smlal` on v2.h[0].
2671 define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
2672 ; CHECK-LABEL: test_vmlal_laneq_s16_0:
2673 ; CHECK: // %bb.0: // %entry
2674 ; CHECK-NEXT: smlal v0.4s, v1.4h, v2.h[0]
2677 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2678 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
2679 %add = add <4 x i32> %vmull2.i, %a
; smull intrinsic of %b by lane 0 of the 128-bit %v (broadcast via the zeroinitializer mask), then added to %a.
; The CHECK lines expect mul+add to fuse into a single by-element `smlal` on v2.s[0].
2683 define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
2684 ; CHECK-LABEL: test_vmlal_laneq_s32_0:
2685 ; CHECK: // %bb.0: // %entry
2686 ; CHECK-NEXT: smlal v0.2d, v1.2s, v2.s[0]
2689 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
2690 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
2691 %add = add <2 x i64> %vmull2.i, %a
; %shuffle.i selects the upper half of %b (elements 4-7); it is smull'ed by lane 0 of the 64-bit %v and added to %a.
; The CHECK lines expect the high-half extract and the broadcast to fold into `smlal2` on v2.h[0] (after the `// kill` annotation).
2695 define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
2696 ; CHECK-LABEL: test_vmlal_high_lane_s16_0:
2697 ; CHECK: // %bb.0: // %entry
2698 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2699 ; CHECK-NEXT: smlal2 v0.4s, v1.8h, v2.h[0]
2702 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2703 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2704 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
2705 %add = add <4 x i32> %vmull2.i, %a
; %shuffle.i selects the upper half of %b (elements 2-3); it is smull'ed by lane 0 of the 64-bit %v and added to %a.
; The CHECK lines expect the high-half extract and the broadcast to fold into `smlal2` on v2.s[0] (after the `// kill` annotation).
2709 define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
2710 ; CHECK-LABEL: test_vmlal_high_lane_s32_0:
2711 ; CHECK: // %bb.0: // %entry
2712 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2713 ; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.s[0]
2716 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2717 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2718 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
2719 %add = add <2 x i64> %vmull2.i, %a
; %shuffle.i selects the upper half of %b (elements 4-7); it is smull'ed by lane 0 of the 128-bit %v and added to %a.
; The CHECK lines expect everything to fold into a single `smlal2` on v2.h[0].
2723 define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
2724 ; CHECK-LABEL: test_vmlal_high_laneq_s16_0:
2725 ; CHECK: // %bb.0: // %entry
2726 ; CHECK-NEXT: smlal2 v0.4s, v1.8h, v2.h[0]
2729 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2730 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2731 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
2732 %add = add <4 x i32> %vmull2.i, %a
; %shuffle.i selects the upper half of %b (elements 2-3); it is smull'ed by lane 0 of the 128-bit %v and added to %a.
; The CHECK lines expect everything to fold into a single `smlal2` on v2.s[0].
2736 define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
2737 ; CHECK-LABEL: test_vmlal_high_laneq_s32_0:
2738 ; CHECK: // %bb.0: // %entry
2739 ; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.s[0]
2742 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2743 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
2744 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
2745 %add = add <2 x i64> %vmull2.i, %a
; smull intrinsic of %b by lane 0 of the 64-bit %v (broadcast via the zeroinitializer mask), then subtracted from %a.
; The CHECK lines expect mul+sub to fuse into a by-element `smlsl` on v2.h[0] (after the `// kill` annotation).
2749 define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
2750 ; CHECK-LABEL: test_vmlsl_lane_s16_0:
2751 ; CHECK: // %bb.0: // %entry
2752 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2753 ; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.h[0]
2756 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2757 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
2758 %sub = sub <4 x i32> %a, %vmull2.i
; smull intrinsic of %b by lane 0 of the 64-bit %v (broadcast via the zeroinitializer mask), then subtracted from %a.
; The CHECK lines expect mul+sub to fuse into a by-element `smlsl` on v2.s[0] (after the `// kill` annotation).
2762 define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
2763 ; CHECK-LABEL: test_vmlsl_lane_s32_0:
2764 ; CHECK: // %bb.0: // %entry
2765 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2766 ; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.s[0]
2769 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2770 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
2771 %sub = sub <2 x i64> %a, %vmull2.i
; smull intrinsic of %b by lane 0 of the 128-bit %v (broadcast via the zeroinitializer mask), then subtracted from %a.
; The CHECK lines expect mul+sub to fuse into a single by-element `smlsl` on v2.h[0].
2775 define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
2776 ; CHECK-LABEL: test_vmlsl_laneq_s16_0:
2777 ; CHECK: // %bb.0: // %entry
2778 ; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.h[0]
2781 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2782 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
2783 %sub = sub <4 x i32> %a, %vmull2.i
; smull intrinsic of %b by lane 0 of the 128-bit %v (broadcast via the zeroinitializer mask), then subtracted from %a.
; The CHECK lines expect mul+sub to fuse into a single by-element `smlsl` on v2.s[0].
2787 define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
2788 ; CHECK-LABEL: test_vmlsl_laneq_s32_0:
2789 ; CHECK: // %bb.0: // %entry
2790 ; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.s[0]
2793 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
2794 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
2795 %sub = sub <2 x i64> %a, %vmull2.i
; %shuffle.i selects the upper half of %b (elements 4-7); it is smull'ed by lane 0 of the 64-bit %v and subtracted from %a.
; The CHECK lines expect the sequence to fold into `smlsl2` on v2.h[0] (after the `// kill` annotation).
2799 define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
2800 ; CHECK-LABEL: test_vmlsl_high_lane_s16_0:
2801 ; CHECK: // %bb.0: // %entry
2802 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2803 ; CHECK-NEXT: smlsl2 v0.4s, v1.8h, v2.h[0]
2806 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2807 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2808 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
2809 %sub = sub <4 x i32> %a, %vmull2.i
; %shuffle.i selects the upper half of %b (elements 2-3); it is smull'ed by lane 0 of the 64-bit %v and subtracted from %a.
; The CHECK lines expect the sequence to fold into `smlsl2` on v2.s[0] (after the `// kill` annotation).
2813 define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
2814 ; CHECK-LABEL: test_vmlsl_high_lane_s32_0:
2815 ; CHECK: // %bb.0: // %entry
2816 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2817 ; CHECK-NEXT: smlsl2 v0.2d, v1.4s, v2.s[0]
2820 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2821 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2822 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
2823 %sub = sub <2 x i64> %a, %vmull2.i
; %shuffle.i selects the upper half of %b (elements 4-7); it is smull'ed by lane 0 of the 128-bit %v and subtracted from %a.
; The CHECK lines expect everything to fold into a single `smlsl2` on v2.h[0].
2827 define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
2828 ; CHECK-LABEL: test_vmlsl_high_laneq_s16_0:
2829 ; CHECK: // %bb.0: // %entry
2830 ; CHECK-NEXT: smlsl2 v0.4s, v1.8h, v2.h[0]
2833 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2834 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2835 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
2836 %sub = sub <4 x i32> %a, %vmull2.i
; %shuffle.i selects the upper half of %b (elements 2-3); it is smull'ed by lane 0 of the 128-bit %v and subtracted from %a.
; The CHECK lines expect everything to fold into a single `smlsl2` on v2.s[0].
2840 define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
2841 ; CHECK-LABEL: test_vmlsl_high_laneq_s32_0:
2842 ; CHECK: // %bb.0: // %entry
2843 ; CHECK-NEXT: smlsl2 v0.2d, v1.4s, v2.s[0]
2846 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2847 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
2848 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
2849 %sub = sub <2 x i64> %a, %vmull2.i
; umull intrinsic of %b by lane 0 of the 64-bit %v (broadcast via the zeroinitializer mask), then added to %a.
; The CHECK lines expect mul+add to fuse into a by-element `umlal` on v2.h[0] (after the `// kill` annotation).
2853 define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
2854 ; CHECK-LABEL: test_vmlal_lane_u16_0:
2855 ; CHECK: // %bb.0: // %entry
2856 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2857 ; CHECK-NEXT: umlal v0.4s, v1.4h, v2.h[0]
2860 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2861 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
2862 %add = add <4 x i32> %vmull2.i, %a
; umull intrinsic of %b by lane 0 of the 64-bit %v (broadcast via the zeroinitializer mask), then added to %a.
; The CHECK lines expect mul+add to fuse into a by-element `umlal` on v2.s[0] (after the `// kill` annotation).
2866 define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
2867 ; CHECK-LABEL: test_vmlal_lane_u32_0:
2868 ; CHECK: // %bb.0: // %entry
2869 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2870 ; CHECK-NEXT: umlal v0.2d, v1.2s, v2.s[0]
2873 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2874 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
2875 %add = add <2 x i64> %vmull2.i, %a
; umull intrinsic of %b by lane 0 of the 128-bit %v (broadcast via the zeroinitializer mask), then added to %a.
; The CHECK lines expect mul+add to fuse into a single by-element `umlal` on v2.h[0].
2879 define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
2880 ; CHECK-LABEL: test_vmlal_laneq_u16_0:
2881 ; CHECK: // %bb.0: // %entry
2882 ; CHECK-NEXT: umlal v0.4s, v1.4h, v2.h[0]
2885 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2886 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
2887 %add = add <4 x i32> %vmull2.i, %a
; umull intrinsic of %b by lane 0 of the 128-bit %v (broadcast via the zeroinitializer mask), then added to %a.
; The CHECK lines expect mul+add to fuse into a single by-element `umlal` on v2.s[0].
2891 define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
2892 ; CHECK-LABEL: test_vmlal_laneq_u32_0:
2893 ; CHECK: // %bb.0: // %entry
2894 ; CHECK-NEXT: umlal v0.2d, v1.2s, v2.s[0]
2897 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
2898 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
2899 %add = add <2 x i64> %vmull2.i, %a
; %shuffle.i selects the upper half of %b (elements 4-7); it is umull'ed by lane 0 of the 64-bit %v and added to %a.
; The CHECK lines expect the sequence to fold into `umlal2` on v2.h[0] (after the `// kill` annotation).
2903 define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
2904 ; CHECK-LABEL: test_vmlal_high_lane_u16_0:
2905 ; CHECK: // %bb.0: // %entry
2906 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2907 ; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.h[0]
2910 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2911 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2912 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
2913 %add = add <4 x i32> %vmull2.i, %a
; %shuffle.i selects the upper half of %b (elements 2-3); it is umull'ed by lane 0 of the 64-bit %v and added to %a.
; The CHECK lines expect the sequence to fold into `umlal2` on v2.s[0] (after the `// kill` annotation).
2917 define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
2918 ; CHECK-LABEL: test_vmlal_high_lane_u32_0:
2919 ; CHECK: // %bb.0: // %entry
2920 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2921 ; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.s[0]
2924 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2925 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2926 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
2927 %add = add <2 x i64> %vmull2.i, %a
; %shuffle.i selects the upper half of %b (elements 4-7); it is umull'ed by lane 0 of the 128-bit %v and added to %a.
; The CHECK lines expect everything to fold into a single `umlal2` on v2.h[0].
2931 define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
2932 ; CHECK-LABEL: test_vmlal_high_laneq_u16_0:
2933 ; CHECK: // %bb.0: // %entry
2934 ; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.h[0]
2937 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2938 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2939 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
2940 %add = add <4 x i32> %vmull2.i, %a
; %shuffle.i selects the upper half of %b (elements 2-3); it is umull'ed by lane 0 of the 128-bit %v and added to %a.
; The CHECK lines expect everything to fold into a single `umlal2` on v2.s[0].
2944 define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
2945 ; CHECK-LABEL: test_vmlal_high_laneq_u32_0:
2946 ; CHECK: // %bb.0: // %entry
2947 ; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.s[0]
2950 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2951 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
2952 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
2953 %add = add <2 x i64> %vmull2.i, %a
; umull intrinsic of %b by lane 0 of the 64-bit %v (broadcast via the zeroinitializer mask), then subtracted from %a.
; The CHECK lines expect mul+sub to fuse into a by-element `umlsl` on v2.h[0] (after the `// kill` annotation).
2957 define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
2958 ; CHECK-LABEL: test_vmlsl_lane_u16_0:
2959 ; CHECK: // %bb.0: // %entry
2960 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2961 ; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.h[0]
2964 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2965 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
2966 %sub = sub <4 x i32> %a, %vmull2.i
; umull intrinsic of %b by lane 0 of the 64-bit %v (broadcast via the zeroinitializer mask), then subtracted from %a.
; The CHECK lines expect mul+sub to fuse into a by-element `umlsl` on v2.s[0] (after the `// kill` annotation).
2970 define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
2971 ; CHECK-LABEL: test_vmlsl_lane_u32_0:
2972 ; CHECK: // %bb.0: // %entry
2973 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2974 ; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.s[0]
2977 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2978 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
2979 %sub = sub <2 x i64> %a, %vmull2.i
; umull intrinsic of %b by lane 0 of the 128-bit %v (broadcast via the zeroinitializer mask), then subtracted from %a.
; The CHECK lines expect mul+sub to fuse into a single by-element `umlsl` on v2.h[0].
2983 define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
2984 ; CHECK-LABEL: test_vmlsl_laneq_u16_0:
2985 ; CHECK: // %bb.0: // %entry
2986 ; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.h[0]
2989 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2990 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
2991 %sub = sub <4 x i32> %a, %vmull2.i
; umull intrinsic of %b by lane 0 of the 128-bit %v (broadcast via the zeroinitializer mask), then subtracted from %a.
; The CHECK lines expect mul+sub to fuse into a single by-element `umlsl` on v2.s[0].
2995 define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
2996 ; CHECK-LABEL: test_vmlsl_laneq_u32_0:
2997 ; CHECK: // %bb.0: // %entry
2998 ; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.s[0]
3001 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
3002 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
3003 %sub = sub <2 x i64> %a, %vmull2.i
; %shuffle.i selects the upper half of %b (elements 4-7); it is umull'ed by lane 0 of the 64-bit %v and subtracted from %a.
; The CHECK lines expect the sequence to fold into `umlsl2` on v2.h[0] (after the `// kill` annotation).
3007 define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
3008 ; CHECK-LABEL: test_vmlsl_high_lane_u16_0:
3009 ; CHECK: // %bb.0: // %entry
3010 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3011 ; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.h[0]
3014 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3015 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3016 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
3017 %sub = sub <4 x i32> %a, %vmull2.i
; %shuffle.i selects the upper half of %b (elements 2-3); it is umull'ed by lane 0 of the 64-bit %v and subtracted from %a.
; The CHECK lines expect the sequence to fold into `umlsl2` on v2.s[0] (after the `// kill` annotation).
3021 define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
3022 ; CHECK-LABEL: test_vmlsl_high_lane_u32_0:
3023 ; CHECK: // %bb.0: // %entry
3024 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3025 ; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.s[0]
3028 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
3029 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3030 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
3031 %sub = sub <2 x i64> %a, %vmull2.i
; %shuffle.i selects the upper half of %b (elements 4-7); it is umull'ed by lane 0 of the 128-bit %v and subtracted from %a.
; The CHECK lines expect everything to fold into a single `umlsl2` on v2.h[0].
3035 define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
3036 ; CHECK-LABEL: test_vmlsl_high_laneq_u16_0:
3037 ; CHECK: // %bb.0: // %entry
3038 ; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.h[0]
3041 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3042 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
3043 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
3044 %sub = sub <4 x i32> %a, %vmull2.i
; %shuffle.i selects the upper half of %b (elements 2-3); it is umull'ed by lane 0 of the 128-bit %v and subtracted from %a.
; The CHECK lines expect everything to fold into a single `umlsl2` on v2.s[0].
3048 define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
3049 ; CHECK-LABEL: test_vmlsl_high_laneq_u32_0:
3050 ; CHECK: // %bb.0: // %entry
3051 ; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.s[0]
3054 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
3055 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
3056 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
3057 %sub = sub <2 x i64> %a, %vmull2.i
; Returns the smull intrinsic of %a by lane 0 of the 64-bit %v, broadcast via the zeroinitializer mask.
; The CHECK lines expect a by-element `smull` on v1.h[0] (after the `// kill` annotation on $d1/$q1).
3061 define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
3062 ; CHECK-LABEL: test_vmull_lane_s16_0:
3063 ; CHECK: // %bb.0: // %entry
3064 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3065 ; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[0]
3068 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3069 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
3070 ret <4 x i32> %vmull2.i
; Returns the smull intrinsic of %a by lane 0 of the 64-bit %v, broadcast via the zeroinitializer mask.
; The CHECK lines expect a by-element `smull` on v1.s[0] (after the `// kill` annotation on $d1/$q1).
3073 define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
3074 ; CHECK-LABEL: test_vmull_lane_s32_0:
3075 ; CHECK: // %bb.0: // %entry
3076 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3077 ; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[0]
3080 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3081 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
3082 ret <2 x i64> %vmull2.i
; Returns the umull intrinsic of %a by lane 0 of the 64-bit %v, broadcast via the zeroinitializer mask.
; The CHECK lines expect a by-element `umull` on v1.h[0] (after the `// kill` annotation on $d1/$q1).
3085 define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
3086 ; CHECK-LABEL: test_vmull_lane_u16_0:
3087 ; CHECK: // %bb.0: // %entry
3088 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3089 ; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[0]
3092 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3093 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
3094 ret <4 x i32> %vmull2.i
; Returns the umull intrinsic of %a by lane 0 of the 64-bit %v, broadcast via the zeroinitializer mask.
; The CHECK lines expect a by-element `umull` on v1.s[0] (after the `// kill` annotation on $d1/$q1).
3097 define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
3098 ; CHECK-LABEL: test_vmull_lane_u32_0:
3099 ; CHECK: // %bb.0: // %entry
3100 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3101 ; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[0]
3104 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3105 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
3106 ret <2 x i64> %vmull2.i
; %shuffle.i selects the upper half of %a (elements 4-7); the result is its smull by lane 0 of the 64-bit %v.
; The CHECK lines expect the extract and broadcast to fold into `smull2` on v1.h[0] (after the `// kill` annotation).
3109 define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
3110 ; CHECK-LABEL: test_vmull_high_lane_s16_0:
3111 ; CHECK: // %bb.0: // %entry
3112 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3113 ; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.h[0]
3116 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3117 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3118 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
3119 ret <4 x i32> %vmull2.i
; %shuffle.i selects the upper half of %a (elements 2-3); the result is its smull by lane 0 of the 64-bit %v.
; The CHECK lines expect the extract and broadcast to fold into `smull2` on v1.s[0] (after the `// kill` annotation).
3122 define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
3123 ; CHECK-LABEL: test_vmull_high_lane_s32_0:
3124 ; CHECK: // %bb.0: // %entry
3125 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3126 ; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.s[0]
3129 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
3130 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3131 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
3132 ret <2 x i64> %vmull2.i
; %shuffle.i selects the upper half of %a (elements 4-7); the result is its umull by lane 0 of the 64-bit %v.
; The CHECK lines expect the extract and broadcast to fold into `umull2` on v1.h[0] (after the `// kill` annotation).
3135 define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
3136 ; CHECK-LABEL: test_vmull_high_lane_u16_0:
3137 ; CHECK: // %bb.0: // %entry
3138 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3139 ; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.h[0]
3142 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3143 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3144 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
3145 ret <4 x i32> %vmull2.i
; %shuffle.i selects the upper half of %a (elements 2-3); the result is its umull by lane 0 of the 64-bit %v.
; The CHECK lines expect the extract and broadcast to fold into `umull2` on v1.s[0] (after the `// kill` annotation).
3148 define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
3149 ; CHECK-LABEL: test_vmull_high_lane_u32_0:
3150 ; CHECK: // %bb.0: // %entry
3151 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3152 ; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.s[0]
3155 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
3156 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3157 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
3158 ret <2 x i64> %vmull2.i
; Returns the smull intrinsic of %a by lane 0 of the 128-bit %v, broadcast via the zeroinitializer mask.
; The CHECK lines expect a single by-element `smull` on v1.h[0].
3161 define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
3162 ; CHECK-LABEL: test_vmull_laneq_s16_0:
3163 ; CHECK: // %bb.0: // %entry
3164 ; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[0]
3167 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
3168 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
3169 ret <4 x i32> %vmull2.i
; Returns the smull intrinsic of %a by lane 0 of the 128-bit %v, broadcast via the zeroinitializer mask.
; The CHECK lines expect a single by-element `smull` on v1.s[0].
3172 define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
3173 ; CHECK-LABEL: test_vmull_laneq_s32_0:
3174 ; CHECK: // %bb.0: // %entry
3175 ; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[0]
3178 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
3179 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
3180 ret <2 x i64> %vmull2.i
; Returns the umull intrinsic of %a by lane 0 of the 128-bit %v, broadcast via the zeroinitializer mask.
; The CHECK lines expect a single by-element `umull` on v1.h[0].
3183 define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
3184 ; CHECK-LABEL: test_vmull_laneq_u16_0:
3185 ; CHECK: // %bb.0: // %entry
3186 ; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[0]
3189 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
3190 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
3191 ret <4 x i32> %vmull2.i
; Returns the umull intrinsic of %a by lane 0 of the 128-bit %v, broadcast via the zeroinitializer mask.
; The CHECK lines expect a single by-element `umull` on v1.s[0].
3194 define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
3195 ; CHECK-LABEL: test_vmull_laneq_u32_0:
3196 ; CHECK: // %bb.0: // %entry
3197 ; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[0]
3200 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
3201 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
3202 ret <2 x i64> %vmull2.i
; %shuffle.i selects the upper half of %a (elements 4-7); the result is its smull by lane 0 of the 128-bit %v.
; The CHECK lines expect everything to fold into a single `smull2` on v1.h[0].
3205 define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
3206 ; CHECK-LABEL: test_vmull_high_laneq_s16_0:
3207 ; CHECK: // %bb.0: // %entry
3208 ; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.h[0]
3211 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3212 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
3213 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
3214 ret <4 x i32> %vmull2.i
; %shuffle.i selects the upper half of %a (elements 2-3); the result is its smull by lane 0 of the 128-bit %v.
; The CHECK lines expect everything to fold into a single `smull2` on v1.s[0].
3217 define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
3218 ; CHECK-LABEL: test_vmull_high_laneq_s32_0:
3219 ; CHECK: // %bb.0: // %entry
3220 ; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.s[0]
3223 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
3224 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
3225 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
3226 ret <2 x i64> %vmull2.i
; %shuffle.i selects the upper half of %a (elements 4-7); the result is its umull by lane 0 of the 128-bit %v.
; The CHECK lines expect everything to fold into a single `umull2` on v1.h[0].
3229 define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
3230 ; CHECK-LABEL: test_vmull_high_laneq_u16_0:
3231 ; CHECK: // %bb.0: // %entry
3232 ; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.h[0]
3235 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3236 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
3237 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
3238 ret <4 x i32> %vmull2.i
; %shuffle.i selects the upper half of %a (elements 2-3); the result is its umull by lane 0 of the 128-bit %v.
; The CHECK lines expect everything to fold into a single `umull2` on v1.s[0].
3241 define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
3242 ; CHECK-LABEL: test_vmull_high_laneq_u32_0:
3243 ; CHECK: // %bb.0: // %entry
3244 ; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.s[0]
3247 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
3248 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
3249 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
3250 ret <2 x i64> %vmull2.i
; sqdmull intrinsic of %b by lane 0 of the 64-bit %v (broadcast via the zeroinitializer mask), combined with %a through the sqadd intrinsic.
; The CHECK lines expect the pair to fuse into a by-element `sqdmlal` on v2.h[0] (after the `// kill` annotation).
3253 define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
3254 ; CHECK-LABEL: test_vqdmlal_lane_s16_0:
3255 ; CHECK: // %bb.0: // %entry
3256 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3257 ; CHECK-NEXT: sqdmlal v0.4s, v1.4h, v2.h[0]
3260 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3261 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
3262 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
3263 ret <4 x i32> %vqdmlal4.i
; sqdmull intrinsic of %b by lane 0 of the 64-bit %v (broadcast via the zeroinitializer mask), combined with %a through the sqadd intrinsic.
; The CHECK lines expect the pair to fuse into a by-element `sqdmlal` on v2.s[0] (after the `// kill` annotation).
3266 define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
3267 ; CHECK-LABEL: test_vqdmlal_lane_s32_0:
3268 ; CHECK: // %bb.0: // %entry
3269 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3270 ; CHECK-NEXT: sqdmlal v0.2d, v1.2s, v2.s[0]
3273 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3274 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
3275 %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
3276 ret <2 x i64> %vqdmlal4.i
; %shuffle.i selects the upper half of %b (elements 4-7); it is sqdmull'ed by lane 0 of the 64-bit %v and combined with %a through sqadd.
; The CHECK lines expect the sequence to fold into `sqdmlal2` on v2.h[0] (after the `// kill` annotation).
3279 define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
3280 ; CHECK-LABEL: test_vqdmlal_high_lane_s16_0:
3281 ; CHECK: // %bb.0: // %entry
3282 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3283 ; CHECK-NEXT: sqdmlal2 v0.4s, v1.8h, v2.h[0]
3286 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3287 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3288 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
3289 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
3290 ret <4 x i32> %vqdmlal4.i
; %shuffle.i selects the upper half of %b (elements 2-3); it is sqdmull'ed by lane 0 of the 64-bit %v and combined with %a through sqadd.
; The CHECK lines expect the sequence to fold into `sqdmlal2` on v2.s[0] (after the `// kill` annotation).
3293 define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
3294 ; CHECK-LABEL: test_vqdmlal_high_lane_s32_0:
3295 ; CHECK: // %bb.0: // %entry
3296 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3297 ; CHECK-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[0]
3300 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
3301 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3302 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
3303 %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
3304 ret <2 x i64> %vqdmlal4.i
; sqdmull intrinsic of %b by lane 0 of the 64-bit %v (broadcast via the zeroinitializer mask), then sqsub(%a, product).
; The CHECK lines expect the pair to fuse into a by-element `sqdmlsl` on v2.h[0] (after the `// kill` annotation).
3307 define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
3308 ; CHECK-LABEL: test_vqdmlsl_lane_s16_0:
3309 ; CHECK: // %bb.0: // %entry
3310 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3311 ; CHECK-NEXT: sqdmlsl v0.4s, v1.4h, v2.h[0]
3314 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3315 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
3316 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
3317 ret <4 x i32> %vqdmlsl4.i
; Splats lane 0 of %v, multiplies it with %b through the sqdmull.v2i64
; intrinsic and subtracts the product from %a via sqsub.v2i64.
; The assertions expect a by-element sqdmlsl that reads v2.s[0].
3320 define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
3321 ; CHECK-LABEL: test_vqdmlsl_lane_s32_0:
3322 ; CHECK: // %bb.0: // %entry
3323 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3324 ; CHECK-NEXT: sqdmlsl v0.2d, v1.2s, v2.s[0]
3327 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3328 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
3329 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
3330 ret <2 x i64> %vqdmlsl4.i
; Extracts the upper four i16 lanes of %b, splats lane 0 of %v, multiplies them
; with the sqdmull.v4i32 intrinsic and subtracts the product from %a via
; sqsub.v4i32. The assertions expect a by-element sqdmlsl2 reading v2.h[0].
3333 define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
3334 ; CHECK-LABEL: test_vqdmlsl_high_lane_s16_0:
3335 ; CHECK: // %bb.0: // %entry
3336 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3337 ; CHECK-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.h[0]
3340 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3341 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3342 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
3343 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
3344 ret <4 x i32> %vqdmlsl4.i
; Extracts the upper two i32 lanes of %b, splats lane 0 of %v, multiplies them
; with the sqdmull.v2i64 intrinsic and subtracts the product from %a via
; sqsub.v2i64. The assertions expect a by-element sqdmlsl2 reading v2.s[0].
3347 define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
3348 ; CHECK-LABEL: test_vqdmlsl_high_lane_s32_0:
3349 ; CHECK: // %bb.0: // %entry
3350 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3351 ; CHECK-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.s[0]
3354 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
3355 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3356 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
3357 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
3358 ret <2 x i64> %vqdmlsl4.i
; Splats lane 0 of the 4 x i16 vector %v and passes it with %a to the
; sqdmull.v4i32 intrinsic.
; The assertions expect a by-element sqdmull that reads v1.h[0].
3361 define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
3362 ; CHECK-LABEL: test_vqdmull_lane_s16_0:
3363 ; CHECK: // %bb.0: // %entry
3364 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3365 ; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.h[0]
3368 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3369 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
3370 ret <4 x i32> %vqdmull2.i
; Splats lane 0 of the 2 x i32 vector %v and passes it with %a to the
; sqdmull.v2i64 intrinsic.
; The assertions expect a by-element sqdmull that reads v1.s[0].
3373 define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
3374 ; CHECK-LABEL: test_vqdmull_lane_s32_0:
3375 ; CHECK: // %bb.0: // %entry
3376 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3377 ; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[0]
3380 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3381 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
3382 ret <2 x i64> %vqdmull2.i
; Splats lane 0 of the 8 x i16 vector %v down to four lanes and passes it with
; %a to the sqdmull.v4i32 intrinsic.
; The assertions expect a by-element sqdmull that reads v1.h[0].
3385 define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
3386 ; CHECK-LABEL: test_vqdmull_laneq_s16_0:
3387 ; CHECK: // %bb.0: // %entry
3388 ; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.h[0]
3391 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
3392 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
3393 ret <4 x i32> %vqdmull2.i
; Splats lane 0 of the 4 x i32 vector %v down to two lanes and passes it with
; %a to the sqdmull.v2i64 intrinsic.
; The assertions expect a by-element sqdmull that reads v1.s[0].
3396 define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
3397 ; CHECK-LABEL: test_vqdmull_laneq_s32_0:
3398 ; CHECK: // %bb.0: // %entry
3399 ; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[0]
3402 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
3403 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
3404 ret <2 x i64> %vqdmull2.i
; Extracts the upper four i16 lanes of %a, splats lane 0 of the 4 x i16 vector
; %v, and passes both to the sqdmull.v4i32 intrinsic.
; The assertions expect a by-element sqdmull2 that reads v1.h[0].
3407 define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
3408 ; CHECK-LABEL: test_vqdmull_high_lane_s16_0:
3409 ; CHECK: // %bb.0: // %entry
3410 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3411 ; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[0]
3414 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3415 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3416 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
3417 ret <4 x i32> %vqdmull2.i
; Extracts the upper two i32 lanes of %a, splats lane 0 of the 2 x i32 vector
; %v, and passes both to the sqdmull.v2i64 intrinsic.
; The assertions expect a by-element sqdmull2 that reads v1.s[0].
3420 define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
3421 ; CHECK-LABEL: test_vqdmull_high_lane_s32_0:
3422 ; CHECK: // %bb.0: // %entry
3423 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3424 ; CHECK-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[0]
3427 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
3428 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3429 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
3430 ret <2 x i64> %vqdmull2.i
; Extracts the upper four i16 lanes of %a, splats lane 0 of the 8 x i16 vector
; %v down to four lanes, and passes both to the sqdmull.v4i32 intrinsic.
; The assertions expect a by-element sqdmull2 that reads v1.h[0].
3433 define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
3434 ; CHECK-LABEL: test_vqdmull_high_laneq_s16_0:
3435 ; CHECK: // %bb.0: // %entry
3436 ; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[0]
3439 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3440 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
3441 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
3442 ret <4 x i32> %vqdmull2.i
; Extracts the upper two i32 lanes of %a, splats lane 0 of the 4 x i32 vector
; %v down to two lanes, and passes both to the sqdmull.v2i64 intrinsic.
; The assertions expect a by-element sqdmull2 that reads v1.s[0].
3445 define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
3446 ; CHECK-LABEL: test_vqdmull_high_laneq_s32_0:
3447 ; CHECK: // %bb.0: // %entry
3448 ; CHECK-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[0]
3451 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
3452 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
3453 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
3454 ret <2 x i64> %vqdmull2.i
; Splats lane 0 of the 4 x i16 vector %v and passes it with %a to the
; sqdmulh.v4i16 intrinsic.
; The assertions expect a by-element sqdmulh (4h form) that reads v1.h[0].
3457 define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
3458 ; CHECK-LABEL: test_vqdmulh_lane_s16_0:
3459 ; CHECK: // %bb.0: // %entry
3460 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3461 ; CHECK-NEXT: sqdmulh v0.4h, v0.4h, v1.h[0]
3464 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3465 %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
3466 ret <4 x i16> %vqdmulh2.i
; Splats lane 0 of the 4 x i16 vector %v out to eight lanes and passes it with
; %a to the sqdmulh.v8i16 intrinsic.
; The assertions expect a by-element sqdmulh (8h form) that reads v1.h[0].
3469 define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
3470 ; CHECK-LABEL: test_vqdmulhq_lane_s16_0:
3471 ; CHECK: // %bb.0: // %entry
3472 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3473 ; CHECK-NEXT: sqdmulh v0.8h, v0.8h, v1.h[0]
3476 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
3477 %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
3478 ret <8 x i16> %vqdmulh2.i
; Splats lane 0 of the 2 x i32 vector %v and passes it with %a to the
; sqdmulh.v2i32 intrinsic.
; The assertions expect a by-element sqdmulh (2s form) that reads v1.s[0].
3481 define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
3482 ; CHECK-LABEL: test_vqdmulh_lane_s32_0:
3483 ; CHECK: // %bb.0: // %entry
3484 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3485 ; CHECK-NEXT: sqdmulh v0.2s, v0.2s, v1.s[0]
3488 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3489 %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
3490 ret <2 x i32> %vqdmulh2.i
; Splats lane 0 of the 2 x i32 vector %v out to four lanes and passes it with
; %a to the sqdmulh.v4i32 intrinsic.
; The assertions expect a by-element sqdmulh (4s form) that reads v1.s[0].
3493 define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
3494 ; CHECK-LABEL: test_vqdmulhq_lane_s32_0:
3495 ; CHECK: // %bb.0: // %entry
3496 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3497 ; CHECK-NEXT: sqdmulh v0.4s, v0.4s, v1.s[0]
3500 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
3501 %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
3502 ret <4 x i32> %vqdmulh2.i
; Splats lane 0 of the 4 x i16 vector %v and passes it with %a to the
; sqrdmulh.v4i16 intrinsic.
; The assertions expect a by-element sqrdmulh (4h form) that reads v1.h[0].
3505 define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
3506 ; CHECK-LABEL: test_vqrdmulh_lane_s16_0:
3507 ; CHECK: // %bb.0: // %entry
3508 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3509 ; CHECK-NEXT: sqrdmulh v0.4h, v0.4h, v1.h[0]
3512 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3513 %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
3514 ret <4 x i16> %vqrdmulh2.i
; Splats lane 0 of the 4 x i16 vector %v out to eight lanes and passes it with
; %a to the sqrdmulh.v8i16 intrinsic.
; The assertions expect a by-element sqrdmulh (8h form) that reads v1.h[0].
3517 define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
3518 ; CHECK-LABEL: test_vqrdmulhq_lane_s16_0:
3519 ; CHECK: // %bb.0: // %entry
3520 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3521 ; CHECK-NEXT: sqrdmulh v0.8h, v0.8h, v1.h[0]
3524 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
3525 %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
3526 ret <8 x i16> %vqrdmulh2.i
; Splats lane 0 of the 2 x i32 vector %v and passes it with %a to the
; sqrdmulh.v2i32 intrinsic.
; The assertions expect a by-element sqrdmulh (2s form) that reads v1.s[0].
3529 define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
3530 ; CHECK-LABEL: test_vqrdmulh_lane_s32_0:
3531 ; CHECK: // %bb.0: // %entry
3532 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3533 ; CHECK-NEXT: sqrdmulh v0.2s, v0.2s, v1.s[0]
3536 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3537 %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
3538 ret <2 x i32> %vqrdmulh2.i
; Splats lane 0 of the 2 x i32 vector %v out to four lanes and passes it with
; %a to the sqrdmulh.v4i32 intrinsic.
; The assertions expect a by-element sqrdmulh (4s form) that reads v1.s[0].
3541 define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
3542 ; CHECK-LABEL: test_vqrdmulhq_lane_s32_0:
3543 ; CHECK: // %bb.0: // %entry
3544 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3545 ; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.s[0]
3548 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
3549 %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
3550 ret <4 x i32> %vqrdmulh2.i
; Splats lane 0 of the 2 x float vector %v and multiplies it with %a using a
; plain IR fmul (splat is the left operand).
; The assertions expect a by-element fmul (2s form) that reads v1.s[0].
3553 define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) {
3554 ; CHECK-LABEL: test_vmul_lane_f32_0:
3555 ; CHECK: // %bb.0: // %entry
3556 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3557 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[0]
3560 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
3561 %mul = fmul <2 x float> %shuffle, %a
3562 ret <2 x float> %mul
; Splats lane 0 of the 2 x float vector %v out to four lanes and multiplies it
; with %a using a plain IR fmul.
; The assertions expect a by-element fmul (4s form) that reads v1.s[0].
3565 define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
3566 ; CHECK-LABEL: test_vmulq_lane_f32_0:
3567 ; CHECK: // %bb.0: // %entry
3568 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3569 ; CHECK-NEXT: fmul v0.4s, v0.4s, v1.s[0]
3572 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
3573 %mul = fmul <4 x float> %shuffle, %a
3574 ret <4 x float> %mul
; Splats lane 0 of the 4 x float vector %v down to two lanes and multiplies it
; with %a using a plain IR fmul.
; The assertions expect a by-element fmul (2s form) that reads v1.s[0].
3577 define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
3578 ; CHECK-LABEL: test_vmul_laneq_f32_0:
3579 ; CHECK: // %bb.0: // %entry
3580 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[0]
3583 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
3584 %mul = fmul <2 x float> %shuffle, %a
3585 ret <2 x float> %mul
; Reinterprets the <1 x double> %a as a scalar double (through an <8 x i8>
; bitcast), extracts element 0 of %v, multiplies the two scalars with fmul and
; inserts the product into element 0 of a <1 x double>.
; The assertions expect a scalar "fmul d0, d0, d1".
3588 define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) {
3589 ; CHECK-LABEL: test_vmul_laneq_f64_0:
3590 ; CHECK: // %bb.0: // %entry
3591 ; CHECK-NEXT: fmul d0, d0, d1
3594 %0 = bitcast <1 x double> %a to <8 x i8>
3595 %1 = bitcast <8 x i8> %0 to double
3596 %extract = extractelement <2 x double> %v, i32 0
3597 %2 = fmul double %1, %extract
3598 %3 = insertelement <1 x double> undef, double %2, i32 0
; Splats lane 0 of the 4 x float vector %v across four lanes and multiplies it
; with %a using a plain IR fmul.
; The assertions expect a by-element fmul (4s form) that reads v1.s[0].
3602 define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
3603 ; CHECK-LABEL: test_vmulq_laneq_f32_0:
3604 ; CHECK: // %bb.0: // %entry
3605 ; CHECK-NEXT: fmul v0.4s, v0.4s, v1.s[0]
3608 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
3609 %mul = fmul <4 x float> %shuffle, %a
3610 ret <4 x float> %mul
; Splats lane 0 of the 2 x double vector %v across both lanes and multiplies it
; with %a using a plain IR fmul.
; The assertions expect a by-element fmul (2d form) that reads v1.d[0].
3613 define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
3614 ; CHECK-LABEL: test_vmulq_laneq_f64_0:
3615 ; CHECK: // %bb.0: // %entry
3616 ; CHECK-NEXT: fmul v0.2d, v0.2d, v1.d[0]
3619 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
3620 %mul = fmul <2 x double> %shuffle, %a
3621 ret <2 x double> %mul
; Splats lane 0 of the 2 x float vector %v and passes it with %a to the
; fmulx.v2f32 intrinsic.
; The assertions expect a by-element fmulx (2s form) that reads v1.s[0].
3624 define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) {
3625 ; CHECK-LABEL: test_vmulx_lane_f32_0:
3626 ; CHECK: // %bb.0: // %entry
3627 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3628 ; CHECK-NEXT: fmulx v0.2s, v0.2s, v1.s[0]
3631 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
3632 %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
3633 ret <2 x float> %vmulx2.i
; Splats lane 0 of the 2 x float vector %v out to four lanes and passes it with
; %a to the fmulx.v4f32 intrinsic.
; The assertions expect a by-element fmulx (4s form) that reads v1.s[0].
3636 define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
3637 ; CHECK-LABEL: test_vmulxq_lane_f32_0:
3638 ; CHECK: // %bb.0: // %entry
3639 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3640 ; CHECK-NEXT: fmulx v0.4s, v0.4s, v1.s[0]
3643 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
3644 %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
3645 ret <4 x float> %vmulx2.i
; Splats the single element of the <1 x double> %v out to two lanes and passes
; it with %a to the fmulx.v2f64 intrinsic.
; The assertions expect a by-element fmulx (2d form) that reads v1.d[0].
3648 define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) {
3649 ; CHECK-LABEL: test_vmulxq_lane_f64_0:
3650 ; CHECK: // %bb.0: // %entry
3651 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3652 ; CHECK-NEXT: fmulx v0.2d, v0.2d, v1.d[0]
3655 %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
3656 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
3657 ret <2 x double> %vmulx2.i
; Splats lane 0 of the 4 x float vector %v down to two lanes and passes it with
; %a to the fmulx.v2f32 intrinsic.
; The assertions expect a by-element fmulx (2s form) that reads v1.s[0].
3660 define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
3661 ; CHECK-LABEL: test_vmulx_laneq_f32_0:
3662 ; CHECK: // %bb.0: // %entry
3663 ; CHECK-NEXT: fmulx v0.2s, v0.2s, v1.s[0]
3666 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
3667 %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
3668 ret <2 x float> %vmulx2.i
; Splats lane 0 of the 4 x float vector %v across four lanes and passes it with
; %a to the fmulx.v4f32 intrinsic.
; The assertions expect a by-element fmulx (4s form) that reads v1.s[0].
3671 define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
3672 ; CHECK-LABEL: test_vmulxq_laneq_f32_0:
3673 ; CHECK: // %bb.0: // %entry
3674 ; CHECK-NEXT: fmulx v0.4s, v0.4s, v1.s[0]
3677 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
3678 %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
3679 ret <4 x float> %vmulx2.i
; Extracts element 0 of %a and element 0 of %v, multiplies them with the scalar
; fmulx.f64 intrinsic and inserts the result into element 0 of a poison
; <1 x double>. The assertions expect a scalar "fmulx d0, d0, d1".
3682 define <1 x double> @test_vmulx_laneq_f64_0(<1 x double> %a, <2 x double> %v) {
3683 ; CHECK-LABEL: test_vmulx_laneq_f64_0:
3684 ; CHECK: // %bb.0: // %entry
3685 ; CHECK-NEXT: fmulx d0, d0, d1
3688 %vget_lane = extractelement <1 x double> %a, i64 0
3689 %vgetq_lane = extractelement <2 x double> %v, i64 0
3690 %vmulxd_f64.i = tail call double @llvm.aarch64.neon.fmulx.f64(double %vget_lane, double %vgetq_lane)
3691 %vset_lane = insertelement <1 x double> poison, double %vmulxd_f64.i, i64 0
3692 ret <1 x double> %vset_lane
; Splats lane 0 of the 2 x double vector %v across both lanes and passes it
; with %a to the fmulx.v2f64 intrinsic.
; The assertions expect a by-element fmulx (2d form) that reads v1.d[0].
3695 define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
3696 ; CHECK-LABEL: test_vmulxq_laneq_f64_0:
3697 ; CHECK: // %bb.0: // %entry
3698 ; CHECK-NEXT: fmulx v0.2d, v0.2d, v1.d[0]
3701 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
3702 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
3703 ret <2 x double> %vmulx2.i
; Builds the same lane-3 splat of %v twice: %lane1 feeds llvm.fma together with
; %b and %a, %lane2 feeds an fmul with %c whose product is subtracted from the
; fma result. The assertions expect an fmla followed by an fmls, both indexing
; v3.s[3] directly.
3706 define <4 x float> @optimize_dup(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %v) {
3707 ; CHECK-LABEL: optimize_dup:
3708 ; CHECK: // %bb.0: // %entry
3709 ; CHECK-NEXT: fmla v0.4s, v1.4s, v3.s[3]
3710 ; CHECK-NEXT: fmls v0.4s, v2.4s, v3.s[3]
3713 %lane1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
3714 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane1, <4 x float> %b, <4 x float> %a)
3715 %lane2 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
3716 %1 = fmul <4 x float> %lane2, %c
3717 %s = fsub <4 x float> %0, %1
; Same shape as optimize_dup, but the two splats use different lanes of %v:
; %lane1 is lane 3 (fed to llvm.fma with %b and %a) and %lane2 is lane 1 (fed
; to the fmul with %c that is then subtracted). The assertions expect an fmla
; indexing v3.s[3] followed by an fmls indexing v3.s[1].
3721 define <4 x float> @no_optimize_dup(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %v) {
3722 ; CHECK-LABEL: no_optimize_dup:
3723 ; CHECK: // %bb.0: // %entry
3724 ; CHECK-NEXT: fmla v0.4s, v1.4s, v3.s[3]
3725 ; CHECK-NEXT: fmls v0.4s, v2.4s, v3.s[1]
3728 %lane1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
3729 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane1, <4 x float> %b, <4 x float> %a)
3730 %lane2 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
3731 %1 = fmul <4 x float> %lane2, %c
3732 %s = fsub <4 x float> %0, %1
; Function carrying the attribute "target-cpu"="cortex-a57": splats lane 1 of
; %v and feeds it with %b and %a to llvm.fma.v2f32.
; The assertions expect a by-element fmla (2s form) that reads v2.s[1].
3736 define <2 x float> @test_vfma_lane_simdinstr_opt_pass_caching_a57(<2 x float> %a, <2 x float> %b, <2 x float> %v) "target-cpu"="cortex-a57" {
3737 ; CHECK-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_a57:
3738 ; CHECK: // %bb.0: // %entry
3739 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3740 ; CHECK-NEXT: fmla v0.2s, v1.2s, v2.s[1]
3743 %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
3744 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
; Function carrying the attribute "target-cpu"="exynos-m3": splats lane 1 of
; %v and feeds it with %b and %a to llvm.fma.v2f32.
; The assertions expect a by-element fmla (2s form) that reads v2.s[1].
3748 define <2 x float> @test_vfma_lane_simdinstr_opt_pass_caching_m3(<2 x float> %a, <2 x float> %b, <2 x float> %v) "target-cpu"="exynos-m3" {
3749 ; CHECK-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_m3:
3750 ; CHECK: // %bb.0: // %entry
3751 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3752 ; CHECK-NEXT: fmla v0.2s, v1.2s, v2.s[1]
3755 %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
3756 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)