1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s --check-prefix=CHECK
3 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast -mcpu=exynos-m3 | FileCheck %s --check-prefix=CHECK
5 declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>)
7 declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>)
9 declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>)
11 declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>)
12 declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v4i32.v2i32(<4 x i32>, <2 x i32>, i32)
13 declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v4i32.v4i32(<4 x i32>, <4 x i32>, i32)
15 declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>)
16 declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v2i32.v2i32(<2 x i32>, <2 x i32>, i32)
17 declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v2i32.v4i32(<2 x i32>, <4 x i32>, i32)
19 declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>)
20 declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v8i16.v4i16(<8 x i16>, <4 x i16>, i32)
21 declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v8i16.v8i16(<8 x i16>, <8 x i16>, i32)
23 declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>)
24 declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v4i16.v4i16(<4 x i16>, <4 x i16>, i32)
25 declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v4i16.v8i16(<4 x i16>, <8 x i16>, i32)
27 declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>)
28 declare <4 x i32> @llvm.aarch64.neon.sqdmulh.lane.v4i32.v2i32(<4 x i32>, <2 x i32>, i32)
29 declare <4 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v4i32.v4i32(<4 x i32>, <4 x i32>, i32)
31 declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>)
32 declare <2 x i32> @llvm.aarch64.neon.sqdmulh.lane.v2i32.v2i32(<2 x i32>, <2 x i32>, i32)
33 declare <2 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v2i32.v4i32(<2 x i32>, <4 x i32>, i32)
35 declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>)
36 declare <8 x i16> @llvm.aarch64.neon.sqdmulh.lane.v8i16.v4i16(<8 x i16>, <4 x i16>, i32)
37 declare <8 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v8i16.v8i16(<8 x i16>, <8 x i16>, i32)
39 declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>)
40 declare <4 x i16> @llvm.aarch64.neon.sqdmulh.lane.v4i16.v4i16(<4 x i16>, <4 x i16>, i32)
41 declare <4 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v4i16.v8i16(<4 x i16>, <8 x i16>, i32)
43 declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
45 declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
47 declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
49 declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
51 declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
53 declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
55 declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
57 declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
59 declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
61 declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
; Integer multiply-accumulate by element: %a + splat(%v[3]) * %b on <4 x i16>,
; with %v a 64-bit vector. Expected codegen is one indexed mla using v2.h[3].
63 define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
64 ; CHECK-LABEL: test_vmla_lane_s16:
65 ; CHECK: // %bb.0: // %entry
66 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
67 ; CHECK-NEXT: mla v0.4h, v1.4h, v2.h[3]
70 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
71 %mul = mul <4 x i16> %shuffle, %b
72 %add = add <4 x i16> %mul, %a
; 128-bit variant: lane 3 of the 64-bit <4 x i16> %v is splatted to 8 lanes,
; multiplied by %b and added to %a. Expected codegen is mla .8h with v2.h[3].
76 define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
77 ; CHECK-LABEL: test_vmlaq_lane_s16:
78 ; CHECK: // %bb.0: // %entry
79 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
80 ; CHECK-NEXT: mla v0.8h, v1.8h, v2.h[3]
83 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
84 %mul = mul <8 x i16> %shuffle, %b
85 %add = add <8 x i16> %mul, %a
; %a + splat(%v[1]) * %b on <2 x i32>, with %v a 64-bit vector.
; Expected codegen is one indexed mla using v2.s[1].
89 define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
90 ; CHECK-LABEL: test_vmla_lane_s32:
91 ; CHECK: // %bb.0: // %entry
92 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
93 ; CHECK-NEXT: mla v0.2s, v1.2s, v2.s[1]
96 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
97 %mul = mul <2 x i32> %shuffle, %b
98 %add = add <2 x i32> %mul, %a
; 128-bit variant: lane 1 of the 64-bit <2 x i32> %v is splatted to 4 lanes,
; multiplied by %b and added to %a. Expected codegen is mla .4s with v2.s[1].
102 define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
103 ; CHECK-LABEL: test_vmlaq_lane_s32:
104 ; CHECK: // %bb.0: // %entry
105 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
106 ; CHECK-NEXT: mla v0.4s, v1.4s, v2.s[1]
109 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
110 %mul = mul <4 x i32> %shuffle, %b
111 %add = add <4 x i32> %mul, %a
; "laneq" form: the lane source %v is a 128-bit <8 x i16>; lane 7 is splatted
; to 4 lanes, multiplied by %b and added to %a. Expected: mla .4h with v2.h[7].
115 define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
116 ; CHECK-LABEL: test_vmla_laneq_s16:
117 ; CHECK: // %bb.0: // %entry
118 ; CHECK-NEXT: mla v0.4h, v1.4h, v2.h[7]
121 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
122 %mul = mul <4 x i16> %shuffle, %b
123 %add = add <4 x i16> %mul, %a
; All operands are 128-bit <8 x i16>: %a + splat(%v[7]) * %b.
; Expected codegen is one indexed mla .8h using v2.h[7].
127 define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
128 ; CHECK-LABEL: test_vmlaq_laneq_s16:
129 ; CHECK: // %bb.0: // %entry
130 ; CHECK-NEXT: mla v0.8h, v1.8h, v2.h[7]
133 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
134 %mul = mul <8 x i16> %shuffle, %b
135 %add = add <8 x i16> %mul, %a
; Lane 3 of the 128-bit <4 x i32> %v is splatted to 2 lanes, multiplied by %b
; and added to %a. Expected codegen is mla .2s with v2.s[3].
139 define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
140 ; CHECK-LABEL: test_vmla_laneq_s32:
141 ; CHECK: // %bb.0: // %entry
142 ; CHECK-NEXT: mla v0.2s, v1.2s, v2.s[3]
145 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
146 %mul = mul <2 x i32> %shuffle, %b
147 %add = add <2 x i32> %mul, %a
; All operands are 128-bit <4 x i32>: %a + splat(%v[3]) * %b.
; Expected codegen is one indexed mla .4s using v2.s[3].
151 define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
152 ; CHECK-LABEL: test_vmlaq_laneq_s32:
153 ; CHECK: // %bb.0: // %entry
154 ; CHECK-NEXT: mla v0.4s, v1.4s, v2.s[3]
157 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
158 %mul = mul <4 x i32> %shuffle, %b
159 %add = add <4 x i32> %mul, %a
; Integer multiply-subtract by element: %a - splat(%v[3]) * %b on <4 x i16>,
; with %v a 64-bit vector. Expected codegen is one indexed mls using v2.h[3].
163 define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
164 ; CHECK-LABEL: test_vmls_lane_s16:
165 ; CHECK: // %bb.0: // %entry
166 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
167 ; CHECK-NEXT: mls v0.4h, v1.4h, v2.h[3]
170 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
171 %mul = mul <4 x i16> %shuffle, %b
172 %sub = sub <4 x i16> %a, %mul
; 128-bit variant: lane 3 of the 64-bit <4 x i16> %v is splatted to 8 lanes;
; computes %a - splat * %b. Expected codegen is mls .8h with v2.h[3].
176 define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
177 ; CHECK-LABEL: test_vmlsq_lane_s16:
178 ; CHECK: // %bb.0: // %entry
179 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
180 ; CHECK-NEXT: mls v0.8h, v1.8h, v2.h[3]
183 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
184 %mul = mul <8 x i16> %shuffle, %b
185 %sub = sub <8 x i16> %a, %mul
; %a - splat(%v[1]) * %b on <2 x i32>, with %v a 64-bit vector.
; Expected codegen is one indexed mls using v2.s[1].
189 define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
190 ; CHECK-LABEL: test_vmls_lane_s32:
191 ; CHECK: // %bb.0: // %entry
192 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
193 ; CHECK-NEXT: mls v0.2s, v1.2s, v2.s[1]
196 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
197 %mul = mul <2 x i32> %shuffle, %b
198 %sub = sub <2 x i32> %a, %mul
; 128-bit variant: lane 1 of the 64-bit <2 x i32> %v is splatted to 4 lanes;
; computes %a - splat * %b. Expected codegen is mls .4s with v2.s[1].
202 define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
203 ; CHECK-LABEL: test_vmlsq_lane_s32:
204 ; CHECK: // %bb.0: // %entry
205 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
206 ; CHECK-NEXT: mls v0.4s, v1.4s, v2.s[1]
209 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
210 %mul = mul <4 x i32> %shuffle, %b
211 %sub = sub <4 x i32> %a, %mul
; "laneq" form: lane 7 of the 128-bit <8 x i16> %v is splatted to 4 lanes;
; computes %a - splat * %b. Expected codegen is mls .4h with v2.h[7].
215 define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
216 ; CHECK-LABEL: test_vmls_laneq_s16:
217 ; CHECK: // %bb.0: // %entry
218 ; CHECK-NEXT: mls v0.4h, v1.4h, v2.h[7]
221 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
222 %mul = mul <4 x i16> %shuffle, %b
223 %sub = sub <4 x i16> %a, %mul
; All operands are 128-bit <8 x i16>: %a - splat(%v[7]) * %b.
; Expected codegen is one indexed mls .8h using v2.h[7].
227 define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
228 ; CHECK-LABEL: test_vmlsq_laneq_s16:
229 ; CHECK: // %bb.0: // %entry
230 ; CHECK-NEXT: mls v0.8h, v1.8h, v2.h[7]
233 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
234 %mul = mul <8 x i16> %shuffle, %b
235 %sub = sub <8 x i16> %a, %mul
; Lane 3 of the 128-bit <4 x i32> %v is splatted to 2 lanes;
; computes %a - splat * %b. Expected codegen is mls .2s with v2.s[3].
239 define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
240 ; CHECK-LABEL: test_vmls_laneq_s32:
241 ; CHECK: // %bb.0: // %entry
242 ; CHECK-NEXT: mls v0.2s, v1.2s, v2.s[3]
245 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
246 %mul = mul <2 x i32> %shuffle, %b
247 %sub = sub <2 x i32> %a, %mul
; All operands are 128-bit <4 x i32>: %a - splat(%v[3]) * %b.
; Expected codegen is one indexed mls .4s using v2.s[3].
251 define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
252 ; CHECK-LABEL: test_vmlsq_laneq_s32:
253 ; CHECK: // %bb.0: // %entry
254 ; CHECK-NEXT: mls v0.4s, v1.4s, v2.s[3]
257 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
258 %mul = mul <4 x i32> %shuffle, %b
259 %sub = sub <4 x i32> %a, %mul
; Integer multiply by element: splat(%v[3]) * %a on <4 x i16>, with %v a
; 64-bit vector. Expected codegen is one indexed mul using v1.h[3].
263 define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) {
264 ; CHECK-LABEL: test_vmul_lane_s16:
265 ; CHECK: // %bb.0: // %entry
266 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
267 ; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[3]
270 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
271 %mul = mul <4 x i16> %shuffle, %a
; 128-bit variant: lane 3 of the 64-bit <4 x i16> %v is splatted to 8 lanes
; and multiplied by %a. Expected codegen is mul .8h with v1.h[3].
275 define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
276 ; CHECK-LABEL: test_vmulq_lane_s16:
277 ; CHECK: // %bb.0: // %entry
278 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
279 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[3]
282 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
283 %mul = mul <8 x i16> %shuffle, %a
; splat(%v[1]) * %a on <2 x i32>, with %v a 64-bit vector.
; Expected codegen is one indexed mul using v1.s[1].
287 define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) {
288 ; CHECK-LABEL: test_vmul_lane_s32:
289 ; CHECK: // %bb.0: // %entry
290 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
291 ; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[1]
294 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
295 %mul = mul <2 x i32> %shuffle, %a
; 128-bit variant: lane 1 of the 64-bit <2 x i32> %v is splatted to 4 lanes
; and multiplied by %a. Expected codegen is mul .4s with v1.s[1].
299 define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
300 ; CHECK-LABEL: test_vmulq_lane_s32:
301 ; CHECK: // %bb.0: // %entry
302 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
303 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[1]
306 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
307 %mul = mul <4 x i32> %shuffle, %a
; "u16"-named counterpart of test_vmul_lane_s16; the IR body is the same
; (the mul instruction carries no signedness). Expected: mul .4h with v1.h[3].
311 define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) {
312 ; CHECK-LABEL: test_vmul_lane_u16:
313 ; CHECK: // %bb.0: // %entry
314 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
315 ; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[3]
318 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
319 %mul = mul <4 x i16> %shuffle, %a
; "u16"-named counterpart of test_vmulq_lane_s16 with the same IR body:
; splat(%v[3]) * %a on <8 x i16>. Expected codegen is mul .8h with v1.h[3].
323 define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) {
324 ; CHECK-LABEL: test_vmulq_lane_u16:
325 ; CHECK: // %bb.0: // %entry
326 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
327 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[3]
330 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
331 %mul = mul <8 x i16> %shuffle, %a
; "u32"-named counterpart of test_vmul_lane_s32 with the same IR body:
; splat(%v[1]) * %a on <2 x i32>. Expected codegen is mul .2s with v1.s[1].
335 define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) {
336 ; CHECK-LABEL: test_vmul_lane_u32:
337 ; CHECK: // %bb.0: // %entry
338 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
339 ; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[1]
342 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
343 %mul = mul <2 x i32> %shuffle, %a
; "u32"-named counterpart of test_vmulq_lane_s32 with the same IR body:
; splat(%v[1]) * %a on <4 x i32>. Expected codegen is mul .4s with v1.s[1].
347 define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) {
348 ; CHECK-LABEL: test_vmulq_lane_u32:
349 ; CHECK: // %bb.0: // %entry
350 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
351 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[1]
354 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
355 %mul = mul <4 x i32> %shuffle, %a
; "laneq" form: lane 7 of the 128-bit <8 x i16> %v is splatted to 4 lanes
; and multiplied by %a. Expected codegen is mul .4h with v1.h[7].
359 define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
360 ; CHECK-LABEL: test_vmul_laneq_s16:
361 ; CHECK: // %bb.0: // %entry
362 ; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[7]
365 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
366 %mul = mul <4 x i16> %shuffle, %a
; Both operands are 128-bit <8 x i16>: splat(%v[7]) * %a.
; Expected codegen is one indexed mul .8h using v1.h[7].
370 define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
371 ; CHECK-LABEL: test_vmulq_laneq_s16:
372 ; CHECK: // %bb.0: // %entry
373 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[7]
376 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
377 %mul = mul <8 x i16> %shuffle, %a
; Lane 3 of the 128-bit <4 x i32> %v is splatted to 2 lanes and multiplied
; by %a. Expected codegen is mul .2s with v1.s[3].
381 define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
382 ; CHECK-LABEL: test_vmul_laneq_s32:
383 ; CHECK: // %bb.0: // %entry
384 ; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[3]
387 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
388 %mul = mul <2 x i32> %shuffle, %a
; Both operands are 128-bit <4 x i32>: splat(%v[3]) * %a.
; Expected codegen is one indexed mul .4s using v1.s[3].
392 define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
393 ; CHECK-LABEL: test_vmulq_laneq_s32:
394 ; CHECK: // %bb.0: // %entry
395 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[3]
398 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
399 %mul = mul <4 x i32> %shuffle, %a
; "u16"-named counterpart of test_vmul_laneq_s16 with the same IR body:
; splat(%v[7]) * %a on <4 x i16>. Expected codegen is mul .4h with v1.h[7].
403 define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
404 ; CHECK-LABEL: test_vmul_laneq_u16:
405 ; CHECK: // %bb.0: // %entry
406 ; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[7]
409 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
410 %mul = mul <4 x i16> %shuffle, %a
; "u16"-named counterpart of test_vmulq_laneq_s16 with the same IR body:
; splat(%v[7]) * %a on <8 x i16>. Expected codegen is mul .8h with v1.h[7].
414 define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
415 ; CHECK-LABEL: test_vmulq_laneq_u16:
416 ; CHECK: // %bb.0: // %entry
417 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[7]
420 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
421 %mul = mul <8 x i16> %shuffle, %a
; "u32"-named counterpart of test_vmul_laneq_s32 with the same IR body:
; splat(%v[3]) * %a on <2 x i32>. Expected codegen is mul .2s with v1.s[3].
425 define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
426 ; CHECK-LABEL: test_vmul_laneq_u32:
427 ; CHECK: // %bb.0: // %entry
428 ; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[3]
431 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
432 %mul = mul <2 x i32> %shuffle, %a
; "u32"-named counterpart of test_vmulq_laneq_s32 with the same IR body:
; splat(%v[3]) * %a on <4 x i32>. Expected codegen is mul .4s with v1.s[3].
436 define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
437 ; CHECK-LABEL: test_vmulq_laneq_u32:
438 ; CHECK: // %bb.0: // %entry
439 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[3]
442 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
443 %mul = mul <4 x i32> %shuffle, %a
; Fused multiply-add by element: llvm.fma(splat(%v[1]), %b, %a) on
; <2 x float>, with %v a 64-bit vector. Expected: indexed fmla with v2.s[1].
447 define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
448 ; CHECK-LABEL: test_vfma_lane_f32:
449 ; CHECK: // %bb.0: // %entry
450 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
451 ; CHECK-NEXT: fmla v0.2s, v1.2s, v2.s[1]
454 %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
455 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
459 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
; 128-bit variant: lane 1 of the 64-bit <2 x float> %v is splatted to 4 lanes
; and fed to llvm.fma with %b and %a. Expected: fmla .4s with v2.s[1].
461 define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
462 ; CHECK-LABEL: test_vfmaq_lane_f32:
463 ; CHECK: // %bb.0: // %entry
464 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
465 ; CHECK-NEXT: fmla v0.4s, v1.4s, v2.s[1]
468 %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
469 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
473 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
; "laneq" form: lane 3 of the 128-bit <4 x float> %v is splatted to 2 lanes
; and fed to llvm.fma with %b and %a. Expected: fmla .2s with v2.s[3].
475 define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
476 ; CHECK-LABEL: test_vfma_laneq_f32:
477 ; CHECK: // %bb.0: // %entry
478 ; CHECK-NEXT: fmla v0.2s, v1.2s, v2.s[3]
481 %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
482 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
; All operands are 128-bit <4 x float>: llvm.fma(splat(%v[3]), %b, %a).
; Expected codegen is one indexed fmla .4s using v2.s[3].
486 define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
487 ; CHECK-LABEL: test_vfmaq_laneq_f32:
488 ; CHECK: // %bb.0: // %entry
489 ; CHECK-NEXT: fmla v0.4s, v1.4s, v2.s[3]
492 %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
493 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
; Fused multiply-subtract by element: %v is negated (fsub -0.0, %v) before
; lane 1 is splatted and passed to llvm.fma. The expected codegen folds the
; negation into one indexed fmls using v2.s[1].
497 define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
498 ; CHECK-LABEL: test_vfms_lane_f32:
499 ; CHECK: // %bb.0: // %entry
500 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
501 ; CHECK-NEXT: fmls v0.2s, v1.2s, v2.s[1]
504 %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
505 %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> <i32 1, i32 1>
506 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
; 128-bit variant: the 64-bit %v is negated (fsub -0.0, %v), lane 1 is
; splatted to 4 lanes and passed to llvm.fma. Expected: fmls .4s with v2.s[1].
510 define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
511 ; CHECK-LABEL: test_vfmsq_lane_f32:
512 ; CHECK: // %bb.0: // %entry
513 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
514 ; CHECK-NEXT: fmls v0.4s, v1.4s, v2.s[1]
517 %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
518 %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
519 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
; "laneq" form: the 128-bit %v is negated (fsub -0.0, %v), lane 3 is splatted
; to 2 lanes and passed to llvm.fma. Expected: fmls .2s with v2.s[3].
523 define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
524 ; CHECK-LABEL: test_vfms_laneq_f32:
525 ; CHECK: // %bb.0: // %entry
526 ; CHECK-NEXT: fmls v0.2s, v1.2s, v2.s[3]
529 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
530 %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> <i32 3, i32 3>
531 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
; All operands are 128-bit <4 x float>: %v is negated (fsub -0.0, %v), lane 3
; is splatted and passed to llvm.fma. Expected: fmls .4s with v2.s[3].
535 define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
536 ; CHECK-LABEL: test_vfmsq_laneq_f32:
537 ; CHECK: // %bb.0: // %entry
538 ; CHECK-NEXT: fmls v0.4s, v1.4s, v2.s[3]
541 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
542 %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
543 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
; The single element of <1 x double> %v is splatted to 2 lanes
; (zeroinitializer mask) and passed to llvm.fma with %b and %a.
; Expected codegen is an indexed fmla .2d using v2.d[0].
547 define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
548 ; CHECK-LABEL: test_vfmaq_lane_f64:
549 ; CHECK: // %bb.0: // %entry
550 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
551 ; CHECK-NEXT: fmla v0.2d, v1.2d, v2.d[0]
554 %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
555 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
559 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
; All operands are <2 x double>: llvm.fma(splat(%v[1]), %b, %a).
; Expected codegen is one indexed fmla .2d using v2.d[1].
561 define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
562 ; CHECK-LABEL: test_vfmaq_laneq_f64:
563 ; CHECK: // %bb.0: // %entry
564 ; CHECK-NEXT: fmla v0.2d, v1.2d, v2.d[1]
567 %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
568 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
; <1 x double> %v is negated (fsub -0.0, %v), its element splatted to 2 lanes
; and passed to llvm.fma. Expected: indexed fmls .2d using v2.d[0].
572 define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
573 ; CHECK-LABEL: test_vfmsq_lane_f64:
574 ; CHECK: // %bb.0: // %entry
575 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
576 ; CHECK-NEXT: fmls v0.2d, v1.2d, v2.d[0]
579 %sub = fsub <1 x double> <double -0.000000e+00>, %v
580 %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer
581 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
; All operands are <2 x double>: %v is negated (fsub -0.0, %v), lane 1 is
; splatted and passed to llvm.fma. Expected: fmls .2d using v2.d[1].
585 define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
586 ; CHECK-LABEL: test_vfmsq_laneq_f64:
587 ; CHECK: // %bb.0: // %entry
588 ; CHECK-NEXT: fmls v0.2d, v1.2d, v2.d[1]
591 %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
592 %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> <i32 1, i32 1>
593 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
; Scalar FMA with a vector-lane operand: llvm.fma(%b, %v[3], %a), where the
; lane is read with extractelement. Expected: scalar-by-element fmla, v2.s[3].
597 define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) {
598 ; CHECK-LABEL: test_vfmas_laneq_f32:
599 ; CHECK: // %bb.0: // %entry
600 ; CHECK-NEXT: fmla s0, s1, v2.s[3]
603 %extract = extractelement <4 x float> %v, i32 3
604 %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
608 declare float @llvm.fma.f32(float, float, float)
; Scalar double case: element 0 of <1 x double> %v is extracted, negated
; (fsub -0.0, x) and passed to llvm.fma with %b and %a. The expected codegen
; here is the plain scalar fmsub rather than a by-element fmls.
610 define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) {
611 ; CHECK-LABEL: test_vfmsd_lane_f64:
612 ; CHECK: // %bb.0: // %entry
613 ; CHECK-NEXT: fmsub d0, d1, d2, d0
616 %extract.rhs = extractelement <1 x double> %v, i32 0
617 %extract = fsub double -0.000000e+00, %extract.rhs
618 %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
622 declare double @llvm.fma.f64(double, double, double)
; Scalar float case: lane 1 of the 64-bit <2 x float> %v is extracted, then
; negated (fsub -0.0, x) and passed to llvm.fma with %b and %a.
; Expected codegen is a scalar-by-element fmls using v2.s[1].
624 define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %v) {
625 ; CHECK-LABEL: test_vfmss_lane_f32:
626 ; CHECK: // %bb.0: // %entry
627 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
628 ; CHECK-NEXT: fmls s0, s1, v2.s[1]
631 %extract.rhs = extractelement <2 x float> %v, i32 1
632 %extract = fsub float -0.000000e+00, %extract.rhs
633 %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
; Scalar float case with a 128-bit lane source: lane 3 of %v is extracted,
; negated (fsub -0.0, x) and passed to llvm.fma. Expected: fmls with v2.s[3].
637 define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) {
638 ; CHECK-LABEL: test_vfmss_laneq_f32:
639 ; CHECK: // %bb.0: // %entry
640 ; CHECK-NEXT: fmls s0, s1, v2.s[3]
643 %extract.rhs = extractelement <4 x float> %v, i32 3
644 %extract = fsub float -0.000000e+00, %extract.rhs
645 %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
; Scalar double case with a <2 x double> lane source: lane 1 of %v is
; extracted, negated (fsub -0.0, x) and passed to llvm.fma.
; Expected codegen is a scalar-by-element fmls using v2.d[1].
649 define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) {
650 ; CHECK-LABEL: test_vfmsd_laneq_f64:
651 ; CHECK: // %bb.0: // %entry
652 ; CHECK-NEXT: fmls d0, d1, v2.d[1]
655 %extract.rhs = extractelement <2 x double> %v, i32 1
656 %extract = fsub double -0.000000e+00, %extract.rhs
657 %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
; "_0" variant of test_vfmsd_lane_f64: the negation is applied to the whole
; <1 x double> vector before the extractelement instead of after it.
; The expected codegen is still the scalar fmsub.
661 define double @test_vfmsd_lane_f64_0(double %a, double %b, <1 x double> %v) {
662 ; CHECK-LABEL: test_vfmsd_lane_f64_0:
663 ; CHECK: // %bb.0: // %entry
664 ; CHECK-NEXT: fmsub d0, d1, d2, d0
667 %tmp0 = fsub <1 x double> <double -0.000000e+00>, %v
668 %tmp1 = extractelement <1 x double> %tmp0, i32 0
669 %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a)
; "_0" variant of test_vfmss_lane_f32: the whole <2 x float> %v is negated
; first and lane 1 is extracted afterwards. The expected codegen is still a
; scalar-by-element fmls using v2.s[1].
673 define float @test_vfmss_lane_f32_0(float %a, float %b, <2 x float> %v) {
674 ; CHECK-LABEL: test_vfmss_lane_f32_0:
675 ; CHECK: // %bb.0: // %entry
676 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
677 ; CHECK-NEXT: fmls s0, s1, v2.s[1]
680 %tmp0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
681 %tmp1 = extractelement <2 x float> %tmp0, i32 1
682 %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a)
; "_0" variant of test_vfmss_laneq_f32: the whole <4 x float> %v is negated
; first and lane 3 is extracted afterwards. Expected: fmls using v2.s[3].
686 define float @test_vfmss_laneq_f32_0(float %a, float %b, <4 x float> %v) {
687 ; CHECK-LABEL: test_vfmss_laneq_f32_0:
688 ; CHECK: // %bb.0: // %entry
689 ; CHECK-NEXT: fmls s0, s1, v2.s[3]
692 %tmp0 = fsub <4 x float><float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
693 %tmp1 = extractelement <4 x float> %tmp0, i32 3
694 %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a)
; "_0" variant of test_vfmsd_laneq_f64: the whole <2 x double> %v is negated
; first and lane 1 is extracted afterwards. Expected: fmls using v2.d[1].
698 define double @test_vfmsd_laneq_f64_0(double %a, double %b, <2 x double> %v) {
699 ; CHECK-LABEL: test_vfmsd_laneq_f64_0:
700 ; CHECK: // %bb.0: // %entry
701 ; CHECK-NEXT: fmls d0, d1, v2.d[1]
704 %tmp0 = fsub <2 x double><double -0.000000e+00, double -0.000000e+00>, %v
705 %tmp1 = extractelement <2 x double> %tmp0, i32 1
706 %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a)
; Signed widening multiply-accumulate by element: the smull intrinsic of %b
; and splat(%v[3]) (64-bit %v) is added to the <4 x i32> accumulator %a.
; Expected codegen is one indexed smlal using v2.h[3].
710 define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
711 ; CHECK-LABEL: test_vmlal_lane_s16:
712 ; CHECK: // %bb.0: // %entry
713 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
714 ; CHECK-NEXT: smlal v0.4s, v1.4h, v2.h[3]
717 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
718 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
719 %add = add <4 x i32> %vmull2.i, %a
; Signed widening multiply-accumulate by element: smull of %b and
; splat(%v[1]) (64-bit %v) is added to the <2 x i64> accumulator %a.
; Expected codegen is one indexed smlal using v2.s[1].
723 define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
724 ; CHECK-LABEL: test_vmlal_lane_s32:
725 ; CHECK: // %bb.0: // %entry
726 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
727 ; CHECK-NEXT: smlal v0.2d, v1.2s, v2.s[1]
730 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
731 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
732 %add = add <2 x i64> %vmull2.i, %a
; "laneq" form: lane 7 of the 128-bit <8 x i16> %v is splatted to 4 lanes,
; smull'ed with %b and added to %a. Expected: smlal using v2.h[7].
736 define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
737 ; CHECK-LABEL: test_vmlal_laneq_s16:
738 ; CHECK: // %bb.0: // %entry
739 ; CHECK-NEXT: smlal v0.4s, v1.4h, v2.h[7]
742 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
743 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
744 %add = add <4 x i32> %vmull2.i, %a
; "laneq" form: lane 3 of the 128-bit <4 x i32> %v is splatted to 2 lanes,
; smull'ed with %b and added to %a. Expected: smlal using v2.s[3].
748 define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
749 ; CHECK-LABEL: test_vmlal_laneq_s32:
750 ; CHECK: // %bb.0: // %entry
751 ; CHECK-NEXT: smlal v0.2d, v1.2s, v2.s[3]
754 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
755 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
756 %add = add <2 x i64> %vmull2.i, %a
; "high" form: %shuffle.i selects elements 4-7 (the upper half) of %b, which
; is smull'ed with splat(%v[3]) and added to %a.
; Expected codegen is one indexed smlal2 using v2.h[3].
760 define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
761 ; CHECK-LABEL: test_vmlal_high_lane_s16:
762 ; CHECK: // %bb.0: // %entry
763 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
764 ; CHECK-NEXT: smlal2 v0.4s, v1.8h, v2.h[3]
767 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
768 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
769 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
770 %add = add <4 x i32> %vmull2.i, %a
; "high" form: %shuffle.i selects elements 2-3 (the upper half) of %b, which
; is smull'ed with splat(%v[1]) and added to %a.
; Expected codegen is one indexed smlal2 using v2.s[1].
774 define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
775 ; CHECK-LABEL: test_vmlal_high_lane_s32:
776 ; CHECK: // %bb.0: // %entry
777 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
778 ; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.s[1]
781 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
782 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
783 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
784 %add = add <2 x i64> %vmull2.i, %a
; "high" + "laneq" form: upper half (elements 4-7) of %b is smull'ed with
; splat(%v[7]) from a 128-bit %v and added to %a. Expected: smlal2, v2.h[7].
788 define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
789 ; CHECK-LABEL: test_vmlal_high_laneq_s16:
790 ; CHECK: // %bb.0: // %entry
791 ; CHECK-NEXT: smlal2 v0.4s, v1.8h, v2.h[7]
794 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
795 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
796 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
797 %add = add <4 x i32> %vmull2.i, %a
; "high" + "laneq" form: upper half (elements 2-3) of %b is smull'ed with
; splat(%v[3]) from a 128-bit %v and added to %a. Expected: smlal2, v2.s[3].
801 define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
802 ; CHECK-LABEL: test_vmlal_high_laneq_s32:
803 ; CHECK: // %bb.0: // %entry
804 ; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.s[3]
807 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
808 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
809 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
810 %add = add <2 x i64> %vmull2.i, %a
; Signed widening multiply-subtract by element: the smull intrinsic of %b and
; splat(%v[3]) (64-bit %v) is subtracted from the <4 x i32> accumulator %a.
; Expected codegen is one indexed smlsl using v2.h[3].
814 define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
815 ; CHECK-LABEL: test_vmlsl_lane_s16:
816 ; CHECK: // %bb.0: // %entry
817 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
818 ; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.h[3]
821 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
822 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
823 %sub = sub <4 x i32> %a, %vmull2.i
; Signed widening multiply-subtract by element: smull of %b and splat(%v[1])
; (64-bit %v) is subtracted from the <2 x i64> accumulator %a.
; Expected codegen is one indexed smlsl using v2.s[1].
827 define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
828 ; CHECK-LABEL: test_vmlsl_lane_s32:
829 ; CHECK: // %bb.0: // %entry
830 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
831 ; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.s[1]
834 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
835 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
836 %sub = sub <2 x i64> %a, %vmull2.i
; "laneq" form: lane 7 of the 128-bit <8 x i16> %v is splatted to 4 lanes,
; smull'ed with %b and subtracted from %a. Expected: smlsl using v2.h[7].
840 define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
841 ; CHECK-LABEL: test_vmlsl_laneq_s16:
842 ; CHECK: // %bb.0: // %entry
843 ; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.h[7]
846 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
847 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
848 %sub = sub <4 x i32> %a, %vmull2.i
; "laneq" form: lane 3 of the 128-bit <4 x i32> %v is splatted to 2 lanes,
; smull'ed with %b and subtracted from %a. Expected: smlsl using v2.s[3].
852 define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
853 ; CHECK-LABEL: test_vmlsl_laneq_s32:
854 ; CHECK: // %bb.0: // %entry
855 ; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.s[3]
858 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
859 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
860 %sub = sub <2 x i64> %a, %vmull2.i
; "high" form: %shuffle.i selects elements 4-7 (the upper half) of %b, which
; is smull'ed with splat(%v[3]) and subtracted from %a.
; Expected codegen is one indexed smlsl2 using v2.h[3].
864 define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
865 ; CHECK-LABEL: test_vmlsl_high_lane_s16:
866 ; CHECK: // %bb.0: // %entry
867 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
868 ; CHECK-NEXT: smlsl2 v0.4s, v1.8h, v2.h[3]
871 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
872 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
873 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
874 %sub = sub <4 x i32> %a, %vmull2.i
; "high" form: %shuffle.i selects elements 2-3 (the upper half) of %b, which
; is smull'ed with splat(%v[1]) and subtracted from %a.
; Expected codegen is one indexed smlsl2 using v2.s[1].
878 define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
879 ; CHECK-LABEL: test_vmlsl_high_lane_s32:
880 ; CHECK: // %bb.0: // %entry
881 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
882 ; CHECK-NEXT: smlsl2 v0.2d, v1.4s, v2.s[1]
885 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
886 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
887 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
888 %sub = sub <2 x i64> %a, %vmull2.i
; Multiply-subtract long on the high half: elements 4-7 of %b are smull'ed with
; a splat of lane 7 of the <8 x i16> %v and subtracted from %a.
; The assertions expect "smlsl2 ... v2.h[7]".
892 define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
893 ; CHECK-LABEL: test_vmlsl_high_laneq_s16:
894 ; CHECK: // %bb.0: // %entry
895 ; CHECK-NEXT: smlsl2 v0.4s, v1.8h, v2.h[7]
898 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
899 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
900 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
901 %sub = sub <4 x i32> %a, %vmull2.i
; Multiply-subtract long on the high half: elements 2-3 of %b are smull'ed with
; a splat of lane 3 of the <4 x i32> %v and subtracted from %a.
; The assertions expect "smlsl2 ... v2.s[3]".
905 define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
906 ; CHECK-LABEL: test_vmlsl_high_laneq_s32:
907 ; CHECK: // %bb.0: // %entry
908 ; CHECK-NEXT: smlsl2 v0.2d, v1.4s, v2.s[3]
911 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
912 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
913 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
914 %sub = sub <2 x i64> %a, %vmull2.i
; Multiply-accumulate long by lane: umull of %b with a splat of lane 3 of the
; <4 x i16> %v, added to %a. The assertions expect "umlal ... v2.h[3]",
; preceded by the `// kill:` comment llc prints for $d2/$q2.
918 define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
919 ; CHECK-LABEL: test_vmlal_lane_u16:
920 ; CHECK: // %bb.0: // %entry
921 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
922 ; CHECK-NEXT: umlal v0.4s, v1.4h, v2.h[3]
925 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
926 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
927 %add = add <4 x i32> %vmull2.i, %a
; Multiply-accumulate long by lane: umull of %b with a splat of lane 1 of the
; <2 x i32> %v, added to %a. The assertions expect "umlal ... v2.s[1]",
; preceded by the `// kill:` comment llc prints for $d2/$q2.
931 define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
932 ; CHECK-LABEL: test_vmlal_lane_u32:
933 ; CHECK: // %bb.0: // %entry
934 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
935 ; CHECK-NEXT: umlal v0.2d, v1.2s, v2.s[1]
938 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
939 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
940 %add = add <2 x i64> %vmull2.i, %a
; Multiply-accumulate long by lane: umull of %b with a splat of lane 7 of the
; <8 x i16> %v, added to %a. The assertions expect "umlal ... v2.h[7]".
944 define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
945 ; CHECK-LABEL: test_vmlal_laneq_u16:
946 ; CHECK: // %bb.0: // %entry
947 ; CHECK-NEXT: umlal v0.4s, v1.4h, v2.h[7]
950 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
951 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
952 %add = add <4 x i32> %vmull2.i, %a
; Multiply-accumulate long by lane: umull of %b with a splat of lane 3 of the
; <4 x i32> %v, added to %a. The assertions expect "umlal ... v2.s[3]".
956 define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
957 ; CHECK-LABEL: test_vmlal_laneq_u32:
958 ; CHECK: // %bb.0: // %entry
959 ; CHECK-NEXT: umlal v0.2d, v1.2s, v2.s[3]
962 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
963 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
964 %add = add <2 x i64> %vmull2.i, %a
; Multiply-accumulate long on the high half: elements 4-7 of %b are umull'ed
; with a splat of lane 3 of the <4 x i16> %v and added to %a. The assertions
; expect "umlal2 ... v2.h[3]", preceded by the `// kill:` comment llc prints
; for $d2/$q2.
968 define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
969 ; CHECK-LABEL: test_vmlal_high_lane_u16:
970 ; CHECK: // %bb.0: // %entry
971 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
972 ; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.h[3]
975 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
976 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
977 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
978 %add = add <4 x i32> %vmull2.i, %a
; Multiply-accumulate long on the high half: elements 2-3 of %b are umull'ed
; with a splat of lane 1 of the <2 x i32> %v and added to %a. The assertions
; expect "umlal2 ... v2.s[1]", preceded by the `// kill:` comment llc prints
; for $d2/$q2.
982 define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
983 ; CHECK-LABEL: test_vmlal_high_lane_u32:
984 ; CHECK: // %bb.0: // %entry
985 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
986 ; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.s[1]
989 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
990 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
991 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
992 %add = add <2 x i64> %vmull2.i, %a
; Multiply-accumulate long on the high half: elements 4-7 of %b are umull'ed
; with a splat of lane 7 of the <8 x i16> %v and added to %a.
; The assertions expect "umlal2 ... v2.h[7]".
996 define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
997 ; CHECK-LABEL: test_vmlal_high_laneq_u16:
998 ; CHECK: // %bb.0: // %entry
999 ; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.h[7]
1002 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1003 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1004 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1005 %add = add <4 x i32> %vmull2.i, %a
; Multiply-accumulate long on the high half: elements 2-3 of %b are umull'ed
; with a splat of lane 3 of the <4 x i32> %v and added to %a.
; The assertions expect "umlal2 ... v2.s[3]".
1009 define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
1010 ; CHECK-LABEL: test_vmlal_high_laneq_u32:
1011 ; CHECK: // %bb.0: // %entry
1012 ; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.s[3]
1015 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1016 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
1017 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1018 %add = add <2 x i64> %vmull2.i, %a
; Multiply-subtract long by lane: umull of %b with a splat of lane 3 of the
; <4 x i16> %v, subtracted from %a. The assertions expect "umlsl ... v2.h[3]",
; preceded by the `// kill:` comment llc prints for $d2/$q2.
1022 define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
1023 ; CHECK-LABEL: test_vmlsl_lane_u16:
1024 ; CHECK: // %bb.0: // %entry
1025 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1026 ; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.h[3]
1029 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1030 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
1031 %sub = sub <4 x i32> %a, %vmull2.i
; Multiply-subtract long by lane: umull of %b with a splat of lane 1 of the
; <2 x i32> %v, subtracted from %a. The assertions expect "umlsl ... v2.s[1]",
; preceded by the `// kill:` comment llc prints for $d2/$q2.
1035 define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
1036 ; CHECK-LABEL: test_vmlsl_lane_u32:
1037 ; CHECK: // %bb.0: // %entry
1038 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1039 ; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.s[1]
1042 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1043 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
1044 %sub = sub <2 x i64> %a, %vmull2.i
; Multiply-subtract long by lane: umull of %b with a splat of lane 7 of the
; <8 x i16> %v, subtracted from %a. The assertions expect "umlsl ... v2.h[7]".
1048 define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
1049 ; CHECK-LABEL: test_vmlsl_laneq_u16:
1050 ; CHECK: // %bb.0: // %entry
1051 ; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.h[7]
1054 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1055 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
1056 %sub = sub <4 x i32> %a, %vmull2.i
; Multiply-subtract long by lane: umull of %b with a splat of lane 3 of the
; <4 x i32> %v, subtracted from %a. The assertions expect "umlsl ... v2.s[3]".
1060 define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
1061 ; CHECK-LABEL: test_vmlsl_laneq_u32:
1062 ; CHECK: // %bb.0: // %entry
1063 ; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.s[3]
1066 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
1067 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
1068 %sub = sub <2 x i64> %a, %vmull2.i
; Multiply-subtract long on the high half: elements 4-7 of %b are umull'ed with
; a splat of lane 3 of the <4 x i16> %v and subtracted from %a. The assertions
; expect "umlsl2 ... v2.h[3]", preceded by the `// kill:` comment llc prints
; for $d2/$q2.
1072 define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
1073 ; CHECK-LABEL: test_vmlsl_high_lane_u16:
1074 ; CHECK: // %bb.0: // %entry
1075 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1076 ; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.h[3]
1079 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1080 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1081 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1082 %sub = sub <4 x i32> %a, %vmull2.i
; Multiply-subtract long on the high half: elements 2-3 of %b are umull'ed with
; a splat of lane 1 of the <2 x i32> %v and subtracted from %a. The assertions
; expect "umlsl2 ... v2.s[1]", preceded by the `// kill:` comment llc prints
; for $d2/$q2.
1086 define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
1087 ; CHECK-LABEL: test_vmlsl_high_lane_u32:
1088 ; CHECK: // %bb.0: // %entry
1089 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1090 ; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.s[1]
1093 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1094 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1095 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1096 %sub = sub <2 x i64> %a, %vmull2.i
; Multiply-subtract long on the high half: elements 4-7 of %b are umull'ed with
; a splat of lane 7 of the <8 x i16> %v and subtracted from %a.
; The assertions expect "umlsl2 ... v2.h[7]".
1100 define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
1101 ; CHECK-LABEL: test_vmlsl_high_laneq_u16:
1102 ; CHECK: // %bb.0: // %entry
1103 ; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.h[7]
1106 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1107 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1108 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1109 %sub = sub <4 x i32> %a, %vmull2.i
; Multiply-subtract long on the high half: elements 2-3 of %b are umull'ed with
; a splat of lane 3 of the <4 x i32> %v and subtracted from %a.
; The assertions expect "umlsl2 ... v2.s[3]".
1113 define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
1114 ; CHECK-LABEL: test_vmlsl_high_laneq_u32:
1115 ; CHECK: // %bb.0: // %entry
1116 ; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.s[3]
1119 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1120 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
1121 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1122 %sub = sub <2 x i64> %a, %vmull2.i
; Widening multiply by lane: returns smull of %a with a splat of lane 3 of the
; <4 x i16> %v. The assertions expect "smull ... v1.h[3]", preceded by the
; `// kill:` comment llc prints for $d1/$q1.
1126 define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
1127 ; CHECK-LABEL: test_vmull_lane_s16:
1128 ; CHECK: // %bb.0: // %entry
1129 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1130 ; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[3]
1133 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1134 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
1135 ret <4 x i32> %vmull2.i
; Widening multiply by lane: returns smull of %a with a splat of lane 1 of the
; <2 x i32> %v. The assertions expect "smull ... v1.s[1]", preceded by the
; `// kill:` comment llc prints for $d1/$q1.
1138 define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
1139 ; CHECK-LABEL: test_vmull_lane_s32:
1140 ; CHECK: // %bb.0: // %entry
1141 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1142 ; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[1]
1145 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1146 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
1147 ret <2 x i64> %vmull2.i
; Widening multiply by lane: returns umull of %a with a splat of lane 3 of the
; <4 x i16> %v. The assertions expect "umull ... v1.h[3]", preceded by the
; `// kill:` comment llc prints for $d1/$q1.
1150 define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) {
1151 ; CHECK-LABEL: test_vmull_lane_u16:
1152 ; CHECK: // %bb.0: // %entry
1153 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1154 ; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[3]
1157 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1158 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
1159 ret <4 x i32> %vmull2.i
; Widening multiply by lane: returns umull of %a with a splat of lane 1 of the
; <2 x i32> %v. The assertions expect "umull ... v1.s[1]", preceded by the
; `// kill:` comment llc prints for $d1/$q1.
1162 define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) {
1163 ; CHECK-LABEL: test_vmull_lane_u32:
1164 ; CHECK: // %bb.0: // %entry
1165 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1166 ; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[1]
1169 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1170 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
1171 ret <2 x i64> %vmull2.i
; Widening multiply on the high half: returns smull of elements 4-7 of %a with
; a splat of lane 3 of the <4 x i16> %v. The assertions expect
; "smull2 ... v1.h[3]", preceded by the `// kill:` comment llc prints for
; $d1/$q1.
1174 define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
1175 ; CHECK-LABEL: test_vmull_high_lane_s16:
1176 ; CHECK: // %bb.0: // %entry
1177 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1178 ; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.h[3]
1181 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1182 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1183 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1184 ret <4 x i32> %vmull2.i
; Widening multiply on the high half: returns smull of elements 2-3 of %a with
; a splat of lane 1 of the <2 x i32> %v. The assertions expect
; "smull2 ... v1.s[1]", preceded by the `// kill:` comment llc prints for
; $d1/$q1.
1187 define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
1188 ; CHECK-LABEL: test_vmull_high_lane_s32:
1189 ; CHECK: // %bb.0: // %entry
1190 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1191 ; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.s[1]
1194 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1195 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1196 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1197 ret <2 x i64> %vmull2.i
; Widening multiply on the high half: returns umull of elements 4-7 of %a with
; a splat of lane 3 of the <4 x i16> %v. The assertions expect
; "umull2 ... v1.h[3]", preceded by the `// kill:` comment llc prints for
; $d1/$q1.
1200 define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) {
1201 ; CHECK-LABEL: test_vmull_high_lane_u16:
1202 ; CHECK: // %bb.0: // %entry
1203 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1204 ; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.h[3]
1207 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1208 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1209 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1210 ret <4 x i32> %vmull2.i
; Widening multiply on the high half: returns umull of elements 2-3 of %a with
; a splat of lane 1 of the <2 x i32> %v. The assertions expect
; "umull2 ... v1.s[1]", preceded by the `// kill:` comment llc prints for
; $d1/$q1.
1213 define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) {
1214 ; CHECK-LABEL: test_vmull_high_lane_u32:
1215 ; CHECK: // %bb.0: // %entry
1216 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1217 ; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.s[1]
1220 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1221 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1222 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1223 ret <2 x i64> %vmull2.i
; Widening multiply by lane: returns smull of %a with a splat of lane 7 of the
; <8 x i16> %v. The assertions expect "smull ... v1.h[7]".
1226 define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
1227 ; CHECK-LABEL: test_vmull_laneq_s16:
1228 ; CHECK: // %bb.0: // %entry
1229 ; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[7]
1232 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1233 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
1234 ret <4 x i32> %vmull2.i
; Widening multiply by lane: returns smull of %a with a splat of lane 3 of the
; <4 x i32> %v. The assertions expect "smull ... v1.s[3]".
1237 define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
1238 ; CHECK-LABEL: test_vmull_laneq_s32:
1239 ; CHECK: // %bb.0: // %entry
1240 ; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[3]
1243 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
1244 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
1245 ret <2 x i64> %vmull2.i
; Widening multiply by lane: returns umull of %a with a splat of lane 7 of the
; <8 x i16> %v. The assertions expect "umull ... v1.h[7]".
1248 define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
1249 ; CHECK-LABEL: test_vmull_laneq_u16:
1250 ; CHECK: // %bb.0: // %entry
1251 ; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[7]
1254 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1255 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
1256 ret <4 x i32> %vmull2.i
; Widening multiply by lane: returns umull of %a with a splat of lane 3 of the
; <4 x i32> %v. The assertions expect "umull ... v1.s[3]".
1259 define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
1260 ; CHECK-LABEL: test_vmull_laneq_u32:
1261 ; CHECK: // %bb.0: // %entry
1262 ; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[3]
1265 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
1266 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
1267 ret <2 x i64> %vmull2.i
; Widening multiply on the high half: returns smull of elements 4-7 of %a with
; a splat of lane 7 of the <8 x i16> %v.
; The assertions expect "smull2 ... v1.h[7]".
1270 define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
1271 ; CHECK-LABEL: test_vmull_high_laneq_s16:
1272 ; CHECK: // %bb.0: // %entry
1273 ; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.h[7]
1276 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1277 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1278 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1279 ret <4 x i32> %vmull2.i
; Widening multiply on the high half: returns smull of elements 2-3 of %a with
; a splat of lane 3 of the <4 x i32> %v.
; The assertions expect "smull2 ... v1.s[3]".
1282 define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
1283 ; CHECK-LABEL: test_vmull_high_laneq_s32:
1284 ; CHECK: // %bb.0: // %entry
1285 ; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.s[3]
1288 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1289 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
1290 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1291 ret <2 x i64> %vmull2.i
; Widening multiply on the high half: returns umull of elements 4-7 of %a with
; a splat of lane 7 of the <8 x i16> %v.
; The assertions expect "umull2 ... v1.h[7]".
1294 define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
1295 ; CHECK-LABEL: test_vmull_high_laneq_u16:
1296 ; CHECK: // %bb.0: // %entry
1297 ; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.h[7]
1300 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1301 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1302 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1303 ret <4 x i32> %vmull2.i
; Widening multiply on the high half: returns umull of elements 2-3 of %a with
; a splat of lane 3 of the <4 x i32> %v.
; The assertions expect "umull2 ... v1.s[3]".
1306 define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
1307 ; CHECK-LABEL: test_vmull_high_laneq_u32:
1308 ; CHECK: // %bb.0: // %entry
1309 ; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.s[3]
1312 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1313 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
1314 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1315 ret <2 x i64> %vmull2.i
; Returns sqadd(%a, sqdmull(%b, splat of lane 3 of the <4 x i16> %v)).
; The assertions expect the two intrinsic calls to become one
; "sqdmlal ... v2.h[3]", preceded by the `// kill:` comment llc prints for
; $d2/$q2.
1318 define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
1319 ; CHECK-LABEL: test_vqdmlal_lane_s16:
1320 ; CHECK: // %bb.0: // %entry
1321 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1322 ; CHECK-NEXT: sqdmlal v0.4s, v1.4h, v2.h[3]
1325 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1326 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
1327 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
1328 ret <4 x i32> %vqdmlal4.i
; Returns sqadd(%a, sqdmull(%b, splat of lane 1 of the <2 x i32> %v)).
; The assertions expect the two intrinsic calls to become one
; "sqdmlal ... v2.s[1]", preceded by the `// kill:` comment llc prints for
; $d2/$q2.
1331 define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
1332 ; CHECK-LABEL: test_vqdmlal_lane_s32:
1333 ; CHECK: // %bb.0: // %entry
1334 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1335 ; CHECK-NEXT: sqdmlal v0.2d, v1.2s, v2.s[1]
1338 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1339 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
1340 %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
1341 ret <2 x i64> %vqdmlal4.i
; Returns sqadd(%a, sqdmull(elements 4-7 of %b, splat of lane 3 of the
; <4 x i16> %v)). The assertions expect "sqdmlal2 ... v2.h[3]", preceded by the
; `// kill:` comment llc prints for $d2/$q2.
1344 define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
1345 ; CHECK-LABEL: test_vqdmlal_high_lane_s16:
1346 ; CHECK: // %bb.0: // %entry
1347 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1348 ; CHECK-NEXT: sqdmlal2 v0.4s, v1.8h, v2.h[3]
1351 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1352 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1353 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1354 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
1355 ret <4 x i32> %vqdmlal4.i
; Returns sqadd(%a, sqdmull(elements 2-3 of %b, splat of lane 1 of the
; <2 x i32> %v)). The assertions expect "sqdmlal2 ... v2.s[1]", preceded by the
; `// kill:` comment llc prints for $d2/$q2.
1358 define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
1359 ; CHECK-LABEL: test_vqdmlal_high_lane_s32:
1360 ; CHECK: // %bb.0: // %entry
1361 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1362 ; CHECK-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[1]
1365 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1366 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1367 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1368 %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
1369 ret <2 x i64> %vqdmlal4.i
; Returns sqsub(%a, sqdmull(%b, splat of lane 3 of the <4 x i16> %v)).
; The assertions expect the two intrinsic calls to become one
; "sqdmlsl ... v2.h[3]", preceded by the `// kill:` comment llc prints for
; $d2/$q2.
1372 define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
1373 ; CHECK-LABEL: test_vqdmlsl_lane_s16:
1374 ; CHECK: // %bb.0: // %entry
1375 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1376 ; CHECK-NEXT: sqdmlsl v0.4s, v1.4h, v2.h[3]
1379 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1380 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
1381 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
1382 ret <4 x i32> %vqdmlsl4.i
; Returns sqsub(%a, sqdmull(%b, splat of lane 1 of the <2 x i32> %v)).
; The assertions expect the two intrinsic calls to become one
; "sqdmlsl ... v2.s[1]", preceded by the `// kill:` comment llc prints for
; $d2/$q2.
1385 define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
1386 ; CHECK-LABEL: test_vqdmlsl_lane_s32:
1387 ; CHECK: // %bb.0: // %entry
1388 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1389 ; CHECK-NEXT: sqdmlsl v0.2d, v1.2s, v2.s[1]
1392 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1393 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
1394 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
1395 ret <2 x i64> %vqdmlsl4.i
; Returns sqsub(%a, sqdmull(elements 4-7 of %b, splat of lane 3 of the
; <4 x i16> %v)). The assertions expect "sqdmlsl2 ... v2.h[3]", preceded by the
; `// kill:` comment llc prints for $d2/$q2.
1398 define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
1399 ; CHECK-LABEL: test_vqdmlsl_high_lane_s16:
1400 ; CHECK: // %bb.0: // %entry
1401 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1402 ; CHECK-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.h[3]
1405 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1406 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1407 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1408 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
1409 ret <4 x i32> %vqdmlsl4.i
; Returns sqsub(%a, sqdmull(elements 2-3 of %b, splat of lane 1 of the
; <2 x i32> %v)). The assertions expect "sqdmlsl2 ... v2.s[1]", preceded by the
; `// kill:` comment llc prints for $d2/$q2.
1412 define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
1413 ; CHECK-LABEL: test_vqdmlsl_high_lane_s32:
1414 ; CHECK: // %bb.0: // %entry
1415 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1416 ; CHECK-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.s[1]
1419 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1420 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1421 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1422 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
1423 ret <2 x i64> %vqdmlsl4.i
; Returns sqdmull of %a with a splat of lane 3 of the <4 x i16> %v.
; The assertions expect "sqdmull ... v1.h[3]", preceded by the `// kill:`
; comment llc prints for $d1/$q1.
1426 define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
1427 ; CHECK-LABEL: test_vqdmull_lane_s16:
1428 ; CHECK: // %bb.0: // %entry
1429 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1430 ; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.h[3]
1433 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1434 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
1435 ret <4 x i32> %vqdmull2.i
; Returns sqdmull of %a with a splat of lane 1 of the <2 x i32> %v.
; The assertions expect "sqdmull ... v1.s[1]", preceded by the `// kill:`
; comment llc prints for $d1/$q1.
1438 define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
1439 ; CHECK-LABEL: test_vqdmull_lane_s32:
1440 ; CHECK: // %bb.0: // %entry
1441 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1442 ; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[1]
1445 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1446 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
1447 ret <2 x i64> %vqdmull2.i
; Returns sqdmull of %a with a splat of lane 3 of the <8 x i16> %v.
; The assertions expect "sqdmull ... v1.h[3]".
; NOTE(review): the other *_laneq_s16 tests in this file select lane 7; this
; one selects lane 3, so it does not reach the upper half of %v.
1450 define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
1451 ; CHECK-LABEL: test_vqdmull_laneq_s16:
1452 ; CHECK: // %bb.0: // %entry
1453 ; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.h[3]
1456 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1457 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
1458 ret <4 x i32> %vqdmull2.i
; Returns sqdmull of %a with a splat of lane 3 of the <4 x i32> %v.
; The assertions expect "sqdmull ... v1.s[3]".
1461 define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
1462 ; CHECK-LABEL: test_vqdmull_laneq_s32:
1463 ; CHECK: // %bb.0: // %entry
1464 ; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[3]
1467 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
1468 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
1469 ret <2 x i64> %vqdmull2.i
; Returns sqdmull of elements 4-7 of %a with a splat of lane 3 of the
; <4 x i16> %v. The assertions expect "sqdmull2 ... v1.h[3]", preceded by the
; `// kill:` comment llc prints for $d1/$q1.
1472 define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
1473 ; CHECK-LABEL: test_vqdmull_high_lane_s16:
1474 ; CHECK: // %bb.0: // %entry
1475 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1476 ; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[3]
1479 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1480 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1481 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1482 ret <4 x i32> %vqdmull2.i
; Returns sqdmull of elements 2-3 of %a with a splat of lane 1 of the
; <2 x i32> %v. The assertions expect "sqdmull2 ... v1.s[1]", preceded by the
; `// kill:` comment llc prints for $d1/$q1.
1485 define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
1486 ; CHECK-LABEL: test_vqdmull_high_lane_s32:
1487 ; CHECK: // %bb.0: // %entry
1488 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1489 ; CHECK-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[1]
1492 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1493 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1494 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1495 ret <2 x i64> %vqdmull2.i
; Returns sqdmull of elements 4-7 of %a with a splat of lane 7 of the
; <8 x i16> %v. The assertions expect "sqdmull2 ... v1.h[7]".
1498 define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
1499 ; CHECK-LABEL: test_vqdmull_high_laneq_s16:
1500 ; CHECK: // %bb.0: // %entry
1501 ; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[7]
1504 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1505 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1506 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
1507 ret <4 x i32> %vqdmull2.i
; Returns sqdmull of elements 2-3 of %a with a splat of lane 3 of the
; <4 x i32> %v. The assertions expect "sqdmull2 ... v1.s[3]".
1510 define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
1511 ; CHECK-LABEL: test_vqdmull_high_laneq_s32:
1512 ; CHECK: // %bb.0: // %entry
1513 ; CHECK-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[3]
1516 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1517 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
1518 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
1519 ret <2 x i64> %vqdmull2.i
; Returns the plain sqdmulh intrinsic applied to %a and a shufflevector splat
; of lane 3 of the <4 x i16> %v. The assertions expect the by-element form
; "sqdmulh ... v1.h[3]", preceded by the `// kill:` comment llc prints for
; $d1/$q1.
1522 define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
1523 ; CHECK-LABEL: test_vqdmulh_lane_s16:
1524 ; CHECK: // %bb.0: // %entry
1525 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1526 ; CHECK-NEXT: sqdmulh v0.4h, v0.4h, v1.h[3]
1529 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1530 %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
1531 ret <4 x i16> %vqdmulh2.i
; Returns the sqdmulh.lane intrinsic called directly with lane index 3 on the
; <4 x i16> %v (no shufflevector). The assertions expect
; "sqdmulh ... v1.h[3]", preceded by the `// kill:` comment llc prints for
; $d1/$q1.
1534 define <4 x i16> @test_vqdmulh_lane_s16_intrinsic(<4 x i16> %a, <4 x i16> %v) {
1535 ; CHECK-LABEL: test_vqdmulh_lane_s16_intrinsic:
1536 ; CHECK: // %bb.0: // %entry
1537 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1538 ; CHECK-NEXT: sqdmulh v0.4h, v0.4h, v1.h[3]
1541 %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.lane.v4i16.v4i16(<4 x i16> %a, <4 x i16> %v, i32 3)
1542 ret <4 x i16> %vqdmulh2.i
; Returns the sqdmulh.laneq intrinsic with lane index 3, i.e. a lane in the
; lower half of the <8 x i16> %v. The assertions expect "sqdmulh ... v1.h[3]".
1545 define <4 x i16> @test_vqdmulh_laneq_s16_intrinsic_lo(<4 x i16> %a, <8 x i16> %v) {
1546 ; CHECK-LABEL: test_vqdmulh_laneq_s16_intrinsic_lo:
1547 ; CHECK: // %bb.0: // %entry
1548 ; CHECK-NEXT: sqdmulh v0.4h, v0.4h, v1.h[3]
1551 %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v4i16.v8i16(<4 x i16> %a, <8 x i16> %v, i32 3)
1552 ret <4 x i16> %vqdmulh2.i
; Returns the sqdmulh.laneq intrinsic with lane index 7, i.e. a lane in the
; upper half of the <8 x i16> %v. The assertions expect "sqdmulh ... v1.h[7]".
1555 define <4 x i16> @test_vqdmulh_laneq_s16_intrinsic_hi(<4 x i16> %a, <8 x i16> %v) {
1556 ; CHECK-LABEL: test_vqdmulh_laneq_s16_intrinsic_hi:
1557 ; CHECK: // %bb.0: // %entry
1558 ; CHECK-NEXT: sqdmulh v0.4h, v0.4h, v1.h[7]
1561 %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v4i16.v8i16(<4 x i16> %a, <8 x i16> %v, i32 7)
1562 ret <4 x i16> %vqdmulh2.i
; Returns the plain sqdmulh intrinsic applied to %a and an eight-element
; shufflevector splat of lane 3 of the <4 x i16> %v. The assertions expect
; "sqdmulh v0.8h ... v1.h[3]", preceded by the `// kill:` comment llc prints
; for $d1/$q1.
1565 define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
1566 ; CHECK-LABEL: test_vqdmulhq_lane_s16:
1567 ; CHECK: // %bb.0: // %entry
1568 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1569 ; CHECK-NEXT: sqdmulh v0.8h, v0.8h, v1.h[3]
1572 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
1573 %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
1574 ret <8 x i16> %vqdmulh2.i
; Returns the sqdmulh.lane intrinsic called directly with lane index 3 on the
; <4 x i16> %v, with an <8 x i16> result. The assertions expect
; "sqdmulh v0.8h ... v1.h[3]", preceded by the `// kill:` comment llc prints
; for $d1/$q1.
1577 define <8 x i16> @test_vqdmulhq_lane_s16_intrinsic(<8 x i16> %a, <4 x i16> %v) {
1578 ; CHECK-LABEL: test_vqdmulhq_lane_s16_intrinsic:
1579 ; CHECK: // %bb.0: // %entry
1580 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1581 ; CHECK-NEXT: sqdmulh v0.8h, v0.8h, v1.h[3]
1584 %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.lane.v8i16.v4i16(<8 x i16> %a, <4 x i16> %v, i32 3)
1585 ret <8 x i16> %vqdmulh2.i
; Returns the sqdmulh.laneq intrinsic with lane index 3, i.e. a lane in the
; lower half of the <8 x i16> %v, with an <8 x i16> result.
; The assertions expect "sqdmulh v0.8h ... v1.h[3]".
1588 define <8 x i16> @test_vqdmulhq_laneq_s16_intrinsic_lo(<8 x i16> %a, <8 x i16> %v) {
1589 ; CHECK-LABEL: test_vqdmulhq_laneq_s16_intrinsic_lo:
1590 ; CHECK: // %bb.0: // %entry
1591 ; CHECK-NEXT: sqdmulh v0.8h, v0.8h, v1.h[3]
1594 %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v8i16.v8i16(<8 x i16> %a, <8 x i16> %v, i32 3)
1595 ret <8 x i16> %vqdmulh2.i
; Returns the sqdmulh.laneq intrinsic with lane index 7, i.e. a lane in the
; upper half of the <8 x i16> %v, with an <8 x i16> result.
; The assertions expect "sqdmulh v0.8h ... v1.h[7]".
1598 define <8 x i16> @test_vqdmulhq_laneq_s16_intrinsic_hi(<8 x i16> %a, <8 x i16> %v) {
1599 ; CHECK-LABEL: test_vqdmulhq_laneq_s16_intrinsic_hi:
1600 ; CHECK: // %bb.0: // %entry
1601 ; CHECK-NEXT: sqdmulh v0.8h, v0.8h, v1.h[7]
1604 %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v8i16.v8i16(<8 x i16> %a, <8 x i16> %v, i32 7)
1605 ret <8 x i16> %vqdmulh2.i
; Returns the plain sqdmulh intrinsic applied to %a and a shufflevector splat
; of lane 1 of the <2 x i32> %v. The assertions expect the by-element form
; "sqdmulh ... v1.s[1]", preceded by the `// kill:` comment llc prints for
; $d1/$q1.
1608 define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
1609 ; CHECK-LABEL: test_vqdmulh_lane_s32:
1610 ; CHECK: // %bb.0: // %entry
1611 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1612 ; CHECK-NEXT: sqdmulh v0.2s, v0.2s, v1.s[1]
1615 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1616 %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
1617 ret <2 x i32> %vqdmulh2.i
; Returns the sqdmulh.lane intrinsic called directly with lane index 1 on the
; <2 x i32> %v (no shufflevector). The assertions expect
; "sqdmulh ... v1.s[1]", preceded by the `// kill:` comment llc prints for
; $d1/$q1.
1620 define <2 x i32> @test_vqdmulh_lane_s32_intrinsic(<2 x i32> %a, <2 x i32> %v) {
1621 ; CHECK-LABEL: test_vqdmulh_lane_s32_intrinsic:
1622 ; CHECK: // %bb.0: // %entry
1623 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1624 ; CHECK-NEXT: sqdmulh v0.2s, v0.2s, v1.s[1]
1627 %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.lane.v2i32.v2i32(<2 x i32> %a, <2 x i32> %v, i32 1)
1628 ret <2 x i32> %vqdmulh2.i
; Returns the sqdmulh.laneq intrinsic with lane index 1, i.e. a lane in the
; lower half of the <4 x i32> %v. The assertions expect "sqdmulh ... v1.s[1]".
1631 define <2 x i32> @test_vqdmulh_laneq_s32_intrinsic_lo(<2 x i32> %a, <4 x i32> %v) {
1632 ; CHECK-LABEL: test_vqdmulh_laneq_s32_intrinsic_lo:
1633 ; CHECK: // %bb.0: // %entry
1634 ; CHECK-NEXT: sqdmulh v0.2s, v0.2s, v1.s[1]
1637 %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v2i32.v4i32(<2 x i32> %a, <4 x i32> %v, i32 1)
1638 ret <2 x i32> %vqdmulh2.i
; Calls the sqdmulh.laneq intrinsic (<2 x i32> result, <4 x i32> lane source) with lane index 3;
; the assertions expect the indexed instruction form reading v1.s[3].
1641 define <2 x i32> @test_vqdmulh_laneq_s32_intrinsic_hi(<2 x i32> %a, <4 x i32> %v) {
1642 ; CHECK-LABEL: test_vqdmulh_laneq_s32_intrinsic_hi:
1643 ; CHECK: // %bb.0: // %entry
1644 ; CHECK-NEXT: sqdmulh v0.2s, v0.2s, v1.s[3]
1647 %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v2i32.v4i32(<2 x i32> %a, <4 x i32> %v, i32 3)
1648 ret <2 x i32> %vqdmulh2.i
; Splats lane 1 of the <2 x i32> %v into a <4 x i32> and passes it to the plain sqdmulh.v4i32 intrinsic;
; the assertions expect the splat to fold into the indexed form reading v1.s[1].
1651 define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
1652 ; CHECK-LABEL: test_vqdmulhq_lane_s32:
1653 ; CHECK: // %bb.0: // %entry
1654 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1655 ; CHECK-NEXT: sqdmulh v0.4s, v0.4s, v1.s[1]
1658 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1659 %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
1660 ret <4 x i32> %vqdmulh2.i
; Calls the sqdmulh.lane intrinsic (<4 x i32> result, <2 x i32> lane source) with lane index 1;
; the assertions expect the indexed instruction form reading v1.s[1].
1663 define <4 x i32> @test_vqdmulhq_lane_s32_intrinsic(<4 x i32> %a, <2 x i32> %v) {
1664 ; CHECK-LABEL: test_vqdmulhq_lane_s32_intrinsic:
1665 ; CHECK: // %bb.0: // %entry
1666 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1667 ; CHECK-NEXT: sqdmulh v0.4s, v0.4s, v1.s[1]
1670 %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.lane.v4i32.v2i32(<4 x i32> %a, <2 x i32> %v, i32 1)
1671 ret <4 x i32> %vqdmulh2.i
; Calls the sqdmulh.laneq intrinsic (<4 x i32> result, <4 x i32> lane source) with lane index 1;
; the assertions expect the indexed instruction form reading v1.s[1].
1674 define <4 x i32> @test_vqdmulhq_laneq_s32_intrinsic_lo(<4 x i32> %a, <4 x i32> %v) {
1675 ; CHECK-LABEL: test_vqdmulhq_laneq_s32_intrinsic_lo:
1676 ; CHECK: // %bb.0: // %entry
1677 ; CHECK-NEXT: sqdmulh v0.4s, v0.4s, v1.s[1]
1680 %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v4i32.v4i32(<4 x i32> %a, <4 x i32> %v, i32 1)
1681 ret <4 x i32> %vqdmulh2.i
; Calls the sqdmulh.laneq intrinsic (<4 x i32> result, <4 x i32> lane source) with lane index 3;
; the assertions expect the indexed instruction form reading v1.s[3].
1684 define <4 x i32> @test_vqdmulhq_laneq_s32_intrinsic_hi(<4 x i32> %a, <4 x i32> %v) {
1685 ; CHECK-LABEL: test_vqdmulhq_laneq_s32_intrinsic_hi:
1686 ; CHECK: // %bb.0: // %entry
1687 ; CHECK-NEXT: sqdmulh v0.4s, v0.4s, v1.s[3]
1690 %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v4i32.v4i32(<4 x i32> %a, <4 x i32> %v, i32 3)
1691 ret <4 x i32> %vqdmulh2.i
; Splats lane 3 of %v with a shufflevector and passes it to the plain sqrdmulh.v4i16 intrinsic;
; the assertions expect the splat to fold into the indexed form reading v1.h[3].
1694 define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
1695 ; CHECK-LABEL: test_vqrdmulh_lane_s16:
1696 ; CHECK: // %bb.0: // %entry
1697 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1698 ; CHECK-NEXT: sqrdmulh v0.4h, v0.4h, v1.h[3]
1701 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1702 %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
1703 ret <4 x i16> %vqrdmulh2.i
; Calls the sqrdmulh.lane intrinsic (<4 x i16> result, <4 x i16> lane source) with lane index 3;
; the assertions expect the indexed instruction form reading v1.h[3].
1706 define <4 x i16> @test_vqrdmulh_lane_s16_intrinsic(<4 x i16> %a, <4 x i16> %v) {
1707 ; CHECK-LABEL: test_vqrdmulh_lane_s16_intrinsic:
1708 ; CHECK: // %bb.0: // %entry
1709 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1710 ; CHECK-NEXT: sqrdmulh v0.4h, v0.4h, v1.h[3]
1713 %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v4i16.v4i16(<4 x i16> %a, <4 x i16> %v, i32 3)
1714 ret <4 x i16> %vqrdmulh2.i
; Calls the sqrdmulh.laneq intrinsic (<4 x i16> result, <8 x i16> lane source) with lane index 3;
; the assertions expect the indexed instruction form reading v1.h[3].
1717 define <4 x i16> @test_vqrdmulh_laneq_s16_intrinsic_lo(<4 x i16> %a, <8 x i16> %v) {
1718 ; CHECK-LABEL: test_vqrdmulh_laneq_s16_intrinsic_lo:
1719 ; CHECK: // %bb.0: // %entry
1720 ; CHECK-NEXT: sqrdmulh v0.4h, v0.4h, v1.h[3]
1723 %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v4i16.v8i16(<4 x i16> %a, <8 x i16> %v, i32 3)
1724 ret <4 x i16> %vqrdmulh2.i
; Calls the sqrdmulh.laneq intrinsic (<4 x i16> result, <8 x i16> lane source) with lane index 7;
; the assertions expect the indexed instruction form reading v1.h[7].
1727 define <4 x i16> @test_vqrdmulh_laneq_s16_intrinsic_hi(<4 x i16> %a, <8 x i16> %v) {
1728 ; CHECK-LABEL: test_vqrdmulh_laneq_s16_intrinsic_hi:
1729 ; CHECK: // %bb.0: // %entry
1730 ; CHECK-NEXT: sqrdmulh v0.4h, v0.4h, v1.h[7]
1733 %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v4i16.v8i16(<4 x i16> %a, <8 x i16> %v, i32 7)
1734 ret <4 x i16> %vqrdmulh2.i
; Splats lane 3 of the <4 x i16> %v into an <8 x i16> and passes it to the plain sqrdmulh.v8i16 intrinsic;
; the assertions expect the splat to fold into the indexed form reading v1.h[3].
1737 define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
1738 ; CHECK-LABEL: test_vqrdmulhq_lane_s16:
1739 ; CHECK: // %bb.0: // %entry
1740 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1741 ; CHECK-NEXT: sqrdmulh v0.8h, v0.8h, v1.h[3]
1744 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
1745 %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
1746 ret <8 x i16> %vqrdmulh2.i
; Calls the sqrdmulh.lane intrinsic (<8 x i16> result, <4 x i16> lane source) with lane index 3;
; the assertions expect the indexed instruction form reading v1.h[3].
1749 define <8 x i16> @test_vqrdmulhq_lane_s16_intrinsic(<8 x i16> %a, <4 x i16> %v) {
1750 ; CHECK-LABEL: test_vqrdmulhq_lane_s16_intrinsic:
1751 ; CHECK: // %bb.0: // %entry
1752 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1753 ; CHECK-NEXT: sqrdmulh v0.8h, v0.8h, v1.h[3]
1756 %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v8i16.v4i16(<8 x i16> %a, <4 x i16> %v, i32 3)
1757 ret <8 x i16> %vqrdmulh2.i
; Calls the sqrdmulh.laneq intrinsic (<8 x i16> result, <8 x i16> lane source) with lane index 3;
; the assertions expect the indexed instruction form reading v1.h[3].
1760 define <8 x i16> @test_vqrdmulhq_laneq_s16_intrinsic_lo(<8 x i16> %a, <8 x i16> %v) {
1761 ; CHECK-LABEL: test_vqrdmulhq_laneq_s16_intrinsic_lo:
1762 ; CHECK: // %bb.0: // %entry
1763 ; CHECK-NEXT: sqrdmulh v0.8h, v0.8h, v1.h[3]
1766 %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v8i16.v8i16(<8 x i16> %a, <8 x i16> %v, i32 3)
1767 ret <8 x i16> %vqrdmulh2.i
; Calls the sqrdmulh.laneq intrinsic (<8 x i16> result, <8 x i16> lane source) with lane index 7;
; the assertions expect the indexed instruction form reading v1.h[7].
1770 define <8 x i16> @test_vqrdmulhq_laneq_s16_intrinsic_hi(<8 x i16> %a, <8 x i16> %v) {
1771 ; CHECK-LABEL: test_vqrdmulhq_laneq_s16_intrinsic_hi:
1772 ; CHECK: // %bb.0: // %entry
1773 ; CHECK-NEXT: sqrdmulh v0.8h, v0.8h, v1.h[7]
1776 %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v8i16.v8i16(<8 x i16> %a, <8 x i16> %v, i32 7)
1777 ret <8 x i16> %vqrdmulh2.i
; Splats lane 1 of %v with a shufflevector and passes it to the plain sqrdmulh.v2i32 intrinsic;
; the assertions expect the splat to fold into the indexed form reading v1.s[1].
1780 define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
1781 ; CHECK-LABEL: test_vqrdmulh_lane_s32:
1782 ; CHECK: // %bb.0: // %entry
1783 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1784 ; CHECK-NEXT: sqrdmulh v0.2s, v0.2s, v1.s[1]
1787 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1788 %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
1789 ret <2 x i32> %vqrdmulh2.i
; Calls the sqrdmulh.lane intrinsic (<2 x i32> result, <2 x i32> lane source) with lane index 1;
; the assertions expect the indexed instruction form reading v1.s[1].
1792 define <2 x i32> @test_vqrdmulh_lane_s32_intrinsic(<2 x i32> %a, <2 x i32> %v) {
1793 ; CHECK-LABEL: test_vqrdmulh_lane_s32_intrinsic:
1794 ; CHECK: // %bb.0: // %entry
1795 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1796 ; CHECK-NEXT: sqrdmulh v0.2s, v0.2s, v1.s[1]
1799 %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v2i32.v2i32(<2 x i32> %a, <2 x i32> %v, i32 1)
1800 ret <2 x i32> %vqrdmulh2.i
; Calls the sqrdmulh.laneq intrinsic (<2 x i32> result, <4 x i32> lane source) with lane index 1;
; the assertions expect the indexed instruction form reading v1.s[1].
1803 define <2 x i32> @test_vqrdmulh_laneq_s32_intrinsic_lo(<2 x i32> %a, <4 x i32> %v) {
1804 ; CHECK-LABEL: test_vqrdmulh_laneq_s32_intrinsic_lo:
1805 ; CHECK: // %bb.0: // %entry
1806 ; CHECK-NEXT: sqrdmulh v0.2s, v0.2s, v1.s[1]
1809 %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v2i32.v4i32(<2 x i32> %a, <4 x i32> %v, i32 1)
1810 ret <2 x i32> %vqrdmulh2.i
; Calls the sqrdmulh.laneq intrinsic (<2 x i32> result, <4 x i32> lane source) with lane index 3;
; the assertions expect the indexed instruction form reading v1.s[3].
1813 define <2 x i32> @test_vqrdmulh_laneq_s32_intrinsic_hi(<2 x i32> %a, <4 x i32> %v) {
1814 ; CHECK-LABEL: test_vqrdmulh_laneq_s32_intrinsic_hi:
1815 ; CHECK: // %bb.0: // %entry
1816 ; CHECK-NEXT: sqrdmulh v0.2s, v0.2s, v1.s[3]
1819 %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v2i32.v4i32(<2 x i32> %a, <4 x i32> %v, i32 3)
1820 ret <2 x i32> %vqrdmulh2.i
; Splats lane 1 of the <2 x i32> %v into a <4 x i32> and passes it to the plain sqrdmulh.v4i32 intrinsic;
; the assertions expect the splat to fold into the indexed form reading v1.s[1].
1823 define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
1824 ; CHECK-LABEL: test_vqrdmulhq_lane_s32:
1825 ; CHECK: // %bb.0: // %entry
1826 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1827 ; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.s[1]
1830 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1831 %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
1832 ret <4 x i32> %vqrdmulh2.i
; Calls the sqrdmulh.lane intrinsic (<4 x i32> result, <2 x i32> lane source) with lane index 1;
; the assertions expect the indexed instruction form reading v1.s[1].
1835 define <4 x i32> @test_vqrdmulhq_lane_s32_intrinsic(<4 x i32> %a, <2 x i32> %v) {
1836 ; CHECK-LABEL: test_vqrdmulhq_lane_s32_intrinsic:
1837 ; CHECK: // %bb.0: // %entry
1838 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1839 ; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.s[1]
1842 %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v4i32.v2i32(<4 x i32> %a, <2 x i32> %v, i32 1)
1843 ret <4 x i32> %vqrdmulh2.i
; Calls the sqrdmulh.laneq intrinsic (<4 x i32> result, <4 x i32> lane source) with lane index 1;
; the assertions expect the indexed instruction form reading v1.s[1].
1846 define <4 x i32> @test_vqrdmulhq_laneq_s32_intrinsic_lo(<4 x i32> %a, <4 x i32> %v) {
1847 ; CHECK-LABEL: test_vqrdmulhq_laneq_s32_intrinsic_lo:
1848 ; CHECK: // %bb.0: // %entry
1849 ; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.s[1]
1852 %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v4i32.v4i32(<4 x i32> %a, <4 x i32> %v, i32 1)
1853 ret <4 x i32> %vqrdmulh2.i
; Calls the sqrdmulh.laneq intrinsic (<4 x i32> result, <4 x i32> lane source) with lane index 3;
; the assertions expect the indexed instruction form reading v1.s[3].
1856 define <4 x i32> @test_vqrdmulhq_laneq_s32_intrinsic_hi(<4 x i32> %a, <4 x i32> %v) {
1857 ; CHECK-LABEL: test_vqrdmulhq_laneq_s32_intrinsic_hi:
1858 ; CHECK: // %bb.0: // %entry
1859 ; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.s[3]
1862 %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v4i32.v4i32(<4 x i32> %a, <4 x i32> %v, i32 3)
1863 ret <4 x i32> %vqrdmulh2.i
; Multiplies %a by a splat of lane 1 of the <2 x float> %v;
; the assertions expect a single indexed fmul reading v1.s[1].
1866 define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) {
1867 ; CHECK-LABEL: test_vmul_lane_f32:
1868 ; CHECK: // %bb.0: // %entry
1869 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1870 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[1]
1873 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
1874 %mul = fmul <2 x float> %shuffle, %a
1875 ret <2 x float> %mul
; Scalar double multiply: %a is converted to a scalar double via two bitcasts (through <8 x i8>),
; multiplied by element 0 of %v, and the product is inserted into a <1 x double>.
; The assertions expect a plain scalar fmul on d0 and d1.
1878 define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) {
1879 ; CHECK-LABEL: test_vmul_lane_f64:
1880 ; CHECK: // %bb.0: // %entry
1881 ; CHECK-NEXT: fmul d0, d0, d1
1884 %0 = bitcast <1 x double> %a to <8 x i8>
1885 %1 = bitcast <8 x i8> %0 to double
1886 %extract = extractelement <1 x double> %v, i32 0
1887 %2 = fmul double %1, %extract
1888 %3 = insertelement <1 x double> undef, double %2, i32 0
; Multiplies %a by lane 1 of the <2 x float> %v splatted to <4 x float>;
; the assertions expect a single indexed fmul reading v1.s[1].
1892 define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) {
1893 ; CHECK-LABEL: test_vmulq_lane_f32:
1894 ; CHECK: // %bb.0: // %entry
1895 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1896 ; CHECK-NEXT: fmul v0.4s, v0.4s, v1.s[1]
1899 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1900 %mul = fmul <4 x float> %shuffle, %a
1901 ret <4 x float> %mul
; Multiplies %a by the single element of the <1 x double> %v splatted to <2 x double>
; (zeroinitializer shuffle mask); the assertions expect an indexed fmul reading v1.d[0].
1904 define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) {
1905 ; CHECK-LABEL: test_vmulq_lane_f64:
1906 ; CHECK: // %bb.0: // %entry
1907 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1908 ; CHECK-NEXT: fmul v0.2d, v0.2d, v1.d[0]
1911 %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
1912 %mul = fmul <2 x double> %shuffle, %a
1913 ret <2 x double> %mul
; Multiplies %a by lane 3 of the <4 x float> %v splatted to <2 x float>;
; the assertions expect a single indexed fmul reading v1.s[3].
1916 define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) {
1917 ; CHECK-LABEL: test_vmul_laneq_f32:
1918 ; CHECK: // %bb.0: // %entry
1919 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[3]
1922 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
1923 %mul = fmul <2 x float> %shuffle, %a
1924 ret <2 x float> %mul
; Extracts element 1 of the <2 x double> %v, bitcasts it to <2 x float>, splats lane 1 of that
; and multiplies by %a. The assertions expect the extract/bitcast/splat chain to fold into an
; indexed fmul reading v1.s[3].
1927 define <2 x float> @test_vmul_laneq3_f32_bitcast(<2 x float> %a, <2 x double> %v) {
1928 ; CHECK-LABEL: test_vmul_laneq3_f32_bitcast:
1930 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[3]
1932 %extract = shufflevector <2 x double> %v, <2 x double> undef, <1 x i32> <i32 1>
1933 %bc = bitcast <1 x double> %extract to <2 x float>
1934 %splat = shufflevector <2 x float> %bc, <2 x float> undef, <2 x i32> <i32 1, i32 1>
1935 %mul = fmul <2 x float> %splat, %a
1936 ret <2 x float> %mul
; Extracts element 1 of the <2 x double> %v, bitcasts it to <2 x float>, splats lane 0 of that
; and multiplies by %a. The assertions expect the extract/bitcast/splat chain to fold into an
; indexed fmul reading v1.s[2].
1939 define <2 x float> @test_vmul_laneq2_f32_bitcast(<2 x float> %a, <2 x double> %v) {
1940 ; CHECK-LABEL: test_vmul_laneq2_f32_bitcast:
1942 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[2]
1944 %extract = shufflevector <2 x double> %v, <2 x double> undef, <1 x i32> <i32 1>
1945 %bc = bitcast <1 x double> %extract to <2 x float>
1946 %splat = shufflevector <2 x float> %bc, <2 x float> undef, <2 x i32> <i32 0, i32 0>
1947 %mul = fmul <2 x float> %splat, %a
1948 ret <2 x float> %mul
; Extracts element 1 of the <2 x double> %v, bitcasts it to <4 x i16>, splats lane 1 of that
; and adds %a. The assertions expect a dup of v1.h[5] followed by a vector add.
1951 define <4 x i16> @test_vadd_laneq5_i16_bitcast(<4 x i16> %a, <2 x double> %v) {
1952 ; CHECK-LABEL: test_vadd_laneq5_i16_bitcast:
1954 ; CHECK-NEXT: dup v1.4h, v1.h[5]
1955 ; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
1957 %extract = shufflevector <2 x double> %v, <2 x double> undef, <1 x i32> <i32 1>
1958 %bc = bitcast <1 x double> %extract to <4 x i16>
1959 %splat = shufflevector <4 x i16> %bc, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1960 %r = add <4 x i16> %splat, %a
; Extracts bytes 2..9 of the <16 x i8> %v, bitcasts them to <4 x i16>, splats lane 1 of that
; and adds %a. The assertions currently expect two back-to-back dup instructions
; (v1.h[2], then v1.h[1]) before the add, rather than a single dup.
1964 ; TODO: The pattern in LowerVECTOR_SHUFFLE does not match what we are looking for.
1966 define <4 x i16> @test_vadd_lane2_i16_bitcast_bigger_aligned(<4 x i16> %a, <16 x i8> %v) {
1967 ; CHECK-LABEL: test_vadd_lane2_i16_bitcast_bigger_aligned:
1969 ; CHECK-NEXT: dup v1.4h, v1.h[2]
1970 ; CHECK-NEXT: dup v1.4h, v1.h[1]
1971 ; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
1973 %extract = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
1974 %bc = bitcast <8 x i8> %extract to <4 x i16>
1975 %splat = shufflevector <4 x i16> %bc, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1976 %r = add <4 x i16> %splat, %a
; Extracts bytes 8..15 of the <16 x i8> %v, bitcasts them to <4 x i16>, splats lane 1 of that
; and adds %a. The assertions expect a single dup of v1.h[5] followed by a vector add.
1980 define <4 x i16> @test_vadd_lane5_i16_bitcast_bigger_aligned(<4 x i16> %a, <16 x i8> %v) {
1981 ; CHECK-LABEL: test_vadd_lane5_i16_bitcast_bigger_aligned:
1983 ; CHECK-NEXT: dup v1.4h, v1.h[5]
1984 ; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
1986 %extract = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1987 %bc = bitcast <8 x i8> %extract to <4 x i16>
1988 %splat = shufflevector <4 x i16> %bc, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1989 %r = add <4 x i16> %splat, %a
; Extracts bytes 1..8 of the <16 x i8> %v, bitcasts them to <4 x i16>, splats lane 1 of that
; and adds %a. The splatted lane is made of source bytes 3 and 4, which do not line up with a
; 16-bit lane of the source, so the assertions expect an ext by 1 byte before the dup.
1993 ; Negative test - can't dup bytes {3,4} of v8i16.
1995 define <4 x i16> @test_vadd_lane_i16_bitcast_bigger_unaligned(<4 x i16> %a, <16 x i8> %v) {
1996 ; CHECK-LABEL: test_vadd_lane_i16_bitcast_bigger_unaligned:
1998 ; CHECK-NEXT: ext v1.8b, v1.8b, v0.8b, #1
1999 ; CHECK-NEXT: dup v1.4h, v1.h[1]
2000 ; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
2002 %extract = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
2003 %bc = bitcast <8 x i8> %extract to <4 x i16>
2004 %splat = shufflevector <4 x i16> %bc, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
2005 %r = add <4 x i16> %splat, %a
; Scalar double multiply: %a is converted to a scalar double via two bitcasts (through <8 x i8>),
; multiplied by element 1 of the <2 x double> %v, and the product is inserted into a <1 x double>.
; The assertions expect the scalar-by-element fmul reading v1.d[1].
2009 define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) {
2010 ; CHECK-LABEL: test_vmul_laneq_f64:
2011 ; CHECK: // %bb.0: // %entry
2012 ; CHECK-NEXT: fmul d0, d0, v1.d[1]
2015 %0 = bitcast <1 x double> %a to <8 x i8>
2016 %1 = bitcast <8 x i8> %0 to double
2017 %extract = extractelement <2 x double> %v, i32 1
2018 %2 = fmul double %1, %extract
2019 %3 = insertelement <1 x double> undef, double %2, i32 0
; Multiplies %a by a splat of lane 3 of the <4 x float> %v;
; the assertions expect a single indexed fmul reading v1.s[3].
2023 define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) {
2024 ; CHECK-LABEL: test_vmulq_laneq_f32:
2025 ; CHECK: // %bb.0: // %entry
2026 ; CHECK-NEXT: fmul v0.4s, v0.4s, v1.s[3]
2029 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
2030 %mul = fmul <4 x float> %shuffle, %a
2031 ret <4 x float> %mul
; Multiplies %a by a splat of lane 1 of the <2 x double> %v;
; the assertions expect a single indexed fmul reading v1.d[1].
2034 define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) {
2035 ; CHECK-LABEL: test_vmulq_laneq_f64:
2036 ; CHECK: // %bb.0: // %entry
2037 ; CHECK-NEXT: fmul v0.2d, v0.2d, v1.d[1]
2040 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
2041 %mul = fmul <2 x double> %shuffle, %a
2042 ret <2 x double> %mul
; Splats lane 1 of the <2 x float> %v and passes it to the fmulx.v2f32 intrinsic;
; the assertions expect the splat to fold into the indexed fmulx reading v1.s[1].
2045 define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) {
2046 ; CHECK-LABEL: test_vmulx_lane_f32:
2047 ; CHECK: // %bb.0: // %entry
2048 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2049 ; CHECK-NEXT: fmulx v0.2s, v0.2s, v1.s[1]
2052 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
2053 %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
2054 ret <2 x float> %vmulx2.i
; Splats lane 1 of the <2 x float> %v to <4 x float> and passes it to the fmulx.v4f32 intrinsic;
; the assertions expect the splat to fold into the indexed fmulx reading v1.s[1].
2057 define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) {
2058 ; CHECK-LABEL: test_vmulxq_lane_f32:
2059 ; CHECK: // %bb.0: // %entry
2060 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2061 ; CHECK-NEXT: fmulx v0.4s, v0.4s, v1.s[1]
2064 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
2065 %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
2066 ret <4 x float> %vmulx2.i
; Splats the single element of the <1 x double> %v to <2 x double> (zeroinitializer shuffle mask)
; and passes it to the fmulx.v2f64 intrinsic; the assertions expect an indexed fmulx reading v1.d[0].
2069 define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) {
2070 ; CHECK-LABEL: test_vmulxq_lane_f64:
2071 ; CHECK: // %bb.0: // %entry
2072 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2073 ; CHECK-NEXT: fmulx v0.2d, v0.2d, v1.d[0]
2076 %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
2077 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
2078 ret <2 x double> %vmulx2.i
; Splats lane 3 of the <4 x float> %v to <2 x float> and passes it to the fmulx.v2f32 intrinsic;
; the assertions expect the splat to fold into the indexed fmulx reading v1.s[3].
2081 define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) {
2082 ; CHECK-LABEL: test_vmulx_laneq_f32:
2083 ; CHECK: // %bb.0: // %entry
2084 ; CHECK-NEXT: fmulx v0.2s, v0.2s, v1.s[3]
2087 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
2088 %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
2089 ret <2 x float> %vmulx2.i
; Splats lane 3 of the <4 x float> %v and passes it to the fmulx.v4f32 intrinsic;
; the assertions expect the splat to fold into the indexed fmulx reading v1.s[3].
2092 define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) {
2093 ; CHECK-LABEL: test_vmulxq_laneq_f32:
2094 ; CHECK: // %bb.0: // %entry
2095 ; CHECK-NEXT: fmulx v0.4s, v0.4s, v1.s[3]
2098 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
2099 %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
2100 ret <4 x float> %vmulx2.i
; Splats lane 1 of the <2 x double> %v and passes it to the fmulx.v2f64 intrinsic;
; the assertions expect the splat to fold into the indexed fmulx reading v1.d[1].
2103 define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) {
2104 ; CHECK-LABEL: test_vmulxq_laneq_f64:
2105 ; CHECK: // %bb.0: // %entry
2106 ; CHECK-NEXT: fmulx v0.2d, v0.2d, v1.d[1]
2109 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
2110 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
2111 ret <2 x double> %vmulx2.i
; Multiply-accumulate with lane 0: splat of lane 0 of the <4 x i16> %v times %b, plus %a;
; the assertions expect a single indexed mla reading v2.h[0].
2114 define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
2115 ; CHECK-LABEL: test_vmla_lane_s16_0:
2116 ; CHECK: // %bb.0: // %entry
2117 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2118 ; CHECK-NEXT: mla v0.4h, v1.4h, v2.h[0]
2121 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2122 %mul = mul <4 x i16> %shuffle, %b
2123 %add = add <4 x i16> %mul, %a
; Multiply-accumulate with lane 0: lane 0 of the <4 x i16> %v splatted to <8 x i16>, times %b, plus %a;
; the assertions expect a single indexed mla reading v2.h[0].
2127 define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
2128 ; CHECK-LABEL: test_vmlaq_lane_s16_0:
2129 ; CHECK: // %bb.0: // %entry
2130 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2131 ; CHECK-NEXT: mla v0.8h, v1.8h, v2.h[0]
2134 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
2135 %mul = mul <8 x i16> %shuffle, %b
2136 %add = add <8 x i16> %mul, %a
; Multiply-accumulate with lane 0: splat of lane 0 of the <2 x i32> %v times %b, plus %a;
; the assertions expect a single indexed mla reading v2.s[0].
2140 define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
2141 ; CHECK-LABEL: test_vmla_lane_s32_0:
2142 ; CHECK: // %bb.0: // %entry
2143 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2144 ; CHECK-NEXT: mla v0.2s, v1.2s, v2.s[0]
2147 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2148 %mul = mul <2 x i32> %shuffle, %b
2149 %add = add <2 x i32> %mul, %a
; Multiply-accumulate with lane 0: lane 0 of the <2 x i32> %v splatted to <4 x i32>, times %b, plus %a;
; the assertions expect a single indexed mla reading v2.s[0].
2153 define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
2154 ; CHECK-LABEL: test_vmlaq_lane_s32_0:
2155 ; CHECK: // %bb.0: // %entry
2156 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2157 ; CHECK-NEXT: mla v0.4s, v1.4s, v2.s[0]
2160 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
2161 %mul = mul <4 x i32> %shuffle, %b
2162 %add = add <4 x i32> %mul, %a
; Multiply-accumulate with lane 0: lane 0 of the <8 x i16> %v splatted to <4 x i16>, times %b, plus %a;
; the assertions expect a single indexed mla reading v2.h[0].
2166 define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
2167 ; CHECK-LABEL: test_vmla_laneq_s16_0:
2168 ; CHECK: // %bb.0: // %entry
2169 ; CHECK-NEXT: mla v0.4h, v1.4h, v2.h[0]
2172 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2173 %mul = mul <4 x i16> %shuffle, %b
2174 %add = add <4 x i16> %mul, %a
; Multiply-accumulate with lane 0: splat of lane 0 of the <8 x i16> %v times %b, plus %a;
; the assertions expect a single indexed mla reading v2.h[0].
2178 define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
2179 ; CHECK-LABEL: test_vmlaq_laneq_s16_0:
2180 ; CHECK: // %bb.0: // %entry
2181 ; CHECK-NEXT: mla v0.8h, v1.8h, v2.h[0]
2184 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
2185 %mul = mul <8 x i16> %shuffle, %b
2186 %add = add <8 x i16> %mul, %a
; Multiply-accumulate with lane 0: lane 0 of the <4 x i32> %v splatted to <2 x i32>, times %b, plus %a;
; the assertions expect a single indexed mla reading v2.s[0].
2190 define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
2191 ; CHECK-LABEL: test_vmla_laneq_s32_0:
2192 ; CHECK: // %bb.0: // %entry
2193 ; CHECK-NEXT: mla v0.2s, v1.2s, v2.s[0]
2196 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
2197 %mul = mul <2 x i32> %shuffle, %b
2198 %add = add <2 x i32> %mul, %a
; Multiply-accumulate with lane 0: splat of lane 0 of the <4 x i32> %v times %b, plus %a;
; the assertions expect a single indexed mla reading v2.s[0].
2202 define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
2203 ; CHECK-LABEL: test_vmlaq_laneq_s32_0:
2204 ; CHECK: // %bb.0: // %entry
2205 ; CHECK-NEXT: mla v0.4s, v1.4s, v2.s[0]
2208 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
2209 %mul = mul <4 x i32> %shuffle, %b
2210 %add = add <4 x i32> %mul, %a
; Multiply-subtract with lane 0: %a minus (splat of lane 0 of the <4 x i16> %v times %b);
; the assertions expect a single indexed mls reading v2.h[0].
2214 define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
2215 ; CHECK-LABEL: test_vmls_lane_s16_0:
2216 ; CHECK: // %bb.0: // %entry
2217 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2218 ; CHECK-NEXT: mls v0.4h, v1.4h, v2.h[0]
2221 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2222 %mul = mul <4 x i16> %shuffle, %b
2223 %sub = sub <4 x i16> %a, %mul
; Multiply-subtract with lane 0: %a minus (lane 0 of the <4 x i16> %v splatted to <8 x i16>, times %b);
; the assertions expect a single indexed mls reading v2.h[0].
2227 define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
2228 ; CHECK-LABEL: test_vmlsq_lane_s16_0:
2229 ; CHECK: // %bb.0: // %entry
2230 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2231 ; CHECK-NEXT: mls v0.8h, v1.8h, v2.h[0]
2234 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
2235 %mul = mul <8 x i16> %shuffle, %b
2236 %sub = sub <8 x i16> %a, %mul
; Multiply-subtract with lane 0: %a minus (splat of lane 0 of the <2 x i32> %v times %b);
; the assertions expect a single indexed mls reading v2.s[0].
2240 define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
2241 ; CHECK-LABEL: test_vmls_lane_s32_0:
2242 ; CHECK: // %bb.0: // %entry
2243 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2244 ; CHECK-NEXT: mls v0.2s, v1.2s, v2.s[0]
2247 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2248 %mul = mul <2 x i32> %shuffle, %b
2249 %sub = sub <2 x i32> %a, %mul
; Multiply-subtract with lane 0: %a minus (lane 0 of the <2 x i32> %v splatted to <4 x i32>, times %b);
; the assertions expect a single indexed mls reading v2.s[0].
2253 define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
2254 ; CHECK-LABEL: test_vmlsq_lane_s32_0:
2255 ; CHECK: // %bb.0: // %entry
2256 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2257 ; CHECK-NEXT: mls v0.4s, v1.4s, v2.s[0]
2260 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
2261 %mul = mul <4 x i32> %shuffle, %b
2262 %sub = sub <4 x i32> %a, %mul
; Multiply-subtract with lane 0: %a minus (lane 0 of the <8 x i16> %v splatted to <4 x i16>, times %b);
; the assertions expect a single indexed mls reading v2.h[0].
2266 define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
2267 ; CHECK-LABEL: test_vmls_laneq_s16_0:
2268 ; CHECK: // %bb.0: // %entry
2269 ; CHECK-NEXT: mls v0.4h, v1.4h, v2.h[0]
2272 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2273 %mul = mul <4 x i16> %shuffle, %b
2274 %sub = sub <4 x i16> %a, %mul
; Multiply-subtract with lane 0: %a minus (splat of lane 0 of the <8 x i16> %v times %b);
; the assertions expect a single indexed mls reading v2.h[0].
2278 define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
2279 ; CHECK-LABEL: test_vmlsq_laneq_s16_0:
2280 ; CHECK: // %bb.0: // %entry
2281 ; CHECK-NEXT: mls v0.8h, v1.8h, v2.h[0]
2284 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
2285 %mul = mul <8 x i16> %shuffle, %b
2286 %sub = sub <8 x i16> %a, %mul
; Multiply-subtract with lane 0: %a minus (lane 0 of the <4 x i32> %v splatted to <2 x i32>, times %b);
; the assertions expect a single indexed mls reading v2.s[0].
2290 define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
2291 ; CHECK-LABEL: test_vmls_laneq_s32_0:
2292 ; CHECK: // %bb.0: // %entry
2293 ; CHECK-NEXT: mls v0.2s, v1.2s, v2.s[0]
2296 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
2297 %mul = mul <2 x i32> %shuffle, %b
2298 %sub = sub <2 x i32> %a, %mul
; Multiply-subtract with lane 0: %a minus (splat of lane 0 of the <4 x i32> %v times %b);
; the assertions expect a single indexed mls reading v2.s[0].
2302 define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
2303 ; CHECK-LABEL: test_vmlsq_laneq_s32_0:
2304 ; CHECK: // %bb.0: // %entry
2305 ; CHECK-NEXT: mls v0.4s, v1.4s, v2.s[0]
2308 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
2309 %mul = mul <4 x i32> %shuffle, %b
2310 %sub = sub <4 x i32> %a, %mul
; Multiplies %a by a splat of lane 0 of the <4 x i16> %v;
; the assertions expect a single indexed mul reading v1.h[0].
2314 define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
2315 ; CHECK-LABEL: test_vmul_lane_s16_0:
2316 ; CHECK: // %bb.0: // %entry
2317 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2318 ; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[0]
2321 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2322 %mul = mul <4 x i16> %shuffle, %a
; Multiplies %a by lane 0 of the <4 x i16> %v splatted to <8 x i16>;
; the assertions expect a single indexed mul reading v1.h[0].
2326 define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
2327 ; CHECK-LABEL: test_vmulq_lane_s16_0:
2328 ; CHECK: // %bb.0: // %entry
2329 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2330 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[0]
2333 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
2334 %mul = mul <8 x i16> %shuffle, %a
; Multiplies %a by a splat of lane 0 of the <2 x i32> %v;
; the assertions expect a single indexed mul reading v1.s[0].
2338 define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
2339 ; CHECK-LABEL: test_vmul_lane_s32_0:
2340 ; CHECK: // %bb.0: // %entry
2341 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2342 ; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[0]
2345 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2346 %mul = mul <2 x i32> %shuffle, %a
; Multiplies %a by lane 0 of the <2 x i32> %v splatted to <4 x i32>;
; the assertions expect a single indexed mul reading v1.s[0].
2350 define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
2351 ; CHECK-LABEL: test_vmulq_lane_s32_0:
2352 ; CHECK: // %bb.0: // %entry
2353 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2354 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[0]
2357 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
2358 %mul = mul <4 x i32> %shuffle, %a
; "u16"-named variant: multiplies %a by a splat of lane 0 of the <4 x i16> %v;
; the assertions expect a single indexed mul reading v1.h[0].
2362 define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
2363 ; CHECK-LABEL: test_vmul_lane_u16_0:
2364 ; CHECK: // %bb.0: // %entry
2365 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2366 ; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[0]
2369 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2370 %mul = mul <4 x i16> %shuffle, %a
; "u16"-named variant: multiplies %a by lane 0 of the <4 x i16> %v splatted to <8 x i16>;
; the assertions expect a single indexed mul reading v1.h[0].
2374 define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
2375 ; CHECK-LABEL: test_vmulq_lane_u16_0:
2376 ; CHECK: // %bb.0: // %entry
2377 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2378 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[0]
2381 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
2382 %mul = mul <8 x i16> %shuffle, %a
; "u32"-named variant: multiplies %a by a splat of lane 0 of the <2 x i32> %v;
; the assertions expect a single indexed mul reading v1.s[0].
2386 define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
2387 ; CHECK-LABEL: test_vmul_lane_u32_0:
2388 ; CHECK: // %bb.0: // %entry
2389 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2390 ; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[0]
2393 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2394 %mul = mul <2 x i32> %shuffle, %a
; "u32"-named variant: multiplies %a by lane 0 of the <2 x i32> %v splatted to <4 x i32>;
; the assertions expect a single indexed mul reading v1.s[0].
2398 define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
2399 ; CHECK-LABEL: test_vmulq_lane_u32_0:
2400 ; CHECK: // %bb.0: // %entry
2401 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2402 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[0]
2405 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
2406 %mul = mul <4 x i32> %shuffle, %a
; Multiplies %a by lane 0 of the <8 x i16> %v splatted to <4 x i16>;
; the assertions expect a single indexed mul reading v1.h[0].
2410 define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
2411 ; CHECK-LABEL: test_vmul_laneq_s16_0:
2412 ; CHECK: // %bb.0: // %entry
2413 ; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[0]
2416 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2417 %mul = mul <4 x i16> %shuffle, %a
; Multiplies %a by a splat of lane 0 of the <8 x i16> %v;
; the assertions expect a single indexed mul reading v1.h[0].
2421 define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
2422 ; CHECK-LABEL: test_vmulq_laneq_s16_0:
2423 ; CHECK: // %bb.0: // %entry
2424 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[0]
2427 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
2428 %mul = mul <8 x i16> %shuffle, %a
; Multiplies %a by lane 0 of the <4 x i32> %v splatted to <2 x i32>;
; the assertions expect a single indexed mul reading v1.s[0].
2432 define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
2433 ; CHECK-LABEL: test_vmul_laneq_s32_0:
2434 ; CHECK: // %bb.0: // %entry
2435 ; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[0]
2438 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
2439 %mul = mul <2 x i32> %shuffle, %a
; Multiplies %a by a splat of lane 0 of the <4 x i32> %v;
; the assertions expect a single indexed mul reading v1.s[0].
2443 define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
2444 ; CHECK-LABEL: test_vmulq_laneq_s32_0:
2445 ; CHECK: // %bb.0: // %entry
2446 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[0]
2449 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
2450 %mul = mul <4 x i32> %shuffle, %a
; "u16"-named variant: multiplies %a by lane 0 of the <8 x i16> %v splatted to <4 x i16>;
; the assertions expect a single indexed mul reading v1.h[0].
2454 define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
2455 ; CHECK-LABEL: test_vmul_laneq_u16_0:
2456 ; CHECK: // %bb.0: // %entry
2457 ; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[0]
2460 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2461 %mul = mul <4 x i16> %shuffle, %a
; "u16"-named variant: multiplies %a by a splat of lane 0 of the <8 x i16> %v;
; the assertions expect a single indexed mul reading v1.h[0].
2465 define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
2466 ; CHECK-LABEL: test_vmulq_laneq_u16_0:
2467 ; CHECK: // %bb.0: // %entry
2468 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[0]
2471 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
2472 %mul = mul <8 x i16> %shuffle, %a
; "u32"-named variant: multiplies %a by lane 0 of the <4 x i32> %v splatted to <2 x i32>;
; the assertions expect a single indexed mul reading v1.s[0].
2476 define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
2477 ; CHECK-LABEL: test_vmul_laneq_u32_0:
2478 ; CHECK: // %bb.0: // %entry
2479 ; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[0]
2482 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
2483 %mul = mul <2 x i32> %shuffle, %a
; "u32"-named variant: multiplies %a by a splat of lane 0 of the <4 x i32> %v;
; the assertions expect a single indexed mul reading v1.s[0].
2487 define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
2488 ; CHECK-LABEL: test_vmulq_laneq_u32_0:
2489 ; CHECK: // %bb.0: // %entry
2490 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[0]
2493 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
2494 %mul = mul <4 x i32> %shuffle, %a
; Fused multiply-add with lane 0: llvm.fma.v2f32(splat of lane 0 of %v, %b, %a);
; the assertions expect a single indexed fmla reading v2.s[0].
2498 define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
2499 ; CHECK-LABEL: test_vfma_lane_f32_0:
2500 ; CHECK: // %bb.0: // %entry
2501 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2502 ; CHECK-NEXT: fmla v0.2s, v1.2s, v2.s[0]
2505 %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
2506 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
; Fused multiply-add with lane 0: llvm.fma.v4f32(lane 0 of the <2 x float> %v splatted to <4 x float>, %b, %a);
; the assertions expect a single indexed fmla reading v2.s[0].
2510 define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
2511 ; CHECK-LABEL: test_vfmaq_lane_f32_0:
2512 ; CHECK: // %bb.0: // %entry
2513 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2514 ; CHECK-NEXT: fmla v0.4s, v1.4s, v2.s[0]
2517 %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
2518 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
; Fused multiply-add with lane 0: llvm.fma.v2f32(lane 0 of the <4 x float> %v splatted to <2 x float>, %b, %a);
; the assertions expect a single indexed fmla reading v2.s[0].
2522 define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
2523 ; CHECK-LABEL: test_vfma_laneq_f32_0:
2524 ; CHECK: // %bb.0: // %entry
2525 ; CHECK-NEXT: fmla v0.2s, v1.2s, v2.s[0]
2528 %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
2529 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
; Fused multiply-add with lane 0: llvm.fma.v4f32(splat of lane 0 of the <4 x float> %v, %b, %a);
; the assertions expect a single indexed fmla reading v2.s[0].
2533 define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
2534 ; CHECK-LABEL: test_vfmaq_laneq_f32_0:
2535 ; CHECK: // %bb.0: // %entry
2536 ; CHECK-NEXT: fmla v0.4s, v1.4s, v2.s[0]
2539 %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
2540 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
; Negates %v (fsub from -0.0), splats lane 0 and feeds it to llvm.fma with %b and %a,
; i.e. a - b * v[0]. FileCheck expects a `kill` note for $d2 and a by-element `fmls` on v2.s[0].
2544 define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
2545 ; CHECK-LABEL: test_vfms_lane_f32_0:
2546 ; CHECK: // %bb.0: // %entry
2547 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2548 ; CHECK-NEXT: fmls v0.2s, v1.2s, v2.s[0]
2551 %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
2552 %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer
2553 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
; Negates the 64-bit %v (fsub from -0.0), splats lane 0 to four lanes and feeds it to llvm.fma
; with %b and %a. FileCheck expects a `kill` note for $d2 and a by-element `fmls` on v2.s[0].
2557 define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
2558 ; CHECK-LABEL: test_vfmsq_lane_f32_0:
2559 ; CHECK: // %bb.0: // %entry
2560 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2561 ; CHECK-NEXT: fmls v0.4s, v1.4s, v2.s[0]
2564 %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
2565 %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> zeroinitializer
2566 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
; Negates the 128-bit %v (fsub from -0.0), splats lane 0 to two lanes and feeds it to llvm.fma
; with %b and %a. FileCheck expects the by-element `fmls` on v2.s[0].
2570 define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
2571 ; CHECK-LABEL: test_vfms_laneq_f32_0:
2572 ; CHECK: // %bb.0: // %entry
2573 ; CHECK-NEXT: fmls v0.2s, v1.2s, v2.s[0]
2576 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
2577 %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> zeroinitializer
2578 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
; Negates the 128-bit %v (fsub from -0.0), splats lane 0 and feeds it to llvm.fma with %b and %a.
; FileCheck expects the by-element `fmls` on v2.s[0].
2582 define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
2583 ; CHECK-LABEL: test_vfmsq_laneq_f32_0:
2584 ; CHECK: // %bb.0: // %entry
2585 ; CHECK-NEXT: fmls v0.4s, v1.4s, v2.s[0]
2588 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
2589 %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> zeroinitializer
2590 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
; Double-precision case: llvm.fma(splat(%v[0]), %b, %a) on <2 x double>.
; FileCheck expects the by-element `fmla` on v2.d[0].
2594 define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
2595 ; CHECK-LABEL: test_vfmaq_laneq_f64_0:
2596 ; CHECK: // %bb.0: // %entry
2597 ; CHECK-NEXT: fmla v0.2d, v1.2d, v2.d[0]
2600 %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
2601 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
; Double-precision case: negates %v (fsub from -0.0), splats lane 0 and feeds it to llvm.fma
; with %b and %a. FileCheck expects the by-element `fmls` on v2.d[0].
2605 define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
2606 ; CHECK-LABEL: test_vfmsq_laneq_f64_0:
2607 ; CHECK: // %bb.0: // %entry
2608 ; CHECK-NEXT: fmls v0.2d, v1.2d, v2.d[0]
2611 %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
2612 %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer
2613 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
; Signed widening multiply-accumulate: %a + smull(%b, splat(%v[0])) with a 64-bit %v.
; FileCheck expects a `kill` note for $d2 and a by-element `smlal` on v2.h[0].
2617 define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
2618 ; CHECK-LABEL: test_vmlal_lane_s16_0:
2619 ; CHECK: // %bb.0: // %entry
2620 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2621 ; CHECK-NEXT: smlal v0.4s, v1.4h, v2.h[0]
2624 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2625 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
2626 %add = add <4 x i32> %vmull2.i, %a
; Signed widening multiply-accumulate: %a + smull(%b, splat(%v[0])) with a 64-bit %v.
; FileCheck expects a `kill` note for $d2 and a by-element `smlal` on v2.s[0].
2630 define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
2631 ; CHECK-LABEL: test_vmlal_lane_s32_0:
2632 ; CHECK: // %bb.0: // %entry
2633 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2634 ; CHECK-NEXT: smlal v0.2d, v1.2s, v2.s[0]
2637 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2638 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
2639 %add = add <2 x i64> %vmull2.i, %a
; Signed widening multiply-accumulate: %a + smull(%b, splat(%v[0])) with a 128-bit %v.
; FileCheck expects the by-element `smlal` on v2.h[0].
2643 define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
2644 ; CHECK-LABEL: test_vmlal_laneq_s16_0:
2645 ; CHECK: // %bb.0: // %entry
2646 ; CHECK-NEXT: smlal v0.4s, v1.4h, v2.h[0]
2649 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2650 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
2651 %add = add <4 x i32> %vmull2.i, %a
; Signed widening multiply-accumulate: %a + smull(%b, splat(%v[0])) with a 128-bit %v.
; FileCheck expects the by-element `smlal` on v2.s[0].
2655 define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
2656 ; CHECK-LABEL: test_vmlal_laneq_s32_0:
2657 ; CHECK: // %bb.0: // %entry
2658 ; CHECK-NEXT: smlal v0.2d, v1.2s, v2.s[0]
2661 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
2662 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
2663 %add = add <2 x i64> %vmull2.i, %a
; Signed widening multiply-accumulate on the upper half of %b (lanes 4-7) times splat(%v[0]),
; added to %a. FileCheck expects a `kill` note for $d2 and a by-element `smlal2` on v2.h[0].
2667 define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
2668 ; CHECK-LABEL: test_vmlal_high_lane_s16_0:
2669 ; CHECK: // %bb.0: // %entry
2670 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2671 ; CHECK-NEXT: smlal2 v0.4s, v1.8h, v2.h[0]
2674 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2675 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2676 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
2677 %add = add <4 x i32> %vmull2.i, %a
; Signed widening multiply-accumulate on the upper half of %b (lanes 2-3) times splat(%v[0]),
; added to %a. FileCheck expects a `kill` note for $d2 and a by-element `smlal2` on v2.s[0].
2681 define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
2682 ; CHECK-LABEL: test_vmlal_high_lane_s32_0:
2683 ; CHECK: // %bb.0: // %entry
2684 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2685 ; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.s[0]
2688 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2689 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2690 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
2691 %add = add <2 x i64> %vmull2.i, %a
; Signed widening multiply-accumulate on the upper half of %b (lanes 4-7) times splat(%v[0])
; from a 128-bit %v, added to %a. FileCheck expects the by-element `smlal2` on v2.h[0].
2695 define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
2696 ; CHECK-LABEL: test_vmlal_high_laneq_s16_0:
2697 ; CHECK: // %bb.0: // %entry
2698 ; CHECK-NEXT: smlal2 v0.4s, v1.8h, v2.h[0]
2701 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2702 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2703 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
2704 %add = add <4 x i32> %vmull2.i, %a
; Signed widening multiply-accumulate on the upper half of %b (lanes 2-3) times splat(%v[0])
; from a 128-bit %v, added to %a. FileCheck expects the by-element `smlal2` on v2.s[0].
2708 define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
2709 ; CHECK-LABEL: test_vmlal_high_laneq_s32_0:
2710 ; CHECK: // %bb.0: // %entry
2711 ; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.s[0]
2714 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2715 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
2716 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
2717 %add = add <2 x i64> %vmull2.i, %a
; Signed widening multiply-subtract: %a - smull(%b, splat(%v[0])) with a 64-bit %v.
; FileCheck expects a `kill` note for $d2 and a by-element `smlsl` on v2.h[0].
2721 define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
2722 ; CHECK-LABEL: test_vmlsl_lane_s16_0:
2723 ; CHECK: // %bb.0: // %entry
2724 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2725 ; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.h[0]
2728 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2729 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
2730 %sub = sub <4 x i32> %a, %vmull2.i
; Signed widening multiply-subtract: %a - smull(%b, splat(%v[0])) with a 64-bit %v.
; FileCheck expects a `kill` note for $d2 and a by-element `smlsl` on v2.s[0].
2734 define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
2735 ; CHECK-LABEL: test_vmlsl_lane_s32_0:
2736 ; CHECK: // %bb.0: // %entry
2737 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2738 ; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.s[0]
2741 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2742 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
2743 %sub = sub <2 x i64> %a, %vmull2.i
; Signed widening multiply-subtract: %a - smull(%b, splat(%v[0])) with a 128-bit %v.
; FileCheck expects the by-element `smlsl` on v2.h[0].
2747 define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
2748 ; CHECK-LABEL: test_vmlsl_laneq_s16_0:
2749 ; CHECK: // %bb.0: // %entry
2750 ; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.h[0]
2753 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2754 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
2755 %sub = sub <4 x i32> %a, %vmull2.i
; Signed widening multiply-subtract: %a - smull(%b, splat(%v[0])) with a 128-bit %v.
; FileCheck expects the by-element `smlsl` on v2.s[0].
2759 define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
2760 ; CHECK-LABEL: test_vmlsl_laneq_s32_0:
2761 ; CHECK: // %bb.0: // %entry
2762 ; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.s[0]
2765 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
2766 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
2767 %sub = sub <2 x i64> %a, %vmull2.i
; Signed widening multiply-subtract: %a minus smull(upper half of %b (lanes 4-7), splat(%v[0])).
; FileCheck expects a `kill` note for $d2 and a by-element `smlsl2` on v2.h[0].
2771 define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
2772 ; CHECK-LABEL: test_vmlsl_high_lane_s16_0:
2773 ; CHECK: // %bb.0: // %entry
2774 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2775 ; CHECK-NEXT: smlsl2 v0.4s, v1.8h, v2.h[0]
2778 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2779 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2780 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
2781 %sub = sub <4 x i32> %a, %vmull2.i
; Signed widening multiply-subtract: %a minus smull(upper half of %b (lanes 2-3), splat(%v[0])).
; FileCheck expects a `kill` note for $d2 and a by-element `smlsl2` on v2.s[0].
2785 define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
2786 ; CHECK-LABEL: test_vmlsl_high_lane_s32_0:
2787 ; CHECK: // %bb.0: // %entry
2788 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2789 ; CHECK-NEXT: smlsl2 v0.2d, v1.4s, v2.s[0]
2792 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2793 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2794 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
2795 %sub = sub <2 x i64> %a, %vmull2.i
; Signed widening multiply-subtract: %a minus smull(upper half of %b (lanes 4-7), splat(%v[0]))
; with a 128-bit %v. FileCheck expects the by-element `smlsl2` on v2.h[0].
2799 define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
2800 ; CHECK-LABEL: test_vmlsl_high_laneq_s16_0:
2801 ; CHECK: // %bb.0: // %entry
2802 ; CHECK-NEXT: smlsl2 v0.4s, v1.8h, v2.h[0]
2805 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2806 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2807 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
2808 %sub = sub <4 x i32> %a, %vmull2.i
; Signed widening multiply-subtract: %a minus smull(upper half of %b (lanes 2-3), splat(%v[0]))
; with a 128-bit %v. FileCheck expects the by-element `smlsl2` on v2.s[0].
2812 define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
2813 ; CHECK-LABEL: test_vmlsl_high_laneq_s32_0:
2814 ; CHECK: // %bb.0: // %entry
2815 ; CHECK-NEXT: smlsl2 v0.2d, v1.4s, v2.s[0]
2818 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2819 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
2820 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
2821 %sub = sub <2 x i64> %a, %vmull2.i
; Unsigned widening multiply-accumulate: %a + umull(%b, splat(%v[0])) with a 64-bit %v.
; FileCheck expects a `kill` note for $d2 and a by-element `umlal` on v2.h[0].
2825 define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
2826 ; CHECK-LABEL: test_vmlal_lane_u16_0:
2827 ; CHECK: // %bb.0: // %entry
2828 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2829 ; CHECK-NEXT: umlal v0.4s, v1.4h, v2.h[0]
2832 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2833 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
2834 %add = add <4 x i32> %vmull2.i, %a
; Unsigned widening multiply-accumulate: %a + umull(%b, splat(%v[0])) with a 64-bit %v.
; FileCheck expects a `kill` note for $d2 and a by-element `umlal` on v2.s[0].
2838 define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
2839 ; CHECK-LABEL: test_vmlal_lane_u32_0:
2840 ; CHECK: // %bb.0: // %entry
2841 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2842 ; CHECK-NEXT: umlal v0.2d, v1.2s, v2.s[0]
2845 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2846 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
2847 %add = add <2 x i64> %vmull2.i, %a
; Unsigned widening multiply-accumulate: %a + umull(%b, splat(%v[0])) with a 128-bit %v.
; FileCheck expects the by-element `umlal` on v2.h[0].
2851 define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
2852 ; CHECK-LABEL: test_vmlal_laneq_u16_0:
2853 ; CHECK: // %bb.0: // %entry
2854 ; CHECK-NEXT: umlal v0.4s, v1.4h, v2.h[0]
2857 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2858 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
2859 %add = add <4 x i32> %vmull2.i, %a
; Unsigned widening multiply-accumulate: %a + umull(%b, splat(%v[0])) with a 128-bit %v.
; FileCheck expects the by-element `umlal` on v2.s[0].
2863 define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
2864 ; CHECK-LABEL: test_vmlal_laneq_u32_0:
2865 ; CHECK: // %bb.0: // %entry
2866 ; CHECK-NEXT: umlal v0.2d, v1.2s, v2.s[0]
2869 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
2870 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
2871 %add = add <2 x i64> %vmull2.i, %a
; Unsigned widening multiply-accumulate on the upper half of %b (lanes 4-7) times splat(%v[0]),
; added to %a. FileCheck expects a `kill` note for $d2 and a by-element `umlal2` on v2.h[0].
2875 define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
2876 ; CHECK-LABEL: test_vmlal_high_lane_u16_0:
2877 ; CHECK: // %bb.0: // %entry
2878 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2879 ; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.h[0]
2882 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2883 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2884 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
2885 %add = add <4 x i32> %vmull2.i, %a
; Unsigned widening multiply-accumulate on the upper half of %b (lanes 2-3) times splat(%v[0]),
; added to %a. FileCheck expects a `kill` note for $d2 and a by-element `umlal2` on v2.s[0].
2889 define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
2890 ; CHECK-LABEL: test_vmlal_high_lane_u32_0:
2891 ; CHECK: // %bb.0: // %entry
2892 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2893 ; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.s[0]
2896 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2897 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2898 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
2899 %add = add <2 x i64> %vmull2.i, %a
; Unsigned widening multiply-accumulate on the upper half of %b (lanes 4-7) times splat(%v[0])
; from a 128-bit %v, added to %a. FileCheck expects the by-element `umlal2` on v2.h[0].
2903 define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
2904 ; CHECK-LABEL: test_vmlal_high_laneq_u16_0:
2905 ; CHECK: // %bb.0: // %entry
2906 ; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.h[0]
2909 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2910 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2911 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
2912 %add = add <4 x i32> %vmull2.i, %a
; Unsigned widening multiply-accumulate on the upper half of %b (lanes 2-3) times splat(%v[0])
; from a 128-bit %v, added to %a. FileCheck expects the by-element `umlal2` on v2.s[0].
2916 define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
2917 ; CHECK-LABEL: test_vmlal_high_laneq_u32_0:
2918 ; CHECK: // %bb.0: // %entry
2919 ; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.s[0]
2922 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2923 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
2924 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
2925 %add = add <2 x i64> %vmull2.i, %a
; Unsigned widening multiply-subtract: %a - umull(%b, splat(%v[0])) with a 64-bit %v.
; FileCheck expects a `kill` note for $d2 and a by-element `umlsl` on v2.h[0].
2929 define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
2930 ; CHECK-LABEL: test_vmlsl_lane_u16_0:
2931 ; CHECK: // %bb.0: // %entry
2932 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2933 ; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.h[0]
2936 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2937 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
2938 %sub = sub <4 x i32> %a, %vmull2.i
; Unsigned widening multiply-subtract: %a - umull(%b, splat(%v[0])) with a 64-bit %v.
; FileCheck expects a `kill` note for $d2 and a by-element `umlsl` on v2.s[0].
2942 define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
2943 ; CHECK-LABEL: test_vmlsl_lane_u32_0:
2944 ; CHECK: // %bb.0: // %entry
2945 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2946 ; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.s[0]
2949 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
2950 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
2951 %sub = sub <2 x i64> %a, %vmull2.i
; Unsigned widening multiply-subtract: %a - umull(%b, splat(%v[0])) with a 128-bit %v.
; FileCheck expects the by-element `umlsl` on v2.h[0].
2955 define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
2956 ; CHECK-LABEL: test_vmlsl_laneq_u16_0:
2957 ; CHECK: // %bb.0: // %entry
2958 ; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.h[0]
2961 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
2962 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
2963 %sub = sub <4 x i32> %a, %vmull2.i
; Unsigned widening multiply-subtract: %a - umull(%b, splat(%v[0])) with a 128-bit %v.
; FileCheck expects the by-element `umlsl` on v2.s[0].
2967 define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
2968 ; CHECK-LABEL: test_vmlsl_laneq_u32_0:
2969 ; CHECK: // %bb.0: // %entry
2970 ; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.s[0]
2973 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
2974 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
2975 %sub = sub <2 x i64> %a, %vmull2.i
; Unsigned widening multiply-subtract: %a minus umull(upper half of %b (lanes 4-7), splat(%v[0])).
; FileCheck expects a `kill` note for $d2 and a by-element `umlsl2` on v2.h[0].
2979 define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
2980 ; CHECK-LABEL: test_vmlsl_high_lane_u16_0:
2981 ; CHECK: // %bb.0: // %entry
2982 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2983 ; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.h[0]
2986 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2987 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
2988 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
2989 %sub = sub <4 x i32> %a, %vmull2.i
; Unsigned widening multiply-subtract: %a minus umull(upper half of %b (lanes 2-3), splat(%v[0])).
; FileCheck expects a `kill` note for $d2 and a by-element `umlsl2` on v2.s[0].
2993 define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
2994 ; CHECK-LABEL: test_vmlsl_high_lane_u32_0:
2995 ; CHECK: // %bb.0: // %entry
2996 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
2997 ; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.s[0]
3000 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
3001 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3002 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
3003 %sub = sub <2 x i64> %a, %vmull2.i
; Unsigned widening multiply-subtract: %a minus umull(upper half of %b (lanes 4-7), splat(%v[0]))
; with a 128-bit %v. FileCheck expects the by-element `umlsl2` on v2.h[0].
3007 define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
3008 ; CHECK-LABEL: test_vmlsl_high_laneq_u16_0:
3009 ; CHECK: // %bb.0: // %entry
3010 ; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.h[0]
3013 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3014 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
3015 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
3016 %sub = sub <4 x i32> %a, %vmull2.i
; Unsigned widening multiply-subtract: %a minus umull(upper half of %b (lanes 2-3), splat(%v[0]))
; with a 128-bit %v. FileCheck expects the by-element `umlsl2` on v2.s[0].
3020 define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
3021 ; CHECK-LABEL: test_vmlsl_high_laneq_u32_0:
3022 ; CHECK: // %bb.0: // %entry
3023 ; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.s[0]
3026 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
3027 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
3028 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
3029 %sub = sub <2 x i64> %a, %vmull2.i
; Signed widening multiply smull(%a, splat(%v[0])) with a 64-bit %v; the product is returned.
; FileCheck expects a `kill` note for $d1 and a by-element `smull` on v1.h[0].
3033 define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
3034 ; CHECK-LABEL: test_vmull_lane_s16_0:
3035 ; CHECK: // %bb.0: // %entry
3036 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3037 ; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[0]
3040 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3041 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
3042 ret <4 x i32> %vmull2.i
; Signed widening multiply smull(%a, splat(%v[0])) with a 64-bit %v; the product is returned.
; FileCheck expects a `kill` note for $d1 and a by-element `smull` on v1.s[0].
3045 define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
3046 ; CHECK-LABEL: test_vmull_lane_s32_0:
3047 ; CHECK: // %bb.0: // %entry
3048 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3049 ; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[0]
3052 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3053 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
3054 ret <2 x i64> %vmull2.i
; Unsigned widening multiply umull(%a, splat(%v[0])) with a 64-bit %v; the product is returned.
; FileCheck expects a `kill` note for $d1 and a by-element `umull` on v1.h[0].
3057 define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
3058 ; CHECK-LABEL: test_vmull_lane_u16_0:
3059 ; CHECK: // %bb.0: // %entry
3060 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3061 ; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[0]
3064 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3065 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
3066 ret <4 x i32> %vmull2.i
; Unsigned widening multiply umull(%a, splat(%v[0])) with a 64-bit %v; the product is returned.
; FileCheck expects a `kill` note for $d1 and a by-element `umull` on v1.s[0].
3069 define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
3070 ; CHECK-LABEL: test_vmull_lane_u32_0:
3071 ; CHECK: // %bb.0: // %entry
3072 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3073 ; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[0]
3076 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3077 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
3078 ret <2 x i64> %vmull2.i
; Signed widening multiply of the upper half of %a (lanes 4-7) by splat(%v[0]), 64-bit %v.
; FileCheck expects a `kill` note for $d1 and a by-element `smull2` on v1.h[0].
3081 define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
3082 ; CHECK-LABEL: test_vmull_high_lane_s16_0:
3083 ; CHECK: // %bb.0: // %entry
3084 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3085 ; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.h[0]
3088 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3089 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3090 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
3091 ret <4 x i32> %vmull2.i
; Signed widening multiply of the upper half of %a (lanes 2-3) by splat(%v[0]), 64-bit %v.
; FileCheck expects a `kill` note for $d1 and a by-element `smull2` on v1.s[0].
3094 define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
3095 ; CHECK-LABEL: test_vmull_high_lane_s32_0:
3096 ; CHECK: // %bb.0: // %entry
3097 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3098 ; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.s[0]
3101 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
3102 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3103 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
3104 ret <2 x i64> %vmull2.i
; Unsigned widening multiply of the upper half of %a (lanes 4-7) by splat(%v[0]), 64-bit %v.
; FileCheck expects a `kill` note for $d1 and a by-element `umull2` on v1.h[0].
3107 define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
3108 ; CHECK-LABEL: test_vmull_high_lane_u16_0:
3109 ; CHECK: // %bb.0: // %entry
3110 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3111 ; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.h[0]
3114 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3115 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3116 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
3117 ret <4 x i32> %vmull2.i
; Unsigned widening multiply of the upper half of %a (lanes 2-3) by splat(%v[0]), 64-bit %v.
; FileCheck expects a `kill` note for $d1 and a by-element `umull2` on v1.s[0].
3120 define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
3121 ; CHECK-LABEL: test_vmull_high_lane_u32_0:
3122 ; CHECK: // %bb.0: // %entry
3123 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3124 ; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.s[0]
3127 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
3128 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3129 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
3130 ret <2 x i64> %vmull2.i
; Signed widening multiply smull(%a, splat(%v[0])) with a 128-bit %v; the product is returned.
; FileCheck expects the by-element `smull` on v1.h[0].
3133 define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
3134 ; CHECK-LABEL: test_vmull_laneq_s16_0:
3135 ; CHECK: // %bb.0: // %entry
3136 ; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[0]
3139 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
3140 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
3141 ret <4 x i32> %vmull2.i
; Signed widening multiply smull(%a, splat(%v[0])) with a 128-bit %v; the product is returned.
; FileCheck expects the by-element `smull` on v1.s[0].
3144 define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
3145 ; CHECK-LABEL: test_vmull_laneq_s32_0:
3146 ; CHECK: // %bb.0: // %entry
3147 ; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[0]
3150 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
3151 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
3152 ret <2 x i64> %vmull2.i
; Unsigned widening multiply umull(%a, splat(%v[0])) with a 128-bit %v; the product is returned.
; FileCheck expects the by-element `umull` on v1.h[0].
3155 define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
3156 ; CHECK-LABEL: test_vmull_laneq_u16_0:
3157 ; CHECK: // %bb.0: // %entry
3158 ; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[0]
3161 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
3162 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
3163 ret <4 x i32> %vmull2.i
; Unsigned widening multiply umull(%a, splat(%v[0])) with a 128-bit %v; the product is returned.
; FileCheck expects the by-element `umull` on v1.s[0].
3166 define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
3167 ; CHECK-LABEL: test_vmull_laneq_u32_0:
3168 ; CHECK: // %bb.0: // %entry
3169 ; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[0]
3172 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
3173 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
3174 ret <2 x i64> %vmull2.i
; Signed widening multiply of the upper half of %a (lanes 4-7) by splat(%v[0]), 128-bit %v.
; FileCheck expects the by-element `smull2` on v1.h[0].
3177 define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
3178 ; CHECK-LABEL: test_vmull_high_laneq_s16_0:
3179 ; CHECK: // %bb.0: // %entry
3180 ; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.h[0]
3183 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3184 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
3185 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
3186 ret <4 x i32> %vmull2.i
; Signed widening multiply of the upper half of %a (lanes 2-3) by splat(%v[0]), 128-bit %v.
; FileCheck expects the by-element `smull2` on v1.s[0].
3189 define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
3190 ; CHECK-LABEL: test_vmull_high_laneq_s32_0:
3191 ; CHECK: // %bb.0: // %entry
3192 ; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.s[0]
3195 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
3196 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
3197 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
3198 ret <2 x i64> %vmull2.i
; Unsigned widening multiply of the upper half of %a (lanes 4-7) by splat(%v[0]), 128-bit %v.
; FileCheck expects the by-element `umull2` on v1.h[0].
3201 define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
3202 ; CHECK-LABEL: test_vmull_high_laneq_u16_0:
3203 ; CHECK: // %bb.0: // %entry
3204 ; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.h[0]
3207 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3208 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
3209 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
3210 ret <4 x i32> %vmull2.i
; Unsigned widening multiply of the upper half of %a (lanes 2-3) by splat(%v[0]), 128-bit %v.
; FileCheck expects the by-element `umull2` on v1.s[0].
3213 define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
3214 ; CHECK-LABEL: test_vmull_high_laneq_u32_0:
3215 ; CHECK: // %bb.0: // %entry
3216 ; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.s[0]
3219 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
3220 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
3221 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
3222 ret <2 x i64> %vmull2.i
; Calls sqadd(%a, sqdmull(%b, splat(%v[0]))) with a 64-bit %v and returns the result.
; FileCheck expects a `kill` note for $d2 and a by-element `sqdmlal` on v2.h[0].
3225 define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
3226 ; CHECK-LABEL: test_vqdmlal_lane_s16_0:
3227 ; CHECK: // %bb.0: // %entry
3228 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3229 ; CHECK-NEXT: sqdmlal v0.4s, v1.4h, v2.h[0]
3232 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3233 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
3234 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
3235 ret <4 x i32> %vqdmlal4.i
; Calls sqadd(%a, sqdmull(%b, splat(%v[0]))) with a 64-bit %v and returns the result.
; FileCheck expects a `kill` note for $d2 and a by-element `sqdmlal` on v2.s[0].
3238 define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
3239 ; CHECK-LABEL: test_vqdmlal_lane_s32_0:
3240 ; CHECK: // %bb.0: // %entry
3241 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3242 ; CHECK-NEXT: sqdmlal v0.2d, v1.2s, v2.s[0]
3245 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3246 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
3247 %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
3248 ret <2 x i64> %vqdmlal4.i
; Calls sqadd(%a, sqdmull(upper half of %b (lanes 4-7), splat(%v[0]))) with a 64-bit %v.
; FileCheck expects a `kill` note for $d2 and a by-element `sqdmlal2` on v2.h[0].
3251 define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
3252 ; CHECK-LABEL: test_vqdmlal_high_lane_s16_0:
3253 ; CHECK: // %bb.0: // %entry
3254 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3255 ; CHECK-NEXT: sqdmlal2 v0.4s, v1.8h, v2.h[0]
3258 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3259 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3260 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
3261 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
3262 ret <4 x i32> %vqdmlal4.i
; Calls sqadd(%a, sqdmull(upper half of %b (lanes 2-3), splat(%v[0]))) with a 64-bit %v.
; FileCheck expects a `kill` note for $d2 and a by-element `sqdmlal2` on v2.s[0].
3265 define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
3266 ; CHECK-LABEL: test_vqdmlal_high_lane_s32_0:
3267 ; CHECK: // %bb.0: // %entry
3268 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3269 ; CHECK-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[0]
3272 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
3273 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3274 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
3275 %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
3276 ret <2 x i64> %vqdmlal4.i
; Calls sqsub(%a, sqdmull(%b, splat(%v[0]))) with a 64-bit %v and returns the result.
; FileCheck expects a `kill` note for $d2 and a by-element `sqdmlsl` on v2.h[0].
3279 define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
3280 ; CHECK-LABEL: test_vqdmlsl_lane_s16_0:
3281 ; CHECK: // %bb.0: // %entry
3282 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3283 ; CHECK-NEXT: sqdmlsl v0.4s, v1.4h, v2.h[0]
3286 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3287 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
3288 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
3289 ret <4 x i32> %vqdmlsl4.i
; Calls sqsub(%a, sqdmull(%b, splat(%v[0]))) with a 64-bit %v and returns the result.
; FileCheck expects a `kill` note for $d2 and a by-element `sqdmlsl` on v2.s[0].
3292 define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
3293 ; CHECK-LABEL: test_vqdmlsl_lane_s32_0:
3294 ; CHECK: // %bb.0: // %entry
3295 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3296 ; CHECK-NEXT: sqdmlsl v0.2d, v1.2s, v2.s[0]
3299 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3300 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
3301 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
3302 ret <2 x i64> %vqdmlsl4.i
; Calls sqsub(%a, sqdmull(upper half of %b (lanes 4-7), splat(%v[0]))) with a 64-bit %v.
; FileCheck expects a `kill` note for $d2 and a by-element `sqdmlsl2` on v2.h[0].
3305 define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
3306 ; CHECK-LABEL: test_vqdmlsl_high_lane_s16_0:
3307 ; CHECK: // %bb.0: // %entry
3308 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3309 ; CHECK-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.h[0]
3312 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3313 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3314 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
3315 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
3316 ret <4 x i32> %vqdmlsl4.i
; Lane-0, high-half subtract-accumulate pattern on 32-bit elements: elements 2
; and 3 of %b and a splat of element 0 of %v feed the sqdmull intrinsic, and the
; product is combined with %a through the sqsub intrinsic. The assertions expect
; one by-element sqdmlsl2 reading v2.s[0], preceded by the `kill` annotation on
; $d2/$q2.
3319 define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
3320 ; CHECK-LABEL: test_vqdmlsl_high_lane_s32_0:
3321 ; CHECK: // %bb.0: // %entry
3322 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3323 ; CHECK-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.s[0]
; Mask <2, 3> keeps only the upper two elements of %b.
3326 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
; An all-zero mask replicates element 0 of %v into every result element.
3327 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3328 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
3329 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
3330 ret <2 x i64> %vqdmlsl4.i
; Lane-0 sqdmull pattern on 16-bit elements: %a is multiplied, via the sqdmull
; intrinsic, by a splat of element 0 of the 64-bit vector %v. The assertions
; expect one by-element sqdmull reading v1.h[0], preceded by the `kill`
; annotation on $d1/$q1.
3333 define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
3334 ; CHECK-LABEL: test_vqdmull_lane_s16_0:
3335 ; CHECK: // %bb.0: // %entry
3336 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3337 ; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.h[0]
; An all-zero mask replicates element 0 of %v into every result element.
3340 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3341 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
3342 ret <4 x i32> %vqdmull2.i
; Lane-0 sqdmull pattern on 32-bit elements: %a is multiplied, via the sqdmull
; intrinsic, by a splat of element 0 of the 64-bit vector %v. The assertions
; expect one by-element sqdmull reading v1.s[0], preceded by the `kill`
; annotation on $d1/$q1.
3345 define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
3346 ; CHECK-LABEL: test_vqdmull_lane_s32_0:
3347 ; CHECK: // %bb.0: // %entry
3348 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3349 ; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[0]
; An all-zero mask replicates element 0 of %v into every result element.
3352 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3353 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
3354 ret <2 x i64> %vqdmull2.i
; Lane-0 sqdmull pattern where the lane source %v is a 128-bit <8 x i16>
; vector: the shuffle both narrows %v to four elements and splats element 0.
; The assertions expect one by-element sqdmull reading v1.h[0] and list no
; `kill` annotation for this variant.
3357 define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
3358 ; CHECK-LABEL: test_vqdmull_laneq_s16_0:
3359 ; CHECK: // %bb.0: // %entry
3360 ; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.h[0]
; An all-zero 4-element mask yields four copies of element 0 of %v.
3363 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
3364 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
3365 ret <4 x i32> %vqdmull2.i
; Lane-0 sqdmull pattern where the lane source %v is a 128-bit <4 x i32>
; vector: the shuffle both narrows %v to two elements and splats element 0.
; The assertions expect one by-element sqdmull reading v1.s[0] and list no
; `kill` annotation for this variant.
3368 define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
3369 ; CHECK-LABEL: test_vqdmull_laneq_s32_0:
3370 ; CHECK: // %bb.0: // %entry
3371 ; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[0]
; An all-zero 2-element mask yields two copies of element 0 of %v.
3374 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
3375 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
3376 ret <2 x i64> %vqdmull2.i
; Lane-0, high-half sqdmull pattern on 16-bit elements: elements 4..7 of %a are
; multiplied, via the sqdmull intrinsic, by a splat of element 0 of the 64-bit
; vector %v. The assertions expect one by-element sqdmull2 reading v1.h[0],
; preceded by the `kill` annotation on $d1/$q1.
3379 define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
3380 ; CHECK-LABEL: test_vqdmull_high_lane_s16_0:
3381 ; CHECK: // %bb.0: // %entry
3382 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3383 ; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[0]
; Mask <4, 5, 6, 7> keeps only the upper four elements of %a.
3386 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; An all-zero mask replicates element 0 of %v into every result element.
3387 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3388 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
3389 ret <4 x i32> %vqdmull2.i
; Lane-0, high-half sqdmull pattern on 32-bit elements: elements 2 and 3 of %a
; are multiplied, via the sqdmull intrinsic, by a splat of element 0 of the
; 64-bit vector %v. The assertions expect one by-element sqdmull2 reading
; v1.s[0], preceded by the `kill` annotation on $d1/$q1.
3392 define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
3393 ; CHECK-LABEL: test_vqdmull_high_lane_s32_0:
3394 ; CHECK: // %bb.0: // %entry
3395 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3396 ; CHECK-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[0]
; Mask <2, 3> keeps only the upper two elements of %a.
3399 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
; An all-zero mask replicates element 0 of %v into every result element.
3400 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3401 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
3402 ret <2 x i64> %vqdmull2.i
; Lane-0, high-half sqdmull pattern where both inputs are 128-bit <8 x i16>
; vectors: elements 4..7 of %a are multiplied, via the sqdmull intrinsic, by
; four copies of element 0 of %v. The assertions expect one by-element sqdmull2
; reading v1.h[0] and list no `kill` annotation for this variant.
3405 define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
3406 ; CHECK-LABEL: test_vqdmull_high_laneq_s16_0:
3407 ; CHECK: // %bb.0: // %entry
3408 ; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[0]
; Mask <4, 5, 6, 7> keeps only the upper four elements of %a.
3411 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; An all-zero 4-element mask yields four copies of element 0 of %v.
3412 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
3413 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
3414 ret <4 x i32> %vqdmull2.i
; Lane-0, high-half sqdmull pattern where both inputs are 128-bit <4 x i32>
; vectors: elements 2 and 3 of %a are multiplied, via the sqdmull intrinsic, by
; two copies of element 0 of %v. The assertions expect one by-element sqdmull2
; reading v1.s[0] and list no `kill` annotation for this variant.
3417 define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
3418 ; CHECK-LABEL: test_vqdmull_high_laneq_s32_0:
3419 ; CHECK: // %bb.0: // %entry
3420 ; CHECK-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[0]
; Mask <2, 3> keeps only the upper two elements of %a.
3423 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
; An all-zero 2-element mask yields two copies of element 0 of %v.
3424 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
3425 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
3426 ret <2 x i64> %vqdmull2.i
; Lane-0 sqdmulh pattern on a 64-bit <4 x i16> vector: %a and a splat of
; element 0 of %v are passed to the sqdmulh intrinsic. The assertions expect
; one by-element sqdmulh reading v1.h[0], preceded by the `kill` annotation on
; $d1/$q1.
3429 define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
3430 ; CHECK-LABEL: test_vqdmulh_lane_s16_0:
3431 ; CHECK: // %bb.0: // %entry
3432 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3433 ; CHECK-NEXT: sqdmulh v0.4h, v0.4h, v1.h[0]
; An all-zero mask replicates element 0 of %v into every result element.
3436 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3437 %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
3438 ret <4 x i16> %vqdmulh2.i
; Lane-0 sqdmulh pattern on a 128-bit <8 x i16> vector with a 64-bit lane
; source: the shuffle widens %v to eight copies of its element 0, which is
; passed with %a to the sqdmulh intrinsic. The assertions expect one by-element
; sqdmulh reading v1.h[0], preceded by the `kill` annotation on $d1/$q1.
3441 define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
3442 ; CHECK-LABEL: test_vqdmulhq_lane_s16_0:
3443 ; CHECK: // %bb.0: // %entry
3444 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3445 ; CHECK-NEXT: sqdmulh v0.8h, v0.8h, v1.h[0]
; An all-zero 8-element mask yields eight copies of element 0 of %v.
3448 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
3449 %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
3450 ret <8 x i16> %vqdmulh2.i
; Lane-0 sqdmulh pattern on a 64-bit <2 x i32> vector: %a and a splat of
; element 0 of %v are passed to the sqdmulh intrinsic. The assertions expect
; one by-element sqdmulh reading v1.s[0], preceded by the `kill` annotation on
; $d1/$q1.
3453 define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
3454 ; CHECK-LABEL: test_vqdmulh_lane_s32_0:
3455 ; CHECK: // %bb.0: // %entry
3456 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3457 ; CHECK-NEXT: sqdmulh v0.2s, v0.2s, v1.s[0]
; An all-zero mask replicates element 0 of %v into every result element.
3460 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3461 %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
3462 ret <2 x i32> %vqdmulh2.i
; Lane-0 sqdmulh pattern on a 128-bit <4 x i32> vector with a 64-bit lane
; source: the shuffle widens %v to four copies of its element 0, which is
; passed with %a to the sqdmulh intrinsic. The assertions expect one by-element
; sqdmulh reading v1.s[0], preceded by the `kill` annotation on $d1/$q1.
3465 define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
3466 ; CHECK-LABEL: test_vqdmulhq_lane_s32_0:
3467 ; CHECK: // %bb.0: // %entry
3468 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3469 ; CHECK-NEXT: sqdmulh v0.4s, v0.4s, v1.s[0]
; An all-zero 4-element mask yields four copies of element 0 of %v.
3472 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
3473 %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
3474 ret <4 x i32> %vqdmulh2.i
; Lane-0 sqrdmulh pattern on a 64-bit <4 x i16> vector: %a and a splat of
; element 0 of %v are passed to the sqrdmulh intrinsic. The assertions expect
; one by-element sqrdmulh reading v1.h[0], preceded by the `kill` annotation on
; $d1/$q1.
3477 define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
3478 ; CHECK-LABEL: test_vqrdmulh_lane_s16_0:
3479 ; CHECK: // %bb.0: // %entry
3480 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3481 ; CHECK-NEXT: sqrdmulh v0.4h, v0.4h, v1.h[0]
; An all-zero mask replicates element 0 of %v into every result element.
3484 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
3485 %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
3486 ret <4 x i16> %vqrdmulh2.i
; Lane-0 sqrdmulh pattern on a 128-bit <8 x i16> vector with a 64-bit lane
; source: the shuffle widens %v to eight copies of its element 0, which is
; passed with %a to the sqrdmulh intrinsic. The assertions expect one
; by-element sqrdmulh reading v1.h[0], preceded by the `kill` annotation on
; $d1/$q1.
3489 define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
3490 ; CHECK-LABEL: test_vqrdmulhq_lane_s16_0:
3491 ; CHECK: // %bb.0: // %entry
3492 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3493 ; CHECK-NEXT: sqrdmulh v0.8h, v0.8h, v1.h[0]
; An all-zero 8-element mask yields eight copies of element 0 of %v.
3496 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
3497 %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
3498 ret <8 x i16> %vqrdmulh2.i
; Lane-0 sqrdmulh pattern on a 64-bit <2 x i32> vector: %a and a splat of
; element 0 of %v are passed to the sqrdmulh intrinsic. The assertions expect
; one by-element sqrdmulh reading v1.s[0], preceded by the `kill` annotation on
; $d1/$q1.
3501 define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
3502 ; CHECK-LABEL: test_vqrdmulh_lane_s32_0:
3503 ; CHECK: // %bb.0: // %entry
3504 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3505 ; CHECK-NEXT: sqrdmulh v0.2s, v0.2s, v1.s[0]
; An all-zero mask replicates element 0 of %v into every result element.
3508 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
3509 %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
3510 ret <2 x i32> %vqrdmulh2.i
; Lane-0 sqrdmulh pattern on a 128-bit <4 x i32> vector with a 64-bit lane
; source: the shuffle widens %v to four copies of its element 0, which is
; passed with %a to the sqrdmulh intrinsic. The assertions expect one
; by-element sqrdmulh reading v1.s[0], preceded by the `kill` annotation on
; $d1/$q1.
3513 define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
3514 ; CHECK-LABEL: test_vqrdmulhq_lane_s32_0:
3515 ; CHECK: // %bb.0: // %entry
3516 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3517 ; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.s[0]
; An all-zero 4-element mask yields four copies of element 0 of %v.
3520 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
3521 %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
3522 ret <4 x i32> %vqrdmulh2.i
; Lane-0 floating-point multiply on <2 x float>: a splat of element 0 of %v is
; multiplied by %a with a plain IR fmul (no intrinsic). The splat is the left
; operand of the fmul, yet the assertions expect it as the by-element operand
; v1.s[0] of a single fmul, preceded by the `kill` annotation on $d1/$q1.
3525 define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) {
3526 ; CHECK-LABEL: test_vmul_lane_f32_0:
3527 ; CHECK: // %bb.0: // %entry
3528 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3529 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[0]
; An all-zero mask replicates element 0 of %v into every result element.
3532 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
3533 %mul = fmul <2 x float> %shuffle, %a
3534 ret <2 x float> %mul
; Lane-0 floating-point multiply on <4 x float> with a 64-bit lane source: the
; shuffle widens %v to four copies of its element 0, which is multiplied by %a
; with a plain IR fmul. The assertions expect a single by-element fmul reading
; v1.s[0], preceded by the `kill` annotation on $d1/$q1.
3537 define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
3538 ; CHECK-LABEL: test_vmulq_lane_f32_0:
3539 ; CHECK: // %bb.0: // %entry
3540 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3541 ; CHECK-NEXT: fmul v0.4s, v0.4s, v1.s[0]
; An all-zero 4-element mask yields four copies of element 0 of %v.
3544 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
3545 %mul = fmul <4 x float> %shuffle, %a
3546 ret <4 x float> %mul
; Lane-0 floating-point multiply on <2 x float> with a 128-bit lane source: the
; shuffle narrows %v to two copies of its element 0, which is multiplied by %a
; with a plain IR fmul. The assertions expect a single by-element fmul reading
; v1.s[0] and list no `kill` annotation for this variant.
3549 define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
3550 ; CHECK-LABEL: test_vmul_laneq_f32_0:
3551 ; CHECK: // %bb.0: // %entry
3552 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[0]
; An all-zero 2-element mask yields two copies of element 0 of %v.
3555 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
3556 %mul = fmul <2 x float> %shuffle, %a
3557 ret <2 x float> %mul
; Scalar-shaped lane-0 multiply on double: %a (a <1 x double>) is reinterpreted
; as a scalar double, multiplied by element 0 extracted from %v, and the product
; is placed into element 0 of a fresh <1 x double>. The assertions expect the
; scalar by-element form `fmul d0, d0, v1.d[0]`.
3560 define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) {
3561 ; CHECK-LABEL: test_vmul_laneq_f64_0:
3562 ; CHECK: // %bb.0: // %entry
3563 ; CHECK-NEXT: fmul d0, d0, v1.d[0]
; Two bitcasts take the <1 x double> through <8 x i8> to a scalar double.
3566 %0 = bitcast <1 x double> %a to <8 x i8>
3567 %1 = bitcast <8 x i8> %0 to double
3568 %extract = extractelement <2 x double> %v, i32 0
3569 %2 = fmul double %1, %extract
3570 %3 = insertelement <1 x double> undef, double %2, i32 0
; Lane-0 floating-point multiply where both inputs are <4 x float>: a splat of
; element 0 of %v is multiplied by %a with a plain IR fmul. The assertions
; expect a single by-element fmul reading v1.s[0] and list no `kill`
; annotation for this variant.
3574 define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
3575 ; CHECK-LABEL: test_vmulq_laneq_f32_0:
3576 ; CHECK: // %bb.0: // %entry
3577 ; CHECK-NEXT: fmul v0.4s, v0.4s, v1.s[0]
; An all-zero mask replicates element 0 of %v into every result element.
3580 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
3581 %mul = fmul <4 x float> %shuffle, %a
3582 ret <4 x float> %mul
; Lane-0 floating-point multiply where both inputs are <2 x double>: a splat of
; element 0 of %v is multiplied by %a with a plain IR fmul. The assertions
; expect a single by-element fmul reading v1.d[0] and list no `kill`
; annotation for this variant.
3585 define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
3586 ; CHECK-LABEL: test_vmulq_laneq_f64_0:
3587 ; CHECK: // %bb.0: // %entry
3588 ; CHECK-NEXT: fmul v0.2d, v0.2d, v1.d[0]
; An all-zero mask replicates element 0 of %v into every result element.
3591 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
3592 %mul = fmul <2 x double> %shuffle, %a
3593 ret <2 x double> %mul
; Lane-0 fmulx pattern on <2 x float>: %a and a splat of element 0 of %v are
; passed to the fmulx intrinsic. The assertions expect one by-element fmulx
; reading v1.s[0], preceded by the `kill` annotation on $d1/$q1.
3596 define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) {
3597 ; CHECK-LABEL: test_vmulx_lane_f32_0:
3598 ; CHECK: // %bb.0: // %entry
3599 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3600 ; CHECK-NEXT: fmulx v0.2s, v0.2s, v1.s[0]
; An all-zero mask replicates element 0 of %v into every result element.
3603 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
3604 %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
3605 ret <2 x float> %vmulx2.i
; Lane-0 fmulx pattern on <4 x float> with a 64-bit lane source: the shuffle
; widens %v to four copies of its element 0, which is passed with %a to the
; fmulx intrinsic. The assertions expect one by-element fmulx reading v1.s[0],
; preceded by the `kill` annotation on $d1/$q1.
3608 define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
3609 ; CHECK-LABEL: test_vmulxq_lane_f32_0:
3610 ; CHECK: // %bb.0: // %entry
3611 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3612 ; CHECK-NEXT: fmulx v0.4s, v0.4s, v1.s[0]
; An all-zero 4-element mask yields four copies of element 0 of %v.
3615 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
3616 %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
3617 ret <4 x float> %vmulx2.i
; Lane-0 fmulx pattern on <2 x double> with a single-element lane source: the
; shuffle widens the <1 x double> %v to two copies of its only element, which
; is passed with %a to the fmulx intrinsic. The assertions expect one
; by-element fmulx reading v1.d[0], preceded by the `kill` annotation on
; $d1/$q1.
3620 define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) {
3621 ; CHECK-LABEL: test_vmulxq_lane_f64_0:
3622 ; CHECK: // %bb.0: // %entry
3623 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
3624 ; CHECK-NEXT: fmulx v0.2d, v0.2d, v1.d[0]
; An all-zero 2-element mask yields two copies of element 0 of %v.
3627 %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
3628 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
3629 ret <2 x double> %vmulx2.i
; Lane-0 fmulx pattern on <2 x float> with a 128-bit lane source: the shuffle
; narrows %v to two copies of its element 0, which is passed with %a to the
; fmulx intrinsic. The assertions expect one by-element fmulx reading v1.s[0]
; and list no `kill` annotation for this variant.
3632 define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
3633 ; CHECK-LABEL: test_vmulx_laneq_f32_0:
3634 ; CHECK: // %bb.0: // %entry
3635 ; CHECK-NEXT: fmulx v0.2s, v0.2s, v1.s[0]
; An all-zero 2-element mask yields two copies of element 0 of %v.
3638 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
3639 %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
3640 ret <2 x float> %vmulx2.i
; Lane-0 fmulx pattern where both inputs are <4 x float>: %a and a splat of
; element 0 of %v are passed to the fmulx intrinsic. The assertions expect one
; by-element fmulx reading v1.s[0] and list no `kill` annotation for this
; variant.
3643 define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
3644 ; CHECK-LABEL: test_vmulxq_laneq_f32_0:
3645 ; CHECK: // %bb.0: // %entry
3646 ; CHECK-NEXT: fmulx v0.4s, v0.4s, v1.s[0]
; An all-zero mask replicates element 0 of %v into every result element.
3649 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
3650 %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
3651 ret <4 x float> %vmulx2.i
; Lane-0 fmulx pattern where both inputs are <2 x double>: %a and a splat of
; element 0 of %v are passed to the fmulx intrinsic. The assertions expect one
; by-element fmulx reading v1.d[0] and list no `kill` annotation for this
; variant.
3654 define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
3655 ; CHECK-LABEL: test_vmulxq_laneq_f64_0:
3656 ; CHECK: // %bb.0: // %entry
3657 ; CHECK-NEXT: fmulx v0.2d, v0.2d, v1.d[0]
; An all-zero mask replicates element 0 of %v into every result element.
3660 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
3661 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
3662 ret <2 x double> %vmulx2.i
; Two identical splats of element 3 of %v are used by two different operations:
; %lane1 feeds llvm.fma together with %b and %a, and %lane2 feeds an fmul with
; %c whose result is subtracted from the fma result. The assertions expect no
; separate dup: an fmla and an fmls that both index v3.s[3] directly.
; NOTE(review): emitting the fmul/fsub pair as one fmls is consistent with the
; -fp-contract=fast option on the llc command lines at the top of the file.
3665 define <4 x float> @optimize_dup(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %v) {
3666 ; CHECK-LABEL: optimize_dup:
3667 ; CHECK: // %bb.0: // %entry
3668 ; CHECK-NEXT: fmla v0.4s, v1.4s, v3.s[3]
3669 ; CHECK-NEXT: fmls v0.4s, v2.4s, v3.s[3]
; Mask <3, 3, 3, 3> replicates element 3 of %v.
3672 %lane1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
3673 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane1, <4 x float> %b, <4 x float> %a)
; Same mask as %lane1, written as a second, separate shuffle.
3674 %lane2 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
3675 %1 = fmul <4 x float> %lane2, %c
3676 %s = fsub <4 x float> %0, %1
; Same shape as a shared-splat fma/fmul/fsub sequence, but the two splats pick
; different elements of %v: %lane1 replicates element 3 and feeds llvm.fma with
; %b and %a, while %lane2 replicates element 1 and feeds the fmul with %c whose
; result is subtracted from the fma result. The assertions expect an fmla
; indexing v3.s[3] followed by an fmls indexing v3.s[1].
; NOTE(review): emitting the fmul/fsub pair as one fmls is consistent with the
; -fp-contract=fast option on the llc command lines at the top of the file.
3680 define <4 x float> @no_optimize_dup(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %v) {
3681 ; CHECK-LABEL: no_optimize_dup:
3682 ; CHECK: // %bb.0: // %entry
3683 ; CHECK-NEXT: fmla v0.4s, v1.4s, v3.s[3]
3684 ; CHECK-NEXT: fmls v0.4s, v2.4s, v3.s[1]
; Mask <3, 3, 3, 3> replicates element 3 of %v.
3687 %lane1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
3688 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane1, <4 x float> %b, <4 x float> %a)
; Mask <1, 1, 1, 1> replicates element 1 of %v, unlike %lane1.
3689 %lane2 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
3690 %1 = fmul <4 x float> %lane2, %c
3691 %s = fsub <4 x float> %0, %1
; Lane-1 fma pattern compiled with a per-function "target-cpu"="cortex-a57"
; attribute: a splat of element 1 of %v feeds llvm.fma together with %b and %a.
; The assertions expect one by-element fmla reading v2.s[1], preceded by the
; `kill` annotation on $d2/$q2.
; NOTE(review): the function name suggests this guards per-CPU caching inside
; the SIMD instruction optimization pass; confirm against that pass.
3695 define <2 x float> @test_vfma_lane_simdinstr_opt_pass_caching_a57(<2 x float> %a, <2 x float> %b, <2 x float> %v) "target-cpu"="cortex-a57" {
3696 ; CHECK-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_a57:
3697 ; CHECK: // %bb.0: // %entry
3698 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3699 ; CHECK-NEXT: fmla v0.2s, v1.2s, v2.s[1]
; Mask <1, 1> replicates element 1 of %v.
3702 %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
3703 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
; Lane-1 fma pattern compiled with a per-function "target-cpu"="exynos-m3"
; attribute: a splat of element 1 of %v feeds llvm.fma together with %b and %a.
; The assertions expect one by-element fmla reading v2.s[1], preceded by the
; `kill` annotation on $d2/$q2.
; NOTE(review): the function name suggests this guards per-CPU caching inside
; the SIMD instruction optimization pass; confirm against that pass.
3707 define <2 x float> @test_vfma_lane_simdinstr_opt_pass_caching_m3(<2 x float> %a, <2 x float> %b, <2 x float> %v) "target-cpu"="exynos-m3" {
3708 ; CHECK-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_m3:
3709 ; CHECK: // %bb.0: // %entry
3710 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
3711 ; CHECK-NEXT: fmla v0.2s, v1.2s, v2.s[1]
; Mask <1, 1> replicates element 1 of %v.
3714 %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
3715 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)