1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s
; Codegen tests for complex-multiply patterns written as deinterleave /
; multiply / re-interleave sequences on <4 x float>. Each function is preceded
; by a note saying whether fcmla/fcadd selection is expected for it.
4 target triple = "aarch64"
6 ; Expected to transform
; Computes (a * b) * c, treating the even lanes (0, 2) of each operand as real
; parts and the odd lanes (1, 3) as imaginary parts. The expected output is two
; chained fcmla #0/#90 pairs, each accumulating into a zeroed register.
7 define <4 x float> @mul_mul(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
8 ; CHECK-LABEL: mul_mul:
9 ; CHECK: // %bb.0: // %entry
10 ; CHECK-NEXT: movi v4.2d, #0000000000000000
11 ; CHECK-NEXT: movi v3.2d, #0000000000000000
12 ; CHECK-NEXT: fcmla v4.4s, v1.4s, v0.4s, #0
13 ; CHECK-NEXT: fcmla v4.4s, v1.4s, v0.4s, #90
14 ; CHECK-NEXT: fcmla v3.4s, v2.4s, v4.4s, #0
15 ; CHECK-NEXT: fcmla v3.4s, v2.4s, v4.4s, #90
16 ; CHECK-NEXT: mov v0.16b, v3.16b
; Split %a and %b into even-lane (real) and odd-lane (imaginary) halves.
19 %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
20 %strided.vec151 = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
21 %strided.vec153 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
22 %strided.vec154 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
; First product a * b: %4 is its imaginary part, %5 its real part.
23 %0 = fmul fast <2 x float> %strided.vec154, %strided.vec151
24 %1 = fmul fast <2 x float> %strided.vec153, %strided.vec
25 %2 = fmul fast <2 x float> %strided.vec154, %strided.vec
26 %3 = fmul fast <2 x float> %strided.vec153, %strided.vec151
27 %4 = fadd fast <2 x float> %3, %2
28 %5 = fsub fast <2 x float> %1, %0
; Second product (a * b) * c: %8 is the imaginary part, %11 the real part.
29 %strided.vec156 = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
30 %strided.vec157 = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
31 %6 = fmul fast <2 x float> %4, %strided.vec156
32 %7 = fmul fast <2 x float> %5, %strided.vec157
33 %8 = fadd fast <2 x float> %6, %7
34 %9 = fmul fast <2 x float> %strided.vec156, %5
35 %10 = fmul fast <2 x float> %4, %strided.vec157
36 %11 = fsub fast <2 x float> %9, %10
; Re-interleave the halves as (re0, im0, re1, im1).
37 %interleaved.vec = shufflevector <2 x float> %11, <2 x float> %8, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
38 ret <4 x float> %interleaved.vec
41 ; Expected to not transform
; Multiply-shaped arithmetic whose two "halves" come from different values:
; the even lanes of (b - c) and the odd lanes of (b - a) are combined with the
; even/odd lanes of %c. The expected output contains no fcmla, only
; fsub/ext/zip/fmul/fneg/fmla.
42 define <4 x float> @add_mul(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
43 ; CHECK-LABEL: add_mul:
44 ; CHECK: // %bb.0: // %entry
45 ; CHECK-NEXT: fsub v0.4s, v1.4s, v0.4s
46 ; CHECK-NEXT: fsub v1.4s, v1.4s, v2.4s
47 ; CHECK-NEXT: ext v3.16b, v2.16b, v2.16b, #8
48 ; CHECK-NEXT: ext v4.16b, v0.16b, v0.16b, #8
49 ; CHECK-NEXT: ext v5.16b, v1.16b, v1.16b, #8
50 ; CHECK-NEXT: zip2 v0.2s, v0.2s, v4.2s
51 ; CHECK-NEXT: zip2 v4.2s, v2.2s, v3.2s
52 ; CHECK-NEXT: zip1 v1.2s, v1.2s, v5.2s
53 ; CHECK-NEXT: zip1 v2.2s, v2.2s, v3.2s
54 ; CHECK-NEXT: fmul v5.2s, v4.2s, v0.2s
55 ; CHECK-NEXT: fmul v3.2s, v1.2s, v4.2s
56 ; CHECK-NEXT: fneg v4.2s, v5.2s
57 ; CHECK-NEXT: fmla v3.2s, v0.2s, v2.2s
58 ; CHECK-NEXT: fmla v4.2s, v1.2s, v2.2s
59 ; CHECK-NEXT: zip1 v0.4s, v4.4s, v3.4s
; %1 = even lanes of (b - c); %4 (below) = odd lanes of (b - a).
62 %0 = fsub fast <4 x float> %b, %c
63 %1 = shufflevector <4 x float> %0, <4 x float> poison, <2 x i32> <i32 0, i32 2>
64 %strided.vec58 = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
65 %strided.vec59 = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
66 %2 = fmul fast <2 x float> %1, %strided.vec59
67 %3 = fsub fast <4 x float> %b, %a
68 %4 = shufflevector <4 x float> %3, <4 x float> poison, <2 x i32> <i32 1, i32 3>
; Cross terms are summed into %6 (odd result lanes) and the straight terms are
; subtracted into %9 (even result lanes).
69 %5 = fmul fast <2 x float> %strided.vec58, %4
70 %6 = fadd fast <2 x float> %5, %2
71 %7 = fmul fast <2 x float> %strided.vec58, %1
72 %8 = fmul fast <2 x float> %strided.vec59, %4
73 %9 = fsub fast <2 x float> %7, %8
74 %interleaved.vec = shufflevector <2 x float> %9, <2 x float> %6, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
75 ret <4 x float> %interleaved.vec
78 ; Expected to not transform
; Forms b * c with its halves swapped and one negated (%2 = imag(b * c) is used
; as the real half, %6 = -real(b * c) as the imaginary half, i.e. b * c rotated
; by 270 degrees) and multiplies that by %a. The expected output contains no
; fcmla.
79 define <4 x float> @mul_mul270_mul(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
80 ; CHECK-LABEL: mul_mul270_mul:
81 ; CHECK: // %bb.0: // %entry
82 ; CHECK-NEXT: ext v3.16b, v2.16b, v2.16b, #8
83 ; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8
84 ; CHECK-NEXT: zip1 v5.2s, v2.2s, v3.2s
85 ; CHECK-NEXT: zip1 v6.2s, v1.2s, v4.2s
86 ; CHECK-NEXT: zip2 v2.2s, v2.2s, v3.2s
87 ; CHECK-NEXT: zip2 v1.2s, v1.2s, v4.2s
88 ; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8
89 ; CHECK-NEXT: fmul v7.2s, v6.2s, v5.2s
90 ; CHECK-NEXT: fneg v4.2s, v7.2s
91 ; CHECK-NEXT: zip2 v7.2s, v0.2s, v3.2s
92 ; CHECK-NEXT: zip1 v0.2s, v0.2s, v3.2s
93 ; CHECK-NEXT: fmla v4.2s, v2.2s, v1.2s
94 ; CHECK-NEXT: fmul v1.2s, v1.2s, v5.2s
95 ; CHECK-NEXT: fmul v3.2s, v4.2s, v7.2s
96 ; CHECK-NEXT: fmla v1.2s, v2.2s, v6.2s
97 ; CHECK-NEXT: fmul v2.2s, v4.2s, v0.2s
98 ; CHECK-NEXT: fneg v3.2s, v3.2s
99 ; CHECK-NEXT: fmla v2.2s, v7.2s, v1.2s
100 ; CHECK-NEXT: fmla v3.2s, v0.2s, v1.2s
101 ; CHECK-NEXT: zip1 v0.4s, v3.4s, v2.4s
; Even/odd lane halves of %c and %b.
104 %strided.vec = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
105 %strided.vec81 = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
106 %strided.vec83 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
107 %strided.vec84 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
; %2 = sum of the cross terms of b and c.
108 %0 = fmul fast <2 x float> %strided.vec84, %strided.vec
109 %1 = fmul fast <2 x float> %strided.vec83, %strided.vec81
110 %2 = fadd fast <2 x float> %1, %0
111 %strided.vec86 = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
112 %strided.vec87 = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
113 %3 = fmul fast <2 x float> %2, %strided.vec87
; %6 = (odd * odd) - (even * even): the operands are in the opposite order to
; the usual real-part subtraction.
114 %4 = fmul fast <2 x float> %strided.vec84, %strided.vec81
115 %5 = fmul fast <2 x float> %strided.vec83, %strided.vec
116 %6 = fsub fast <2 x float> %4, %5
; Multiply (%2, %6) by %a: %11 feeds the even result lanes, %8 the odd ones.
117 %7 = fmul fast <2 x float> %6, %strided.vec86
118 %8 = fadd fast <2 x float> %3, %7
119 %9 = fmul fast <2 x float> %2, %strided.vec86
120 %10 = fmul fast <2 x float> %6, %strided.vec87
121 %11 = fsub fast <2 x float> %9, %10
122 %interleaved.vec = shufflevector <2 x float> %11, <2 x float> %8, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
123 ret <4 x float> %interleaved.vec
127 ; Expected to transform
; Computes (a * b) * a: %a is an operand of both the inner and the outer
; multiply. The expected output is two chained fcmla #0/#90 pairs.
128 define <4 x float> @mul_triangle(<4 x float> %a, <4 x float> %b) {
129 ; CHECK-LABEL: mul_triangle:
130 ; CHECK: // %bb.0: // %entry
131 ; CHECK-NEXT: movi v3.2d, #0000000000000000
132 ; CHECK-NEXT: movi v2.2d, #0000000000000000
133 ; CHECK-NEXT: fcmla v3.4s, v0.4s, v1.4s, #0
134 ; CHECK-NEXT: fcmla v3.4s, v0.4s, v1.4s, #90
135 ; CHECK-NEXT: fcmla v2.4s, v3.4s, v0.4s, #0
136 ; CHECK-NEXT: fcmla v2.4s, v3.4s, v0.4s, #90
137 ; CHECK-NEXT: mov v0.16b, v2.16b
; Even-lane (real) and odd-lane (imaginary) halves of %a and %b.
140 %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
141 %strided.vec35 = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
142 %strided.vec37 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
143 %strided.vec38 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
; %2 = real(a * b); %6 (below) = imag(a * b).
144 %0 = fmul fast <2 x float> %strided.vec37, %strided.vec
145 %1 = fmul fast <2 x float> %strided.vec38, %strided.vec35
146 %2 = fsub fast <2 x float> %0, %1
147 %3 = fmul fast <2 x float> %2, %strided.vec35
148 %4 = fmul fast <2 x float> %strided.vec38, %strided.vec
149 %5 = fmul fast <2 x float> %strided.vec35, %strided.vec37
150 %6 = fadd fast <2 x float> %4, %5
; Outer multiply by %a again: %8 is the imaginary part, %11 the real part.
151 %7 = fmul fast <2 x float> %6, %strided.vec
152 %8 = fadd fast <2 x float> %3, %7
153 %9 = fmul fast <2 x float> %2, %strided.vec
154 %10 = fmul fast <2 x float> %6, %strided.vec35
155 %11 = fsub fast <2 x float> %9, %10
156 %interleaved.vec = shufflevector <2 x float> %11, <2 x float> %8, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
157 ret <4 x float> %interleaved.vec
161 ; d * (b * a) * (c * a)
162 ; Expected to transform
; %a feeds two separate products (with %b and with %c) that are later multiplied
; together. The expected output is four fcmla #0/#90 pairs, each accumulating
; into a zeroed register.
163 define <4 x float> @mul_diamond(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) {
164 ; CHECK-LABEL: mul_diamond:
165 ; CHECK: // %bb.0: // %entry
166 ; CHECK-NEXT: movi v4.2d, #0000000000000000
167 ; CHECK-NEXT: movi v5.2d, #0000000000000000
168 ; CHECK-NEXT: movi v6.2d, #0000000000000000
169 ; CHECK-NEXT: fcmla v4.4s, v0.4s, v1.4s, #0
170 ; CHECK-NEXT: fcmla v6.4s, v0.4s, v2.4s, #0
171 ; CHECK-NEXT: fcmla v4.4s, v0.4s, v1.4s, #90
172 ; CHECK-NEXT: movi v1.2d, #0000000000000000
173 ; CHECK-NEXT: fcmla v6.4s, v0.4s, v2.4s, #90
174 ; CHECK-NEXT: fcmla v5.4s, v3.4s, v4.4s, #0
175 ; CHECK-NEXT: fcmla v5.4s, v3.4s, v4.4s, #90
176 ; CHECK-NEXT: fcmla v1.4s, v5.4s, v6.4s, #0
177 ; CHECK-NEXT: fcmla v1.4s, v5.4s, v6.4s, #90
178 ; CHECK-NEXT: mov v0.16b, v1.16b
; Deinterleave all four operands into real (even-lane) and imaginary
; (odd-lane) halves.
181 %a.real = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
182 %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
183 %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
184 %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
185 %c.real = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
186 %c.imag = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
187 %d.real = shufflevector <4 x float> %d, <4 x float> poison, <2 x i32> <i32 0, i32 2>
188 %d.imag = shufflevector <4 x float> %d, <4 x float> poison, <2 x i32> <i32 1, i32 3>
; a * b: %2 is the imaginary part, %5 the real part.
189 %0 = fmul fast <2 x float> %a.imag, %b.real
190 %1 = fmul fast <2 x float> %a.real, %b.imag
191 %2 = fadd fast <2 x float> %1, %0
192 %3 = fmul fast <2 x float> %a.real, %b.real
193 %4 = fmul fast <2 x float> %b.imag, %a.imag
194 %5 = fsub fast <2 x float> %3, %4
; d * (a * b): %10 is the real part, %11 the imaginary part.
195 %6 = fmul fast <2 x float> %d.real, %5
196 %7 = fmul fast <2 x float> %2, %d.imag
197 %8 = fmul fast <2 x float> %d.real, %2
198 %9 = fmul fast <2 x float> %5, %d.imag
199 %10 = fsub fast <2 x float> %6, %7
200 %11 = fadd fast <2 x float> %8, %9
; c * a: %14 is the imaginary part, %18 (below) the real part. The products
; with (%10, %11) are interleaved with this computation.
201 %12 = fmul fast <2 x float> %c.real, %a.imag
202 %13 = fmul fast <2 x float> %c.imag, %a.real
203 %14 = fadd fast <2 x float> %13, %12
204 %15 = fmul fast <2 x float> %14, %10
205 %16 = fmul fast <2 x float> %c.real, %a.real
206 %17 = fmul fast <2 x float> %c.imag, %a.imag
207 %18 = fsub fast <2 x float> %16, %17
; Final product (c * a) * (d * (a * b)): %20 imaginary, %23 real.
208 %19 = fmul fast <2 x float> %18, %11
209 %20 = fadd fast <2 x float> %15, %19
210 %21 = fmul fast <2 x float> %18, %10
211 %22 = fmul fast <2 x float> %14, %11
212 %23 = fsub fast <2 x float> %21, %22
213 %interleaved.vec = shufflevector <2 x float> %23, <2 x float> %20, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
214 ret <4 x float> %interleaved.vec
217 ; Expected to transform
; With x = a * b and y = a * c, computes z = (yr - xi, yi + xr), i.e. y plus x
; rotated by 90 degrees. The expected output is two fcmla #0/#90 pairs
; followed by a single fcadd #90.
218 define <4 x float> @mul_add90_mul(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
219 ; CHECK-LABEL: mul_add90_mul:
220 ; CHECK: // %bb.0: // %entry
221 ; CHECK-NEXT: movi v3.2d, #0000000000000000
222 ; CHECK-NEXT: movi v4.2d, #0000000000000000
223 ; CHECK-NEXT: fcmla v4.4s, v0.4s, v2.4s, #0
224 ; CHECK-NEXT: fcmla v3.4s, v0.4s, v1.4s, #0
225 ; CHECK-NEXT: fcmla v4.4s, v0.4s, v2.4s, #90
226 ; CHECK-NEXT: fcmla v3.4s, v0.4s, v1.4s, #90
227 ; CHECK-NEXT: fcadd v0.4s, v4.4s, v3.4s, #90
; Real (even-lane) and imaginary (odd-lane) halves of the three operands.
230 %ar = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
231 %ai = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
232 %br = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
233 %bi = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
234 %cr = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
235 %ci = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
; x = a * b.
237 %i6 = fmul fast <2 x float> %br, %ar
238 %i7 = fmul fast <2 x float> %bi, %ai
239 %xr = fsub fast <2 x float> %i6, %i7
240 %i9 = fmul fast <2 x float> %bi, %ar
241 %i10 = fmul fast <2 x float> %br, %ai
242 %xi = fadd fast <2 x float> %i9, %i10
; y = a * c.
244 %j6 = fmul fast <2 x float> %cr, %ar
245 %j7 = fmul fast <2 x float> %ci, %ai
246 %yr = fsub fast <2 x float> %j6, %j7
247 %j9 = fmul fast <2 x float> %ci, %ar
248 %j10 = fmul fast <2 x float> %cr, %ai
249 %yi = fadd fast <2 x float> %j9, %j10
; z: real = yr - xi, imaginary = yi + xr.
251 %zr = fsub fast <2 x float> %yr, %xi
252 %zi = fadd fast <2 x float> %yi, %xr
253 %interleaved.vec = shufflevector <2 x float> %zr, <2 x float> %zi, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
254 ret <4 x float> %interleaved.vec
257 ; Expected to not transform
; Variant of the y + rotate90(x) pattern in which y is not a complete complex
; product: its first partial products are shared with x = a * b instead of
; being computed from %c. The expected output contains no fcmla or fcadd.
258 define <4 x float> @mul_triangle_addmul(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
259 ; CHECK-LABEL: mul_triangle_addmul:
260 ; CHECK: // %bb.0: // %entry
261 ; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8
262 ; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8
263 ; CHECK-NEXT: zip1 v5.2s, v0.2s, v3.2s
264 ; CHECK-NEXT: zip1 v6.2s, v1.2s, v4.2s
265 ; CHECK-NEXT: zip2 v1.2s, v1.2s, v4.2s
266 ; CHECK-NEXT: ext v4.16b, v2.16b, v2.16b, #8
267 ; CHECK-NEXT: zip2 v0.2s, v0.2s, v3.2s
268 ; CHECK-NEXT: fmul v7.2s, v6.2s, v5.2s
269 ; CHECK-NEXT: fmul v5.2s, v1.2s, v5.2s
270 ; CHECK-NEXT: zip1 v3.2s, v2.2s, v4.2s
271 ; CHECK-NEXT: zip2 v2.2s, v2.2s, v4.2s
272 ; CHECK-NEXT: fmov d4, d7
273 ; CHECK-NEXT: fmov d16, d5
274 ; CHECK-NEXT: fmls v7.2s, v0.2s, v2.2s
275 ; CHECK-NEXT: fmla v5.2s, v0.2s, v3.2s
276 ; CHECK-NEXT: fmls v4.2s, v0.2s, v1.2s
277 ; CHECK-NEXT: fmla v16.2s, v0.2s, v6.2s
278 ; CHECK-NEXT: fsub v0.2s, v7.2s, v16.2s
279 ; CHECK-NEXT: fadd v1.2s, v5.2s, v4.2s
280 ; CHECK-NEXT: zip1 v0.4s, v0.4s, v1.4s
; Real (even-lane) and imaginary (odd-lane) halves of the three operands.
283 %ar = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
284 %ai = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
285 %br = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
286 %bi = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
287 %cr = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
288 %ci = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
; x = a * b.
290 %i6 = fmul fast <2 x float> %br, %ar
291 %i7 = fmul fast <2 x float> %bi, %ai
292 %xr = fsub fast <2 x float> %i6, %i7
293 %i9 = fmul fast <2 x float> %bi, %ar
294 %i10 = fmul fast <2 x float> %br, %ai
295 %xi = fadd fast <2 x float> %i9, %i10
; The %j6 and %j9 products are commented out; %yr and %yi reuse %i6 and %i9
; (partial products of a * b) in their place.
297 ;%j6 = fmul fast <2 x float> %cr, %ar
298 %j7 = fmul fast <2 x float> %ci, %ai
299 %yr = fsub fast <2 x float> %i6, %j7
300 ;%j9 = fmul fast <2 x float> %ci, %ar
301 %j10 = fmul fast <2 x float> %cr, %ai
302 %yi = fadd fast <2 x float> %i9, %j10
; z: even result lanes = yr - xi, odd result lanes = yi + xr.
304 %zr = fsub fast <2 x float> %yr, %xi
305 %zi = fadd fast <2 x float> %yi, %xr
306 %interleaved.vec = shufflevector <2 x float> %zr, <2 x float> %zi, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
307 ret <4 x float> %interleaved.vec
310 ; Expected to not transform
; Same (a * b) * a computation as @mul_triangle, but the real and imaginary
; halves of the inner product (%2 and %6) have an additional use: they are
; concatenated and stored through %p. The expected output contains no fcmla.
311 define <4 x float> @mul_triangle_multiuses(<4 x float> %a, <4 x float> %b, ptr %p) {
312 ; CHECK-LABEL: mul_triangle_multiuses:
313 ; CHECK: // %bb.0: // %entry
314 ; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
315 ; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8
316 ; CHECK-NEXT: zip2 v4.2s, v0.2s, v2.2s
317 ; CHECK-NEXT: zip1 v5.2s, v1.2s, v3.2s
318 ; CHECK-NEXT: zip1 v0.2s, v0.2s, v2.2s
319 ; CHECK-NEXT: zip2 v1.2s, v1.2s, v3.2s
320 ; CHECK-NEXT: fmul v2.2s, v4.2s, v5.2s
321 ; CHECK-NEXT: fmul v3.2s, v1.2s, v4.2s
322 ; CHECK-NEXT: fmla v2.2s, v0.2s, v1.2s
323 ; CHECK-NEXT: fneg v1.2s, v3.2s
324 ; CHECK-NEXT: fmul v3.2s, v2.2s, v4.2s
325 ; CHECK-NEXT: fmla v1.2s, v0.2s, v5.2s
326 ; CHECK-NEXT: fmul v5.2s, v2.2s, v0.2s
327 ; CHECK-NEXT: fneg v3.2s, v3.2s
328 ; CHECK-NEXT: fmla v5.2s, v4.2s, v1.2s
329 ; CHECK-NEXT: fmla v3.2s, v0.2s, v1.2s
330 ; CHECK-NEXT: mov v1.d[1], v2.d[0]
331 ; CHECK-NEXT: zip1 v0.4s, v3.4s, v5.4s
332 ; CHECK-NEXT: str q1, [x0]
; Even-lane (real) and odd-lane (imaginary) halves of %a and %b.
335 %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
336 %strided.vec35 = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
337 %strided.vec37 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
338 %strided.vec38 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
; %2 = real(a * b); %6 (below) = imag(a * b).
339 %0 = fmul fast <2 x float> %strided.vec37, %strided.vec
340 %1 = fmul fast <2 x float> %strided.vec38, %strided.vec35
341 %2 = fsub fast <2 x float> %0, %1
342 %3 = fmul fast <2 x float> %2, %strided.vec35
343 %4 = fmul fast <2 x float> %strided.vec38, %strided.vec
344 %5 = fmul fast <2 x float> %strided.vec35, %strided.vec37
345 %6 = fadd fast <2 x float> %4, %5
; Extra use: the mask <0, 1, 2, 3> concatenates %2 and %6 (no interleaving)
; and the result is stored to %p.
346 %otheruse = shufflevector <2 x float> %2, <2 x float> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
347 store <4 x float> %otheruse, ptr %p
; Outer multiply by %a: %8 is the imaginary part, %11 the real part.
348 %7 = fmul fast <2 x float> %6, %strided.vec
349 %8 = fadd fast <2 x float> %3, %7
350 %9 = fmul fast <2 x float> %2, %strided.vec
351 %10 = fmul fast <2 x float> %6, %strided.vec35
352 %11 = fsub fast <2 x float> %9, %10
353 %interleaved.vec = shufflevector <2 x float> %11, <2 x float> %8, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
354 ret <4 x float> %interleaved.vec
357 ; Expected to transform
; Computes a * b + c: c.real is added to the real half of the product and
; c.imag to the imaginary half. In the expected output the fcmla #0/#90 pair
; accumulates into v2 with no zeroing movi beforehand.
358 define <4 x float> @mul_addequal(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
359 ; CHECK-LABEL: mul_addequal:
360 ; CHECK: // %bb.0: // %entry
361 ; CHECK-NEXT: fcmla v2.4s, v1.4s, v0.4s, #0
362 ; CHECK-NEXT: fcmla v2.4s, v1.4s, v0.4s, #90
363 ; CHECK-NEXT: mov v0.16b, v2.16b
; Real (even-lane) and imaginary (odd-lane) halves of %a and %b.
366 %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
367 %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
368 %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
369 %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
; a * b: %2 is the imaginary part, %5 the real part.
370 %0 = fmul fast <2 x float> %b.imag, %strided.vec
371 %1 = fmul fast <2 x float> %b.real, %a.imag
372 %2 = fadd fast <2 x float> %1, %0
373 %3 = fmul fast <2 x float> %b.real, %strided.vec
374 %4 = fmul fast <2 x float> %a.imag, %b.imag
375 %5 = fsub fast <2 x float> %3, %4
; Add %c half by half, then re-interleave.
376 %c.real = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
377 %c.imag = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
378 %6 = fadd fast <2 x float> %5, %c.real
379 %7 = fadd fast <2 x float> %2, %c.imag
380 %interleaved.vec = shufflevector <2 x float> %6, <2 x float> %7, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
381 ret <4 x float> %interleaved.vec
384 ; Expected to transform
; Computes a * b - c: c.real is subtracted from the real half of the product
; and c.imag from the imaginary half. The expected output is an fcmla #0/#90
; pair into a zeroed register followed by a full-width fsub.
385 define <4 x float> @mul_subequal(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
386 ; CHECK-LABEL: mul_subequal:
387 ; CHECK: // %bb.0: // %entry
388 ; CHECK-NEXT: movi v3.2d, #0000000000000000
389 ; CHECK-NEXT: fcmla v3.4s, v1.4s, v0.4s, #0
390 ; CHECK-NEXT: fcmla v3.4s, v1.4s, v0.4s, #90
391 ; CHECK-NEXT: fsub v0.4s, v3.4s, v2.4s
; Real (even-lane) and imaginary (odd-lane) halves of %a and %b.
394 %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
395 %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
396 %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
397 %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
; a * b: %2 is the imaginary part, %5 the real part.
398 %0 = fmul fast <2 x float> %b.imag, %strided.vec
399 %1 = fmul fast <2 x float> %b.real, %a.imag
400 %2 = fadd fast <2 x float> %1, %0
401 %3 = fmul fast <2 x float> %b.real, %strided.vec
402 %4 = fmul fast <2 x float> %a.imag, %b.imag
403 %5 = fsub fast <2 x float> %3, %4
; Subtract %c half by half, then re-interleave.
404 %c.real = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
405 %c.imag = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
406 %6 = fsub fast <2 x float> %5, %c.real
407 %7 = fsub fast <2 x float> %2, %c.imag
408 %interleaved.vec = shufflevector <2 x float> %6, <2 x float> %7, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
409 ret <4 x float> %interleaved.vec
413 ; Expected to transform
; Computes a * b as a complex product, then multiplies the result lane-wise by
; %c (real half by c.real, imaginary half by c.imag); the second step is an
; element-wise multiply, not a complex one. The expected output is an fcmla
; #0/#90 pair followed by a full-width fmul.
414 define <4 x float> @mul_mulequal(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
415 ; CHECK-LABEL: mul_mulequal:
416 ; CHECK: // %bb.0: // %entry
417 ; CHECK-NEXT: movi v3.2d, #0000000000000000
418 ; CHECK-NEXT: fcmla v3.4s, v1.4s, v0.4s, #0
419 ; CHECK-NEXT: fcmla v3.4s, v1.4s, v0.4s, #90
420 ; CHECK-NEXT: fmul v0.4s, v3.4s, v2.4s
; Real (even-lane) and imaginary (odd-lane) halves of %a and %b.
423 %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
424 %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
425 %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
426 %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
; a * b: %2 is the imaginary part, %5 the real part.
427 %0 = fmul fast <2 x float> %b.imag, %strided.vec
428 %1 = fmul fast <2 x float> %b.real, %a.imag
429 %2 = fadd fast <2 x float> %1, %0
430 %3 = fmul fast <2 x float> %b.real, %strided.vec
431 %4 = fmul fast <2 x float> %a.imag, %b.imag
432 %5 = fsub fast <2 x float> %3, %4
; Lane-wise multiply by %c, then re-interleave.
433 %c.real = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
434 %c.imag = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
435 %6 = fmul fast <2 x float> %5, %c.real
436 %7 = fmul fast <2 x float> %2, %c.imag
437 %interleaved.vec = shufflevector <2 x float> %6, <2 x float> %7, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
438 ret <4 x float> %interleaved.vec
441 ; Expected to not transform
; Computes a * b as a complex product, then divides the result lane-wise by %c
; (real half by c.real, imaginary half by c.imag). The expected output
; contains no fcmla; it uses 2-lane fmul/fneg/fmla and two fdiv instructions.
442 define <4 x float> @mul_divequal(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
443 ; CHECK-LABEL: mul_divequal:
444 ; CHECK: // %bb.0: // %entry
445 ; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8
446 ; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8
447 ; CHECK-NEXT: zip2 v5.2s, v0.2s, v3.2s
448 ; CHECK-NEXT: zip2 v6.2s, v1.2s, v4.2s
449 ; CHECK-NEXT: zip1 v0.2s, v0.2s, v3.2s
450 ; CHECK-NEXT: zip1 v1.2s, v1.2s, v4.2s
451 ; CHECK-NEXT: ext v3.16b, v2.16b, v2.16b, #8
452 ; CHECK-NEXT: fmul v7.2s, v5.2s, v6.2s
453 ; CHECK-NEXT: fneg v4.2s, v7.2s
454 ; CHECK-NEXT: zip1 v7.2s, v2.2s, v3.2s
455 ; CHECK-NEXT: zip2 v2.2s, v2.2s, v3.2s
456 ; CHECK-NEXT: fmla v4.2s, v0.2s, v1.2s
457 ; CHECK-NEXT: fmul v0.2s, v6.2s, v0.2s
458 ; CHECK-NEXT: fmla v0.2s, v5.2s, v1.2s
459 ; CHECK-NEXT: fdiv v4.2s, v4.2s, v7.2s
460 ; CHECK-NEXT: fdiv v0.2s, v0.2s, v2.2s
461 ; CHECK-NEXT: zip1 v0.4s, v4.4s, v0.4s
; Real (even-lane) and imaginary (odd-lane) halves of %a and %b.
464 %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
465 %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
466 %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
467 %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
; a * b: %2 is the imaginary part, %5 the real part.
468 %0 = fmul fast <2 x float> %b.imag, %strided.vec
469 %1 = fmul fast <2 x float> %b.real, %a.imag
470 %2 = fadd fast <2 x float> %1, %0
471 %3 = fmul fast <2 x float> %b.real, %strided.vec
472 %4 = fmul fast <2 x float> %a.imag, %b.imag
473 %5 = fsub fast <2 x float> %3, %4
; Lane-wise divide by %c, then re-interleave.
474 %c.real = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
475 %c.imag = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
476 %6 = fdiv fast <2 x float> %5, %c.real
477 %7 = fdiv fast <2 x float> %2, %c.imag
478 %interleaved.vec = shufflevector <2 x float> %6, <2 x float> %7, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
479 ret <4 x float> %interleaved.vec
482 ; Expected to transform
; Computes -(a * b): both halves of the complex product are negated before
; re-interleaving. The expected output is an fcmla pair with rotations #180
; and #270 accumulating into a zeroed register.
483 define <4 x float> @mul_negequal(<4 x float> %a, <4 x float> %b) {
484 ; CHECK-LABEL: mul_negequal:
485 ; CHECK: // %bb.0: // %entry
486 ; CHECK-NEXT: movi v2.2d, #0000000000000000
487 ; CHECK-NEXT: fcmla v2.4s, v1.4s, v0.4s, #180
488 ; CHECK-NEXT: fcmla v2.4s, v1.4s, v0.4s, #270
489 ; CHECK-NEXT: mov v0.16b, v2.16b
; Real (even-lane) and imaginary (odd-lane) halves of %a and %b.
492 %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
493 %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
494 %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
495 %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
; a * b: %2 is the imaginary part, %5 the real part.
496 %0 = fmul fast <2 x float> %b.imag, %strided.vec
497 %1 = fmul fast <2 x float> %b.real, %a.imag
498 %2 = fadd fast <2 x float> %1, %0
499 %3 = fmul fast <2 x float> %b.real, %strided.vec
500 %4 = fmul fast <2 x float> %a.imag, %b.imag
501 %5 = fsub fast <2 x float> %3, %4
; Negate both halves, then re-interleave.
502 %6 = fneg fast <2 x float> %5
503 %7 = fneg fast <2 x float> %2
504 %interleaved.vec = shufflevector <2 x float> %6, <2 x float> %7, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
505 ret <4 x float> %interleaved.vec