; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s --mattr=+sve -o - | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"
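
; a * b + c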
define <vscale x 4 x double> @mull_add(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c) {
; CHECK-LABEL: mull_add:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uzp2 z6.d, z0.d, z1.d
; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
; CHECK-NEXT:    uzp2 z1.d, z2.d, z3.d
; CHECK-NEXT:    uzp1 z2.d, z2.d, z3.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmul z7.d, z0.d, z1.d
; CHECK-NEXT:    fmul z1.d, z6.d, z1.d
; CHECK-NEXT:    movprfx z3, z7
; CHECK-NEXT:    fmla z3.d, p0/m, z6.d, z2.d
; CHECK-NEXT:    fnmsb z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT:    uzp2 z1.d, z4.d, z5.d
; CHECK-NEXT:    uzp1 z2.d, z4.d, z5.d
; CHECK-NEXT:    fadd z2.d, z2.d, z0.d
; CHECK-NEXT:    fadd z1.d, z3.d, z1.d
; CHECK-NEXT:    zip1 z0.d, z2.d, z1.d
; CHECK-NEXT:    zip2 z1.d, z2.d, z1.d
; CHECK-NEXT:    ret
entry:
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %strided.vec29 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec29, 0
  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec29, 1
  %4 = fmul contract <vscale x 2 x double> %0, %3
  %5 = fmul contract <vscale x 2 x double> %1, %2
  %6 = fadd contract <vscale x 2 x double> %5, %4
  %7 = fmul contract <vscale x 2 x double> %0, %2
  %8 = fmul contract <vscale x 2 x double> %1, %3
  %9 = fsub contract <vscale x 2 x double> %7, %8
  %strided.vec31 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %c)
  %10 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec31, 0
  %11 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec31, 1
  %12 = fadd contract <vscale x 2 x double> %10, %9
  %13 = fadd contract <vscale x 2 x double> %6, %11
  %interleaved.vec = tail call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> %12, <vscale x 2 x double> %13)
  ret <vscale x 4 x double> %interleaved.vec
}
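
; a * b + c * d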
define <vscale x 4 x double> @mul_add_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_add_mull:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov z24.d, #0 // =0x0
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z25.d, z24.d
; CHECK-NEXT:    mov z26.d, z24.d
; CHECK-NEXT:    mov z27.d, z24.d
; CHECK-NEXT:    fcmla z24.d, p0/m, z7.d, z5.d, #0
; CHECK-NEXT:    fcmla z25.d, p0/m, z2.d, z0.d, #0
; CHECK-NEXT:    fcmla z26.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT:    fcmla z27.d, p0/m, z6.d, z4.d, #0
; CHECK-NEXT:    fcmla z24.d, p0/m, z7.d, z5.d, #90
; CHECK-NEXT:    fcmla z25.d, p0/m, z2.d, z0.d, #90
; CHECK-NEXT:    fcmla z26.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT:    fcmla z27.d, p0/m, z6.d, z4.d, #90
; CHECK-NEXT:    fadd z1.d, z26.d, z24.d
; CHECK-NEXT:    fadd z0.d, z25.d, z27.d
; CHECK-NEXT:    ret
entry:
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %strided.vec52 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec52, 0
  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec52, 1
  %4 = fmul contract <vscale x 2 x double> %0, %3
  %5 = fmul contract <vscale x 2 x double> %1, %2
  %6 = fadd contract <vscale x 2 x double> %5, %4
  %7 = fmul contract <vscale x 2 x double> %0, %2
  %8 = fmul contract <vscale x 2 x double> %1, %3
  %9 = fsub contract <vscale x 2 x double> %7, %8
  %strided.vec54 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %c)
  %10 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec54, 0
  %11 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec54, 1
  %strided.vec56 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %d)
  %12 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec56, 0
  %13 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec56, 1
  %14 = fmul contract <vscale x 2 x double> %10, %13
  %15 = fmul contract <vscale x 2 x double> %11, %12
  %16 = fadd contract <vscale x 2 x double> %15, %14
  %17 = fmul contract <vscale x 2 x double> %10, %12
  %18 = fmul contract <vscale x 2 x double> %11, %13
  %19 = fsub contract <vscale x 2 x double> %17, %18
  %20 = fadd contract <vscale x 2 x double> %9, %19
  %21 = fadd contract <vscale x 2 x double> %6, %16
  %interleaved.vec = tail call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> %20, <vscale x 2 x double> %21)
  ret <vscale x 4 x double> %interleaved.vec
}
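
; a * b - c * d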
define <vscale x 4 x double> @mul_sub_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_sub_mull:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov z24.d, #0 // =0x0
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z25.d, z24.d
; CHECK-NEXT:    mov z26.d, z24.d
; CHECK-NEXT:    mov z27.d, z24.d
; CHECK-NEXT:    fcmla z24.d, p0/m, z7.d, z5.d, #0
; CHECK-NEXT:    fcmla z25.d, p0/m, z2.d, z0.d, #0
; CHECK-NEXT:    fcmla z26.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT:    fcmla z27.d, p0/m, z6.d, z4.d, #0
; CHECK-NEXT:    fcmla z24.d, p0/m, z7.d, z5.d, #90
; CHECK-NEXT:    fcmla z25.d, p0/m, z2.d, z0.d, #90
; CHECK-NEXT:    fcmla z26.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT:    fcmla z27.d, p0/m, z6.d, z4.d, #90
; CHECK-NEXT:    fsub z1.d, z26.d, z24.d
; CHECK-NEXT:    fsub z0.d, z25.d, z27.d
; CHECK-NEXT:    ret
entry:
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %strided.vec52 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec52, 0
  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec52, 1
  %4 = fmul contract <vscale x 2 x double> %0, %3
  %5 = fmul contract <vscale x 2 x double> %1, %2
  %6 = fadd contract <vscale x 2 x double> %5, %4
  %7 = fmul contract <vscale x 2 x double> %0, %2
  %8 = fmul contract <vscale x 2 x double> %1, %3
  %9 = fsub contract <vscale x 2 x double> %7, %8
  %strided.vec54 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %c)
  %10 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec54, 0
  %11 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec54, 1
  %strided.vec56 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %d)
  %12 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec56, 0
  %13 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec56, 1
  %14 = fmul contract <vscale x 2 x double> %10, %13
  %15 = fmul contract <vscale x 2 x double> %11, %12
  %16 = fadd contract <vscale x 2 x double> %15, %14
  %17 = fmul contract <vscale x 2 x double> %10, %12
  %18 = fmul contract <vscale x 2 x double> %11, %13
  %19 = fsub contract <vscale x 2 x double> %17, %18
  %20 = fsub contract <vscale x 2 x double> %9, %19
  %21 = fsub contract <vscale x 2 x double> %6, %16
  %interleaved.vec = tail call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> %20, <vscale x 2 x double> %21)
  ret <vscale x 4 x double> %interleaved.vec
}

; a * b + conj(c) * d
define <vscale x 4 x double> @mul_conj_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_conj_mull:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov z24.d, #0 // =0x0
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z25.d, z24.d
; CHECK-NEXT:    mov z26.d, z24.d
; CHECK-NEXT:    mov z27.d, z24.d
; CHECK-NEXT:    fcmla z24.d, p0/m, z5.d, z7.d, #0
; CHECK-NEXT:    fcmla z25.d, p0/m, z2.d, z0.d, #0
; CHECK-NEXT:    fcmla z26.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT:    fcmla z27.d, p0/m, z4.d, z6.d, #0
; CHECK-NEXT:    fcmla z24.d, p0/m, z5.d, z7.d, #270
; CHECK-NEXT:    fcmla z25.d, p0/m, z2.d, z0.d, #90
; CHECK-NEXT:    fcmla z26.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT:    fcmla z27.d, p0/m, z4.d, z6.d, #270
; CHECK-NEXT:    fadd z1.d, z26.d, z24.d
; CHECK-NEXT:    fadd z0.d, z25.d, z27.d
; CHECK-NEXT:    ret
entry:
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %strided.vec60 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec60, 0
  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec60, 1
  %4 = fmul contract <vscale x 2 x double> %0, %3
  %5 = fmul contract <vscale x 2 x double> %1, %2
  %6 = fadd contract <vscale x 2 x double> %5, %4
  %7 = fmul contract <vscale x 2 x double> %0, %2
  %8 = fmul contract <vscale x 2 x double> %1, %3
  %9 = fsub contract <vscale x 2 x double> %7, %8
  %strided.vec62 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %c)
  %10 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec62, 0
  %11 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec62, 1
  %strided.vec64 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %d)
  %12 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec64, 0
  %13 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec64, 1
  %14 = fmul contract <vscale x 2 x double> %10, %13
  %15 = fmul contract <vscale x 2 x double> %11, %12
  %16 = fsub contract <vscale x 2 x double> %14, %15
  %17 = fmul contract <vscale x 2 x double> %10, %12
  %18 = fmul contract <vscale x 2 x double> %11, %13
  %19 = fadd contract <vscale x 2 x double> %17, %18
  %20 = fadd contract <vscale x 2 x double> %9, %19
  %21 = fadd contract <vscale x 2 x double> %6, %16
  %interleaved.vec = tail call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> %20, <vscale x 2 x double> %21)
  ret <vscale x 4 x double> %interleaved.vec
}
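
; a * b + rot(c) * d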
define <vscale x 4 x double> @mul_add_rot_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_add_rot_mull:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uzp2 z24.d, z4.d, z5.d
; CHECK-NEXT:    mov z25.d, #0 // =0x0
; CHECK-NEXT:    uzp1 z4.d, z4.d, z5.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z26.d, z24.d
; CHECK-NEXT:    and z25.d, z25.d, #0x7fffffffffffffff
; CHECK-NEXT:    and z26.d, z26.d, #0x8000000000000000
; CHECK-NEXT:    orr z5.d, z25.d, z26.d
; CHECK-NEXT:    fadd z5.d, z4.d, z5.d
; CHECK-NEXT:    and z4.d, z4.d, #0x8000000000000000
; CHECK-NEXT:    orr z4.d, z25.d, z4.d
; CHECK-NEXT:    uzp2 z25.d, z0.d, z1.d
; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
; CHECK-NEXT:    uzp2 z1.d, z2.d, z3.d
; CHECK-NEXT:    uzp1 z2.d, z2.d, z3.d
; CHECK-NEXT:    fsub z4.d, z4.d, z24.d
; CHECK-NEXT:    uzp2 z24.d, z6.d, z7.d
; CHECK-NEXT:    uzp1 z6.d, z6.d, z7.d
; CHECK-NEXT:    fmul z26.d, z0.d, z1.d
; CHECK-NEXT:    fmul z1.d, z25.d, z1.d
; CHECK-NEXT:    fmul z3.d, z4.d, z24.d
; CHECK-NEXT:    fmul z24.d, z5.d, z24.d
; CHECK-NEXT:    movprfx z7, z26
; CHECK-NEXT:    fmla z7.d, p0/m, z25.d, z2.d
; CHECK-NEXT:    fnmsb z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT:    movprfx z1, z3
; CHECK-NEXT:    fmla z1.d, p0/m, z6.d, z5.d
; CHECK-NEXT:    movprfx z2, z24
; CHECK-NEXT:    fnmls z2.d, p0/m, z4.d, z6.d
; CHECK-NEXT:    fadd z2.d, z0.d, z2.d
; CHECK-NEXT:    fadd z1.d, z7.d, z1.d
; CHECK-NEXT:    zip1 z0.d, z2.d, z1.d
; CHECK-NEXT:    zip2 z1.d, z2.d, z1.d
; CHECK-NEXT:    ret
entry:
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %strided.vec78 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec78, 0
  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec78, 1
  %4 = fmul contract <vscale x 2 x double> %0, %3
  %5 = fmul contract <vscale x 2 x double> %1, %2
  %6 = fadd contract <vscale x 2 x double> %5, %4
  %7 = fmul contract <vscale x 2 x double> %0, %2
  %8 = fmul contract <vscale x 2 x double> %1, %3
  %9 = fsub contract <vscale x 2 x double> %7, %8
  %strided.vec80 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %c)
  %10 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec80, 0
  %11 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec80, 1
  %12 = tail call contract <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> %11)
  %13 = fadd contract <vscale x 2 x double> %10, %12
  %14 = tail call contract <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> %10)
  %15 = fsub contract <vscale x 2 x double> %14, %11
  %strided.vec82 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %d)
  %16 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec82, 0
  %17 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec82, 1
  %18 = fmul contract <vscale x 2 x double> %15, %17
  %19 = fmul contract <vscale x 2 x double> %16, %13
  %20 = fadd contract <vscale x 2 x double> %19, %18
  %21 = fmul contract <vscale x 2 x double> %15, %16
  %22 = fmul contract <vscale x 2 x double> %13, %17
  %23 = fsub contract <vscale x 2 x double> %21, %22
  %24 = fadd contract <vscale x 2 x double> %9, %23
  %25 = fadd contract <vscale x 2 x double> %6, %20
  %interleaved.vec = tail call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> %24, <vscale x 2 x double> %25)
  ret <vscale x 4 x double> %interleaved.vec
}

declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double>)
declare <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)