; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s --mattr=+sve -o - | FileCheck %s

target triple = "aarch64"

%"class.std::complex" = type { { double, double } }

; Zero initialized reduction. The IR is generated with predicated tail folding (-prefer-predicate-over-epilogue=predicate-dont-vectorize)
;
; complex<double> x = 0.0 + 0.0i;
; for (int i = 0; i < 100; ++i)
;   x += a[i] * b[i];
;
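; A rough sketch of how IR with this shape can be produced (assuming a scalar
; input loop in a hypothetical file loop.ll; the exact invocation behind this
; test may differ):
;
;   opt -passes=loop-vectorize \
;       -prefer-predicate-over-epilogue=predicate-dont-vectorize \
;       -mtriple=aarch64 -mattr=+sve -S loop.ll
;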
define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z1.d, #0 // =0x0
; CHECK-NEXT: mov w8, #100 // =0x64
; CHECK-NEXT: cntd x9
; CHECK-NEXT: whilelo p1.d, xzr, x8
; CHECK-NEXT: rdvl x10, #2
; CHECK-NEXT: mov x11, x9
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB0_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
; CHECK-NEXT: mov z6.d, z1.d
; CHECK-NEXT: mov z7.d, z0.d
; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl]
; CHECK-NEXT: ld1d { z3.d }, p1/z, [x0]
; CHECK-NEXT: ld1d { z5.d }, p1/z, [x1]
; CHECK-NEXT: add x1, x1, x10
; CHECK-NEXT: add x0, x0, x10
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
; CHECK-NEXT: mov z0.d, p2/m, z7.d
; CHECK-NEXT: mov z1.d, p1/m, z6.d
; CHECK-NEXT: whilelo p1.d, x11, x8
; CHECK-NEXT: add x11, x11, x9
; CHECK-NEXT: b.mi .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d
; CHECK-NEXT: faddv d0, p0, z2.d
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z1
; CHECK-NEXT: ret
entry:
  %active.lane.mask.entry = tail call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 100)
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 1
  %2 = shl nuw nsw i64 %0, 5
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %lsr.iv35 = phi i64 [ %lsr.iv.next36, %vector.body ], [ %1, %entry ]
  %lsr.iv = phi i64 [ %lsr.iv.next, %vector.body ], [ 0, %entry ]
  %active.lane.mask = phi <vscale x 2 x i1> [ %active.lane.mask.entry, %entry ], [ %active.lane.mask.next, %vector.body ]
  %vec.phi = phi <vscale x 2 x double> [ zeroinitializer, %entry ], [ %15, %vector.body ]
  %vec.phi27 = phi <vscale x 2 x double> [ zeroinitializer, %entry ], [ %16, %vector.body ]
  %scevgep = getelementptr i8, ptr %a, i64 %lsr.iv
  %scevgep34 = getelementptr i8, ptr %b, i64 %lsr.iv
  %interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %active.lane.mask, <vscale x 2 x i1> %active.lane.mask)
  %wide.masked.vec = tail call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %scevgep, i32 8, <vscale x 4 x i1> %interleaved.mask, <vscale x 4 x double> poison)
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %wide.masked.vec)
  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %4 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %interleaved.mask28 = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %active.lane.mask, <vscale x 2 x i1> %active.lane.mask)
  %wide.masked.vec29 = tail call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %scevgep34, i32 8, <vscale x 4 x i1> %interleaved.mask28, <vscale x 4 x double> poison)
  %strided.vec30 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %wide.masked.vec29)
  %5 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec30, 0
  %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec30, 1
  %7 = fmul fast <vscale x 2 x double> %6, %3
  %8 = fmul fast <vscale x 2 x double> %5, %4
  %9 = fmul fast <vscale x 2 x double> %5, %3
  %10 = fadd fast <vscale x 2 x double> %9, %vec.phi27
  %11 = fmul fast <vscale x 2 x double> %6, %4
  %12 = fsub fast <vscale x 2 x double> %10, %11
  %13 = fadd fast <vscale x 2 x double> %8, %vec.phi
  %14 = fadd fast <vscale x 2 x double> %13, %7
  %15 = select fast <vscale x 2 x i1> %active.lane.mask, <vscale x 2 x double> %14, <vscale x 2 x double> %vec.phi
  %16 = select fast <vscale x 2 x i1> %active.lane.mask, <vscale x 2 x double> %12, <vscale x 2 x double> %vec.phi27
  %active.lane.mask.next = tail call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 %lsr.iv35, i64 100)
  %17 = extractelement <vscale x 2 x i1> %active.lane.mask.next, i64 0
  %lsr.iv.next = add i64 %lsr.iv, %2
  %lsr.iv.next36 = add i64 %lsr.iv35, %1
  br i1 %17, label %vector.body, label %exit.block

exit.block: ; preds = %vector.body
  %18 = tail call fast double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> %16)
  %19 = tail call fast double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> %15)
  %.fca.0.0.insert = insertvalue %"class.std::complex" poison, double %18, 0, 0
  %.fca.0.1.insert = insertvalue %"class.std::complex" %.fca.0.0.insert, double %19, 0, 1
  ret %"class.std::complex" %.fca.0.1.insert
}

; Zero initialized reduction with conditional block. The IR is generated with scalar tail folding (-prefer-predicate-over-epilogue=scalar-epilogue)
;
; complex<double> x = 0.0 + 0.0i;
; for (int i = 0; i < 100; ++i)
;   if (cond[i])
;     x += a[i] * b[i];
;
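; A comparable sketch for the scalar-tail-folding flavour (same caveats as the
; sketch above; loop.ll is hypothetical and the real invocation may differ):
;
;   opt -passes=loop-vectorize \
;       -prefer-predicate-over-epilogue=scalar-epilogue \
;       -mtriple=aarch64 -mattr=+sve -S loop.ll
;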
define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %cond) {
; CHECK-LABEL: complex_mul_predicated_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z1.d, #0 // =0x0
; CHECK-NEXT: cntd x9
; CHECK-NEXT: mov w11, #100 // =0x64
; CHECK-NEXT: neg x10, x9
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: and x10, x10, x11
; CHECK-NEXT: rdvl x11, #2
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB1_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ld1w { z2.d }, p0/z, [x2, x8, lsl #2]
; CHECK-NEXT: mov z6.d, z1.d
; CHECK-NEXT: mov z7.d, z0.d
; CHECK-NEXT: add x8, x8, x9
; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0
; CHECK-NEXT: cmp x10, x8
; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl]
; CHECK-NEXT: ld1d { z3.d }, p1/z, [x0]
; CHECK-NEXT: ld1d { z5.d }, p1/z, [x1]
; CHECK-NEXT: add x1, x1, x11
; CHECK-NEXT: add x0, x0, x11
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
; CHECK-NEXT: mov z0.d, p2/m, z7.d
; CHECK-NEXT: mov z1.d, p1/m, z6.d
; CHECK-NEXT: b.ne .LBB1_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d
; CHECK-NEXT: faddv d0, p0, z2.d
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z1
; CHECK-NEXT: ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 1
  %n.mod.vf = urem i64 100, %1
  %n.vec = sub i64 100, %n.mod.vf
  %2 = shl nuw nsw i64 %0, 5
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %lsr.iv48 = phi i64 [ %lsr.iv.next, %vector.body ], [ 0, %entry ]
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %vec.phi = phi <vscale x 2 x double> [ zeroinitializer, %entry ], [ %predphi34, %vector.body ]
  %vec.phi30 = phi <vscale x 2 x double> [ zeroinitializer, %entry ], [ %predphi, %vector.body ]
  %3 = shl i64 %index, 2
  %scevgep47 = getelementptr i8, ptr %cond, i64 %3
  %wide.load = load <vscale x 2 x i32>, ptr %scevgep47, align 4
  %4 = icmp ne <vscale x 2 x i32> %wide.load, zeroinitializer
  %scevgep49 = getelementptr i8, ptr %a, i64 %lsr.iv48
  %scevgep50 = getelementptr i8, ptr %b, i64 %lsr.iv48
  %interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %4, <vscale x 2 x i1> %4)
  %wide.masked.vec = tail call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %scevgep49, i32 8, <vscale x 4 x i1> %interleaved.mask, <vscale x 4 x double> poison)
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %wide.masked.vec)
  %5 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %wide.masked.vec32 = tail call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %scevgep50, i32 8, <vscale x 4 x i1> %interleaved.mask, <vscale x 4 x double> poison)
  %strided.vec33 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %wide.masked.vec32)
  %7 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec33, 0
  %8 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec33, 1
  %9 = fmul fast <vscale x 2 x double> %8, %5
  %10 = fmul fast <vscale x 2 x double> %7, %6
  %11 = fmul fast <vscale x 2 x double> %7, %5
  %12 = fadd fast <vscale x 2 x double> %11, %vec.phi30
  %13 = fmul fast <vscale x 2 x double> %8, %6
  %14 = fsub fast <vscale x 2 x double> %12, %13
  %15 = fadd fast <vscale x 2 x double> %10, %vec.phi
  %16 = fadd fast <vscale x 2 x double> %15, %9
  %predphi = select <vscale x 2 x i1> %4, <vscale x 2 x double> %14, <vscale x 2 x double> %vec.phi30
  %predphi34 = select <vscale x 2 x i1> %4, <vscale x 2 x double> %16, <vscale x 2 x double> %vec.phi
  %index.next = add nuw i64 %index, %1
  %lsr.iv.next = add i64 %lsr.iv48, %2
  %17 = icmp eq i64 %n.vec, %index.next
  br i1 %17, label %exit.block, label %vector.body

exit.block: ; preds = %vector.body
  %18 = tail call fast double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> %predphi)
  %19 = tail call fast double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> %predphi34)
  %.fca.0.0.insert = insertvalue %"class.std::complex" poison, double %18, 0, 0
  %.fca.0.1.insert = insertvalue %"class.std::complex" %.fca.0.0.insert, double %19, 0, 1
  ret %"class.std::complex" %.fca.0.1.insert
}

; Zero initialized reduction with conditional block. The IR is generated with predicated tail folding (-prefer-predicate-over-epilogue=predicate-dont-vectorize)
;
; complex<double> x = 0.0 + 0.0i;
; for (int i = 0; i < 100; ++i)
;   if (cond[i])
;     x += a[i] * b[i];
;
define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, ptr %cond) {
; CHECK-LABEL: complex_mul_predicated_x2_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z1.d, #0 // =0x0
; CHECK-NEXT: mov w8, #100 // =0x64
; CHECK-NEXT: cntd x9
; CHECK-NEXT: whilelo p1.d, xzr, x8
; CHECK-NEXT: rdvl x10, #2
; CHECK-NEXT: cnth x11
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x12, x9
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB2_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ld1w { z2.d }, p1/z, [x2]
; CHECK-NEXT: mov z6.d, z1.d
; CHECK-NEXT: mov z7.d, z0.d
; CHECK-NEXT: add x2, x2, x11
; CHECK-NEXT: and z2.d, z2.d, #0xffffffff
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl]
; CHECK-NEXT: ld1d { z3.d }, p1/z, [x0]
; CHECK-NEXT: ld1d { z5.d }, p1/z, [x1]
; CHECK-NEXT: add x1, x1, x10
; CHECK-NEXT: add x0, x0, x10
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
; CHECK-NEXT: mov z0.d, p2/m, z7.d
; CHECK-NEXT: mov z1.d, p1/m, z6.d
; CHECK-NEXT: whilelo p1.d, x12, x8
; CHECK-NEXT: add x12, x12, x9
; CHECK-NEXT: b.mi .LBB2_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d
; CHECK-NEXT: faddv d0, p0, z2.d
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z1
; CHECK-NEXT: ret
entry:
  %active.lane.mask.entry = tail call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 100)
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 1
  %2 = shl nuw nsw i64 %0, 5
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %lsr.iv = phi i64 [ %lsr.iv.next, %vector.body ], [ 0, %entry ]
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %active.lane.mask = phi <vscale x 2 x i1> [ %active.lane.mask.entry, %entry ], [ %active.lane.mask.next, %vector.body ]
  %vec.phi = phi <vscale x 2 x double> [ zeroinitializer, %entry ], [ %19, %vector.body ]
  %vec.phi30 = phi <vscale x 2 x double> [ zeroinitializer, %entry ], [ %21, %vector.body ]
  %3 = shl i64 %index, 2
  %scevgep = getelementptr i8, ptr %cond, i64 %3
  %wide.masked.load = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr %scevgep, i32 4, <vscale x 2 x i1> %active.lane.mask, <vscale x 2 x i32> poison)
  %4 = icmp ne <vscale x 2 x i32> %wide.masked.load, zeroinitializer
  %scevgep38 = getelementptr i8, ptr %a, i64 %lsr.iv
  %scevgep39 = getelementptr i8, ptr %b, i64 %lsr.iv
  %5 = select <vscale x 2 x i1> %active.lane.mask, <vscale x 2 x i1> %4, <vscale x 2 x i1> zeroinitializer
  %interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %5, <vscale x 2 x i1> %5)
  %wide.masked.vec = tail call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %scevgep38, i32 8, <vscale x 4 x i1> %interleaved.mask, <vscale x 4 x double> poison)
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %wide.masked.vec)
  %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %7 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %interleaved.mask31 = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %5, <vscale x 2 x i1> %5)
  %wide.masked.vec32 = tail call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %scevgep39, i32 8, <vscale x 4 x i1> %interleaved.mask31, <vscale x 4 x double> poison)
  %strided.vec33 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %wide.masked.vec32)
  %8 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec33, 0
  %9 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec33, 1
  %10 = fmul fast <vscale x 2 x double> %9, %6
  %11 = fmul fast <vscale x 2 x double> %8, %7
  %12 = fmul fast <vscale x 2 x double> %8, %6
  %13 = fadd fast <vscale x 2 x double> %12, %vec.phi30
  %14 = fmul fast <vscale x 2 x double> %9, %7
  %15 = fsub fast <vscale x 2 x double> %13, %14
  %16 = fadd fast <vscale x 2 x double> %11, %vec.phi
  %17 = fadd fast <vscale x 2 x double> %16, %10
  %18 = select <vscale x 2 x i1> %active.lane.mask, <vscale x 2 x i1> %4, <vscale x 2 x i1> zeroinitializer
  %19 = select fast <vscale x 2 x i1> %18, <vscale x 2 x double> %17, <vscale x 2 x double> %vec.phi
  %20 = select <vscale x 2 x i1> %active.lane.mask, <vscale x 2 x i1> %4, <vscale x 2 x i1> zeroinitializer
  %21 = select fast <vscale x 2 x i1> %20, <vscale x 2 x double> %15, <vscale x 2 x double> %vec.phi30
  %index.next = add i64 %index, %1
  %22 = add i64 %1, %index
  %active.lane.mask.next = tail call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 %22, i64 100)
  %23 = extractelement <vscale x 2 x i1> %active.lane.mask.next, i64 0
  %lsr.iv.next = add i64 %lsr.iv, %2
  br i1 %23, label %vector.body, label %exit.block

exit.block: ; preds = %vector.body
  %24 = tail call fast double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> %21)
  %25 = tail call fast double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> %19)
  %.fca.0.0.insert = insertvalue %"class.std::complex" poison, double %24, 0, 0
  %.fca.0.1.insert = insertvalue %"class.std::complex" %.fca.0.0.insert, double %25, 0, 1
  ret %"class.std::complex" %.fca.0.1.insert
}

declare i64 @llvm.vscale.i64()
declare <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64, i64)
declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr nocapture, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x i32>)
declare <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr nocapture, i32 immarg, <vscale x 4 x i1>, <vscale x 4 x double>)
declare <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double>)
declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)