# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -debug-only=pipeliner -pipeliner-max-stages=50 -pipeliner-max-mii=50 -pipeliner-enable-copytophi=0 -pipeliner-ii-search-range=30 2>&1 | FileCheck %s
# Test that each instruction is scheduled between its early cycle and its late cycle. Previously, there were cases where an instruction was scheduled outside of this valid range. See issue #93936 for details.
# CHECK: {{^ *}}Try to schedule with 47
# CHECK: {{^ *}}Inst (11) %48:fpr128 = LDRQui %35:gpr64sp, 0 :: (load (s128) from %ir.lsr.iv63, align 4, !tbaa !0)
# CHECK-NEXT: {{^ *}}es: ffffffe9 ls: ffffffe9
# CHECK-NEXT: {{^ *}}Trying to insert node between -23 and -23 II: 47
# CHECK-NEXT: {{^ *}}failed to insert at cycle -23 %48:fpr128 = LDRQui %35:gpr64sp, 0 :: (load (s128) from %ir.lsr.iv63, align 4, !tbaa !0)
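#
# The es/ls values above are printed as 32-bit hex, so 0xffffffe9 is -23: the
# earliest and latest legal cycles for this load coincide at -23 with II = 47.
# The scheduler is therefore expected to try only that single cycle and report
# that the insertion failed, rather than placing the instruction outside the
# [early cycle, late cycle] range.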
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"

define dso_local void @f(ptr nocapture noundef writeonly %a, ptr nocapture noundef readonly %b, ptr nocapture noundef readonly %c, ptr nocapture noundef readonly %d, ptr nocapture noundef readonly %e, float noundef %f, i32 noundef %N) local_unnamed_addr {
entry:
  %cmp16 = icmp sgt i32 %N, 0
  br i1 %cmp16, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext nneg i32 %N to i64
  %min.iters.check = icmp ult i32 %N, 8
  br i1 %min.iters.check, label %for.body.preheader37, label %vector.memcheck

vector.memcheck:                                  ; preds = %for.body.preheader
  %0 = ptrtoint ptr %a to i64
  %1 = ptrtoint ptr %b to i64
  %2 = ptrtoint ptr %c to i64
  %3 = ptrtoint ptr %d to i64
  %4 = ptrtoint ptr %e to i64
  %5 = sub i64 %0, %1
  %diff.check = icmp ult i64 %5, 32
  %6 = sub i64 %0, %2
  %diff.check22 = icmp ult i64 %6, 32
  %conflict.rdx = or i1 %diff.check, %diff.check22
  %7 = sub i64 %0, %3
  %diff.check24 = icmp ult i64 %7, 32
  %conflict.rdx25 = or i1 %conflict.rdx, %diff.check24
  %8 = sub i64 %0, %4
  %diff.check27 = icmp ult i64 %8, 32
  %conflict.rdx28 = or i1 %conflict.rdx25, %diff.check27
  br i1 %conflict.rdx28, label %for.body.preheader37, label %vector.ph

vector.ph:                                        ; preds = %vector.memcheck
  %n.vec = and i64 %wide.trip.count, 2147483640
  %broadcast.splatinsert = insertelement <4 x float> poison, float %f, i64 0
  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
  %scevgep54 = getelementptr i8, ptr %b, i64 16
  %scevgep58 = getelementptr i8, ptr %a, i64 16
  %scevgep62 = getelementptr i8, ptr %c, i64 16
  %scevgep66 = getelementptr i8, ptr %e, i64 16
  %scevgep70 = getelementptr i8, ptr %d, i64 16
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %lsr.iv71 = phi ptr [ %scevgep72, %vector.body ], [ %scevgep70, %vector.ph ]
  %lsr.iv67 = phi ptr [ %scevgep68, %vector.body ], [ %scevgep66, %vector.ph ]
  %lsr.iv63 = phi ptr [ %scevgep64, %vector.body ], [ %scevgep62, %vector.ph ]
  %lsr.iv59 = phi ptr [ %scevgep60, %vector.body ], [ %scevgep58, %vector.ph ]
  %lsr.iv55 = phi ptr [ %scevgep56, %vector.body ], [ %scevgep54, %vector.ph ]
  %lsr.iv52 = phi i64 [ %lsr.iv.next53, %vector.body ], [ %n.vec, %vector.ph ]
  %scevgep57 = getelementptr i8, ptr %lsr.iv55, i64 -16
  %wide.load = load <4 x float>, ptr %scevgep57, align 4, !tbaa !6
  %wide.load29 = load <4 x float>, ptr %lsr.iv55, align 4, !tbaa !6
  %9 = fmul <4 x float> %wide.load, %broadcast.splat
  %10 = fmul <4 x float> %wide.load29, %broadcast.splat
  %scevgep65 = getelementptr i8, ptr %lsr.iv63, i64 -16
  %wide.load30 = load <4 x float>, ptr %scevgep65, align 4, !tbaa !6
  %wide.load31 = load <4 x float>, ptr %lsr.iv63, align 4, !tbaa !6
  %scevgep73 = getelementptr i8, ptr %lsr.iv71, i64 -16
  %wide.load32 = load <4 x float>, ptr %scevgep73, align 4, !tbaa !6
  %wide.load33 = load <4 x float>, ptr %lsr.iv71, align 4, !tbaa !6
  %11 = fsub <4 x float> %wide.load30, %wide.load32
  %12 = fsub <4 x float> %wide.load31, %wide.load33
  %13 = fmul <4 x float> %9, %11
  %14 = fmul <4 x float> %10, %12
  %scevgep69 = getelementptr i8, ptr %lsr.iv67, i64 -16
  %wide.load34 = load <4 x float>, ptr %scevgep69, align 4, !tbaa !6
  %wide.load35 = load <4 x float>, ptr %lsr.iv67, align 4, !tbaa !6
  %15 = fdiv <4 x float> %13, %wide.load34
  %16 = fdiv <4 x float> %14, %wide.load35
  %scevgep61 = getelementptr i8, ptr %lsr.iv59, i64 -16
  store <4 x float> %15, ptr %scevgep61, align 4, !tbaa !6
  store <4 x float> %16, ptr %lsr.iv59, align 4, !tbaa !6
  %lsr.iv.next53 = add nsw i64 %lsr.iv52, -8
  %scevgep56 = getelementptr i8, ptr %lsr.iv55, i64 32
  %scevgep60 = getelementptr i8, ptr %lsr.iv59, i64 32
  %scevgep64 = getelementptr i8, ptr %lsr.iv63, i64 32
  %scevgep68 = getelementptr i8, ptr %lsr.iv67, i64 32
  %scevgep72 = getelementptr i8, ptr %lsr.iv71, i64 32
  %17 = icmp eq i64 %lsr.iv.next53, 0
  br i1 %17, label %middle.block, label %vector.body, !llvm.loop !10

middle.block:                                     ; preds = %vector.body
  %cmp.n = icmp eq i64 %n.vec, %wide.trip.count
  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader37

for.body.preheader37:                             ; preds = %vector.memcheck, %for.body.preheader, %middle.block
  %indvars.iv.ph = phi i64 [ %n.vec, %middle.block ], [ 0, %for.body.preheader ], [ 0, %vector.memcheck ]
  %18 = shl nuw nsw i64 %indvars.iv.ph, 2
  %scevgep = getelementptr i8, ptr %a, i64 %18
  %scevgep39 = getelementptr i8, ptr %e, i64 %18
  %scevgep42 = getelementptr i8, ptr %d, i64 %18
  %scevgep45 = getelementptr i8, ptr %c, i64 %18
  %scevgep48 = getelementptr i8, ptr %b, i64 %18
  %19 = sub i64 %wide.trip.count, %indvars.iv.ph
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %middle.block, %entry
  ret void

for.body:                                         ; preds = %for.body.preheader37, %for.body
  %lsr.iv51 = phi i64 [ %19, %for.body.preheader37 ], [ %lsr.iv.next, %for.body ]
  %lsr.iv49 = phi ptr [ %scevgep48, %for.body.preheader37 ], [ %scevgep50, %for.body ]
  %lsr.iv46 = phi ptr [ %scevgep45, %for.body.preheader37 ], [ %scevgep47, %for.body ]
  %lsr.iv43 = phi ptr [ %scevgep42, %for.body.preheader37 ], [ %scevgep44, %for.body ]
  %lsr.iv40 = phi ptr [ %scevgep39, %for.body.preheader37 ], [ %scevgep41, %for.body ]
  %lsr.iv = phi ptr [ %scevgep, %for.body.preheader37 ], [ %scevgep38, %for.body ]
  %20 = load float, ptr %lsr.iv49, align 4, !tbaa !6
  %mul = fmul float %20, %f
  %21 = load float, ptr %lsr.iv46, align 4, !tbaa !6
  %22 = load float, ptr %lsr.iv43, align 4, !tbaa !6
  %sub = fsub float %21, %22
  %mul5 = fmul float %mul, %sub
  %23 = load float, ptr %lsr.iv40, align 4, !tbaa !6
  %div = fdiv float %mul5, %23
  store float %div, ptr %lsr.iv, align 4, !tbaa !6
  %scevgep38 = getelementptr i8, ptr %lsr.iv, i64 4
  %scevgep41 = getelementptr i8, ptr %lsr.iv40, i64 4
  %scevgep44 = getelementptr i8, ptr %lsr.iv43, i64 4
  %scevgep47 = getelementptr i8, ptr %lsr.iv46, i64 4
  %scevgep50 = getelementptr i8, ptr %lsr.iv49, i64 4
  %lsr.iv.next = add i64 %lsr.iv51, -1
  %exitcond.not = icmp eq i64 %lsr.iv.next, 0
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}

!6 = !{!7, !7, i64 0}
!7 = !{!"float", !8, i64 0}
!8 = !{!"omnipotent char", !9, i64 0}
!9 = !{!"Simple C/C++ TBAA"}
!10 = distinct !{!10, !11, !12, !13}
!11 = !{!"llvm.loop.mustprogress"}
!12 = !{!"llvm.loop.isvectorized", i32 1}
!13 = !{!"llvm.loop.unroll.runtime.disable"}
!14 = distinct !{!14, !11, !12}

tracksRegLiveness: true
liveins:
  - { reg: '$x0', virtual-reg: '%39' }
  - { reg: '$x1', virtual-reg: '%40' }
  - { reg: '$x2', virtual-reg: '%41' }
  - { reg: '$x3', virtual-reg: '%42' }
  - { reg: '$x4', virtual-reg: '%43' }
  - { reg: '$s0', virtual-reg: '%44' }
  - { reg: '$w5', virtual-reg: '%45' }

bb.0.entry:
  successors: %bb.1, %bb.7
  liveins: $x0, $x1, $x2, $x3, $x4, $s0, $w5

  %45:gpr32common = COPY $w5
  %44:fpr32 = COPY $s0
  %43:gpr64common = COPY $x4
  %42:gpr64common = COPY $x3
  %41:gpr64common = COPY $x2
  %40:gpr64common = COPY $x1
  %39:gpr64common = COPY $x0
  dead $wzr = SUBSWri %45, 1, 0, implicit-def $nzcv
  Bcc 11, %bb.7, implicit $nzcv

bb.1.for.body.preheader:
  successors: %bb.12, %bb.2

  %48:gpr32 = ORRWrs $wzr, %45, 0
  %0:gpr64 = SUBREG_TO_REG 0, killed %48, %subreg.sub_32
  dead $wzr = SUBSWri %45, 8, 0, implicit-def $nzcv
  Bcc 2, %bb.2, implicit $nzcv

bb.12:
  %49:gpr64all = COPY $xzr
  %47:gpr64all = COPY %49

bb.2.vector.memcheck:
  successors: %bb.6, %bb.11

  %55:gpr64common = SUBXrr %39, %40
  %59:gpr64all = COPY $xzr
  %51:gpr64all = COPY %59
  dead $xzr = SUBSXri killed %55, 32, 0, implicit-def $nzcv
  Bcc 3, %bb.6, implicit $nzcv

bb.11.vector.memcheck:
  successors: %bb.6, %bb.10

  %56:gpr64common = SUBXrr %39, %41
  dead $xzr = SUBSXri %56, 32, 0, implicit-def $nzcv
  Bcc 3, %bb.6, implicit $nzcv

bb.10.vector.memcheck:
  successors: %bb.6, %bb.9

  %57:gpr64common = SUBXrr %39, %42
  dead $xzr = SUBSXri %57, 32, 0, implicit-def $nzcv
  Bcc 3, %bb.6, implicit $nzcv

bb.9.vector.memcheck:
  successors: %bb.6, %bb.3

  %58:gpr64common = SUBXrr %39, %43
  dead $xzr = SUBSXri %58, 32, 0, implicit-def $nzcv
  Bcc 3, %bb.6, implicit $nzcv

bb.3.vector.ph:
  %64:gpr64common = ANDXri %0, 8027
  %1:gpr64all = COPY %64
  %66:fpr128 = IMPLICIT_DEF
  %65:fpr128 = INSERT_SUBREG %66, %44, %subreg.ssub
  %67:gpr64sp = ADDXri %40, 16, 0
  %3:gpr64all = COPY %67
  %68:gpr64sp = ADDXri %39, 16, 0
  %4:gpr64all = COPY %68
  %69:gpr64sp = ADDXri %41, 16, 0
  %5:gpr64all = COPY %69
  %70:gpr64sp = ADDXri %43, 16, 0
  %6:gpr64all = COPY %70
  %71:gpr64sp = ADDXri %42, 16, 0
  %7:gpr64all = COPY %71

bb.4.vector.body:
  successors: %bb.5, %bb.4

  %8:gpr64sp = PHI %7, %bb.3, %19, %bb.4
  %9:gpr64sp = PHI %6, %bb.3, %18, %bb.4
  %10:gpr64sp = PHI %5, %bb.3, %17, %bb.4
  %11:gpr64sp = PHI %4, %bb.3, %16, %bb.4
  %12:gpr64sp = PHI %3, %bb.3, %15, %bb.4
  %13:gpr64sp = PHI %1, %bb.3, %14, %bb.4
  %72:fpr128 = LDURQi %12, -16 :: (load (s128) from %ir.scevgep57, align 4, !tbaa !6)
  %73:fpr128 = LDRQui %12, 0 :: (load (s128) from %ir.lsr.iv55, align 4, !tbaa !6)
  %74:fpr128 = nofpexcept FMULv4i32_indexed killed %72, %65, 0, implicit $fpcr
  %75:fpr128 = nofpexcept FMULv4i32_indexed killed %73, %65, 0, implicit $fpcr
  %76:fpr128 = LDURQi %10, -16 :: (load (s128) from %ir.scevgep65, align 4, !tbaa !6)
  %77:fpr128 = LDRQui %10, 0 :: (load (s128) from %ir.lsr.iv63, align 4, !tbaa !6)
  %78:fpr128 = LDURQi %8, -16 :: (load (s128) from %ir.scevgep73, align 4, !tbaa !6)
  %79:fpr128 = LDRQui %8, 0 :: (load (s128) from %ir.lsr.iv71, align 4, !tbaa !6)
  %80:fpr128 = nofpexcept FSUBv4f32 killed %76, killed %78, implicit $fpcr
  %81:fpr128 = nofpexcept FSUBv4f32 killed %77, killed %79, implicit $fpcr
  %82:fpr128 = nofpexcept FMULv4f32 killed %74, killed %80, implicit $fpcr
  %83:fpr128 = nofpexcept FMULv4f32 killed %75, killed %81, implicit $fpcr
  %84:fpr128 = LDURQi %9, -16 :: (load (s128) from %ir.scevgep69, align 4, !tbaa !6)
  %85:fpr128 = LDRQui %9, 0 :: (load (s128) from %ir.lsr.iv67, align 4, !tbaa !6)
  %86:fpr128 = nofpexcept FDIVv4f32 killed %82, killed %84, implicit $fpcr
  %87:fpr128 = nofpexcept FDIVv4f32 killed %83, killed %85, implicit $fpcr
  STURQi killed %86, %11, -16 :: (store (s128) into %ir.scevgep61, align 4, !tbaa !6)
  STRQui killed %87, %11, 0 :: (store (s128) into %ir.lsr.iv59, align 4, !tbaa !6)
  %88:gpr64 = nsw SUBSXri %13, 8, 0, implicit-def $nzcv
  %14:gpr64all = COPY %88
  %89:gpr64sp = ADDXri %12, 32, 0
  %15:gpr64all = COPY %89
  %90:gpr64sp = ADDXri %11, 32, 0
  %16:gpr64all = COPY %90
  %91:gpr64sp = ADDXri %10, 32, 0
  %17:gpr64all = COPY %91
  %92:gpr64sp = ADDXri %9, 32, 0
  %18:gpr64all = COPY %92
  %93:gpr64sp = ADDXri %8, 32, 0
  %19:gpr64all = COPY %93
  Bcc 1, %bb.4, implicit $nzcv

bb.5.middle.block:
  dead $xzr = SUBSXrr %64, %0, implicit-def $nzcv
  Bcc 0, %bb.7, implicit $nzcv

bb.6.for.body.preheader37:
  %20:gpr64 = PHI %47, %bb.12, %51, %bb.2, %51, %bb.11, %51, %bb.10, %51, %bb.9, %1, %bb.5
  %95:gpr64 = nuw nsw UBFMXri %20, 62, 61
  %96:gpr64 = ADDXrr %39, %95
  %21:gpr64all = COPY %96
  %97:gpr64 = ADDXrr %43, %95
  %22:gpr64all = COPY %97
  %98:gpr64 = ADDXrr %42, %95
  %23:gpr64all = COPY %98
  %99:gpr64 = ADDXrr %41, %95
  %24:gpr64all = COPY %99
  %100:gpr64 = ADDXrr %40, %95
  %25:gpr64all = COPY %100
  %101:gpr64 = SUBXrr %0, %20
  %26:gpr64all = COPY %101

bb.7.for.cond.cleanup:
  RET_ReallyLR

bb.8.for.body:
  successors: %bb.7, %bb.8

  %27:gpr64sp = PHI %26, %bb.6, %38, %bb.8
  %28:gpr64sp = PHI %25, %bb.6, %37, %bb.8
  %29:gpr64sp = PHI %24, %bb.6, %36, %bb.8
  %30:gpr64sp = PHI %23, %bb.6, %35, %bb.8
  %31:gpr64sp = PHI %22, %bb.6, %34, %bb.8
  %32:gpr64sp = PHI %21, %bb.6, %33, %bb.8
  early-clobber %102:gpr64sp, %103:fpr32 = LDRSpost %28, 4 :: (load (s32) from %ir.lsr.iv49, !tbaa !6)
  %104:fpr32 = nofpexcept FMULSrr killed %103, %44, implicit $fpcr
  early-clobber %105:gpr64sp, %106:fpr32 = LDRSpost %29, 4 :: (load (s32) from %ir.lsr.iv46, !tbaa !6)
  early-clobber %107:gpr64sp, %108:fpr32 = LDRSpost %30, 4 :: (load (s32) from %ir.lsr.iv43, !tbaa !6)
  %109:fpr32 = nofpexcept FSUBSrr killed %106, killed %108, implicit $fpcr
  %110:fpr32 = nofpexcept FMULSrr killed %104, killed %109, implicit $fpcr
  early-clobber %111:gpr64sp, %112:fpr32 = LDRSpost %31, 4 :: (load (s32) from %ir.lsr.iv40, !tbaa !6)
  %113:fpr32 = nofpexcept FDIVSrr killed %110, killed %112, implicit $fpcr
  early-clobber %114:gpr64sp = STRSpost killed %113, %32, 4 :: (store (s32) into %ir.lsr.iv, !tbaa !6)
  %33:gpr64all = COPY %114
  %34:gpr64all = COPY %111
  %35:gpr64all = COPY %107
  %36:gpr64all = COPY %105
  %37:gpr64all = COPY %102
  %115:gpr64 = SUBSXri %27, 1, 0, implicit-def $nzcv
  %38:gpr64all = COPY %115
  Bcc 0, %bb.7, implicit $nzcv