1 ; RUN: opt -loop-unroll-and-jam -unroll-runtime -unroll-partial-threshold=60 < %s -S | FileCheck %s
2 ; RUN: opt -aa-pipeline=tbaa,basic-aa -passes='loop-unroll-and-jam' -unroll-runtime -unroll-partial-threshold=60 < %s -S | FileCheck %s
4 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
5 target triple = "x86_64-unknown-linux-gnu"
7 ; CHECK-LABEL: function
8 ; The explicit metadata here should force this to be unrolled and jammed 4 times (hence the %.pre60.3)
9 ; CHECK: %.pre = phi i8 [ %.pre60.3, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %.pre.pre, %for.cond1.preheader.us.preheader.new ]
10 ; CHECK: %indvars.iv.3 = phi i64 [ 0, %for.cond1.preheader.us ], [ %indvars.iv.next.3, %for.body4.us ]
; Two-level loop nest applying a 2x2 weighted filter to a byte image.
; For every row (outer loop, %height iterations) and every column (inner loop,
; %width iterations) the inner body stores
;   trunc( (A*cur[x] + B*cur[x+1] + C*next[x] + D*next[x+1] + 32) >> 6 )
; to the current destination row, where "cur" is %srcp.056.us.pn and "next" is
; %srcp.056.us (cur advanced by %src_stride bytes).
; The outer-loop back edge carries !llvm.loop !5.
11 define void @function(i8* noalias nocapture %dst, i32 %dst_stride, i8* noalias nocapture readonly %src, i32 %src_stride, i32 %A, i32 %B, i32 %C, i32 %D, i32 %width, i32 %height) {
13 %idxprom = sext i32 %src_stride to i64
; Skip everything when there are no rows.
14 %cmp52 = icmp sgt i32 %height, 0
15 br i1 %cmp52, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup
17 for.cond1.preheader.lr.ph: ; preds = %entry
; Skip everything when there are no columns.
18 %cmp249 = icmp sgt i32 %width, 0
19 %idx.ext = sext i32 %dst_stride to i64
20 br i1 %cmp249, label %for.cond1.preheader.us.preheader, label %for.cond.cleanup
22 for.cond1.preheader.us.preheader: ; preds = %for.cond1.preheader.lr.ph
; Load the first byte of the first source row once, before the outer loop.
23 %.pre.pre = load i8, i8* %src, align 1
24 %wide.trip.count = zext i32 %width to i64
25 br label %for.cond1.preheader.us
; Outer loop header: one iteration per row.
; %.pre is the first byte of the current row; it is the previous iteration's
; %.pre60, i.e. the first byte of what was then the next row.
27 for.cond1.preheader.us: ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us, %for.cond1.preheader.us.preheader
28 %.pre = phi i8 [ %.pre60, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %.pre.pre, %for.cond1.preheader.us.preheader ]
29 %srcp.056.us.pn = phi i8* [ %srcp.056.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %src, %for.cond1.preheader.us.preheader ]
30 %y.055.us = phi i32 [ %inc30.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %for.cond1.preheader.us.preheader ]
31 %dst.addr.054.us = phi i8* [ %add.ptr.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %dst, %for.cond1.preheader.us.preheader ]
; Pointer to the next source row, and its first byte.
32 %srcp.056.us = getelementptr inbounds i8, i8* %srcp.056.us.pn, i64 %idxprom
33 %.pre60 = load i8, i8* %srcp.056.us, align 1
34 br label %for.body4.us
; Inner loop: one iteration per column.
; %1 and %0 carry the bytes loaded at x+1 (current row / next row) by the
; previous iteration, so each iteration issues only two loads (%2 and %3).
36 for.body4.us: ; preds = %for.body4.us, %for.cond1.preheader.us
37 %0 = phi i8 [ %.pre60, %for.cond1.preheader.us ], [ %3, %for.body4.us ]
38 %1 = phi i8 [ %.pre, %for.cond1.preheader.us ], [ %2, %for.body4.us ]
39 %indvars.iv = phi i64 [ 0, %for.cond1.preheader.us ], [ %indvars.iv.next, %for.body4.us ]
40 %conv.us = zext i8 %1 to i32
41 %mul.us = mul nsw i32 %conv.us, %A
42 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
43 %arrayidx8.us = getelementptr inbounds i8, i8* %srcp.056.us.pn, i64 %indvars.iv.next
44 %2 = load i8, i8* %arrayidx8.us, align 1
45 %conv9.us = zext i8 %2 to i32
46 %mul10.us = mul nsw i32 %conv9.us, %B
47 %conv14.us = zext i8 %0 to i32
48 %mul15.us = mul nsw i32 %conv14.us, %C
49 %arrayidx19.us = getelementptr inbounds i8, i8* %srcp.056.us, i64 %indvars.iv.next
50 %3 = load i8, i8* %arrayidx19.us, align 1
51 %conv20.us = zext i8 %3 to i32
52 %mul21.us = mul nsw i32 %conv20.us, %D
; Sum the four weighted terms plus 32, shift right by 6, keep the low byte.
53 %add11.us = add i32 %mul.us, 32
54 %add16.us = add i32 %add11.us, %mul10.us
55 %add22.us = add i32 %add16.us, %mul15.us
56 %add23.us = add i32 %add22.us, %mul21.us
57 %4 = lshr i32 %add23.us, 6
58 %conv24.us = trunc i32 %4 to i8
59 %arrayidx26.us = getelementptr inbounds i8, i8* %dst.addr.054.us, i64 %indvars.iv
60 store i8 %conv24.us, i8* %arrayidx26.us, align 1
61 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
62 br i1 %exitcond, label %for.cond1.for.cond.cleanup3_crit_edge.us, label %for.body4.us
; Outer loop latch: advance the destination by %dst_stride and count rows.
; The back edge carries the unroll-and-jam metadata (!5).
64 for.cond1.for.cond.cleanup3_crit_edge.us: ; preds = %for.body4.us
65 %add.ptr.us = getelementptr inbounds i8, i8* %dst.addr.054.us, i64 %idx.ext
66 %inc30.us = add nuw nsw i32 %y.055.us, 1
67 %exitcond58 = icmp eq i32 %inc30.us, %height
68 br i1 %exitcond58, label %for.cond.cleanup, label %for.cond1.preheader.us, !llvm.loop !5
70 for.cond.cleanup: ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us, %for.cond1.preheader.lr.ph, %entry
74 ; CHECK-LABEL: function2
75 ; The explicit metadata here should force this to be unrolled and jammed, but
76 ; the count is left to the thresholds. In this case it is 2 (hence the %.pre60.1).
77 ; CHECK: %.pre = phi i8 [ %.pre60.1, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %.pre.pre, %for.cond1.preheader.us.preheader.new ]
78 ; CHECK: %indvars.iv.1 = phi i64 [ 0, %for.cond1.preheader.us ], [ %indvars.iv.next.1, %for.body4.us ]
; Same loop nest as @function: a 2x2 weighted filter over a byte image.
; For every row (outer loop, %height iterations) and every column (inner loop,
; %width iterations) the inner body stores
;   trunc( (A*cur[x] + B*cur[x+1] + C*next[x] + D*next[x+1] + 32) >> 6 )
; to the current destination row, where "cur" is %srcp.056.us.pn and "next" is
; %srcp.056.us (cur advanced by %src_stride bytes).
; The only difference from @function is the loop metadata on the outer-loop
; back edge: !7 instead of !5.
79 define void @function2(i8* noalias nocapture %dst, i32 %dst_stride, i8* noalias nocapture readonly %src, i32 %src_stride, i32 %A, i32 %B, i32 %C, i32 %D, i32 %width, i32 %height) {
81 %idxprom = sext i32 %src_stride to i64
; Skip everything when there are no rows.
82 %cmp52 = icmp sgt i32 %height, 0
83 br i1 %cmp52, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup
85 for.cond1.preheader.lr.ph: ; preds = %entry
; Skip everything when there are no columns.
86 %cmp249 = icmp sgt i32 %width, 0
87 %idx.ext = sext i32 %dst_stride to i64
88 br i1 %cmp249, label %for.cond1.preheader.us.preheader, label %for.cond.cleanup
90 for.cond1.preheader.us.preheader: ; preds = %for.cond1.preheader.lr.ph
; Load the first byte of the first source row once, before the outer loop.
91 %.pre.pre = load i8, i8* %src, align 1
92 %wide.trip.count = zext i32 %width to i64
93 br label %for.cond1.preheader.us
; Outer loop header: one iteration per row.
; %.pre is the first byte of the current row; it is the previous iteration's
; %.pre60, i.e. the first byte of what was then the next row.
95 for.cond1.preheader.us: ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us, %for.cond1.preheader.us.preheader
96 %.pre = phi i8 [ %.pre60, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %.pre.pre, %for.cond1.preheader.us.preheader ]
97 %srcp.056.us.pn = phi i8* [ %srcp.056.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %src, %for.cond1.preheader.us.preheader ]
98 %y.055.us = phi i32 [ %inc30.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %for.cond1.preheader.us.preheader ]
99 %dst.addr.054.us = phi i8* [ %add.ptr.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %dst, %for.cond1.preheader.us.preheader ]
; Pointer to the next source row, and its first byte.
100 %srcp.056.us = getelementptr inbounds i8, i8* %srcp.056.us.pn, i64 %idxprom
101 %.pre60 = load i8, i8* %srcp.056.us, align 1
102 br label %for.body4.us
; Inner loop: one iteration per column.
; %1 and %0 carry the bytes loaded at x+1 (current row / next row) by the
; previous iteration, so each iteration issues only two loads (%2 and %3).
104 for.body4.us: ; preds = %for.body4.us, %for.cond1.preheader.us
105 %0 = phi i8 [ %.pre60, %for.cond1.preheader.us ], [ %3, %for.body4.us ]
106 %1 = phi i8 [ %.pre, %for.cond1.preheader.us ], [ %2, %for.body4.us ]
107 %indvars.iv = phi i64 [ 0, %for.cond1.preheader.us ], [ %indvars.iv.next, %for.body4.us ]
108 %conv.us = zext i8 %1 to i32
109 %mul.us = mul nsw i32 %conv.us, %A
110 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
111 %arrayidx8.us = getelementptr inbounds i8, i8* %srcp.056.us.pn, i64 %indvars.iv.next
112 %2 = load i8, i8* %arrayidx8.us, align 1
113 %conv9.us = zext i8 %2 to i32
114 %mul10.us = mul nsw i32 %conv9.us, %B
115 %conv14.us = zext i8 %0 to i32
116 %mul15.us = mul nsw i32 %conv14.us, %C
117 %arrayidx19.us = getelementptr inbounds i8, i8* %srcp.056.us, i64 %indvars.iv.next
118 %3 = load i8, i8* %arrayidx19.us, align 1
119 %conv20.us = zext i8 %3 to i32
120 %mul21.us = mul nsw i32 %conv20.us, %D
; Sum the four weighted terms plus 32, shift right by 6, keep the low byte.
121 %add11.us = add i32 %mul.us, 32
122 %add16.us = add i32 %add11.us, %mul10.us
123 %add22.us = add i32 %add16.us, %mul15.us
124 %add23.us = add i32 %add22.us, %mul21.us
125 %4 = lshr i32 %add23.us, 6
126 %conv24.us = trunc i32 %4 to i8
127 %arrayidx26.us = getelementptr inbounds i8, i8* %dst.addr.054.us, i64 %indvars.iv
128 store i8 %conv24.us, i8* %arrayidx26.us, align 1
129 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
130 br i1 %exitcond, label %for.cond1.for.cond.cleanup3_crit_edge.us, label %for.body4.us
; Outer loop latch: advance the destination by %dst_stride and count rows.
; The back edge carries the unroll-and-jam metadata (!7).
132 for.cond1.for.cond.cleanup3_crit_edge.us: ; preds = %for.body4.us
133 %add.ptr.us = getelementptr inbounds i8, i8* %dst.addr.054.us, i64 %idx.ext
134 %inc30.us = add nuw nsw i32 %y.055.us, 1
135 %exitcond58 = icmp eq i32 %inc30.us, %height
136 br i1 %exitcond58, label %for.cond.cleanup, label %for.cond1.preheader.us, !llvm.loop !7
138 for.cond.cleanup: ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us, %for.cond1.preheader.lr.ph, %entry
; Loop metadata attached to the outer-loop back edges of the functions above.
; !5 (used by @function): unroll-and-jam with an explicit count of 4.
142 !5 = distinct !{!5, !6}
143 !6 = !{!"llvm.loop.unroll_and_jam.count", i32 4}
; !7 (used by @function2): unroll-and-jam enabled, no count specified.
144 !7 = distinct !{!7, !8}
145 !8 = !{!"llvm.loop.unroll_and_jam.enable"}