1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -mtriple=thumbv7em -arm-parallel-dsp -dce -S %s -o - | FileCheck %s
; Fully unrolled four-lane i16 dot product. For every i in [0, N):
;   a[i] = sum over k = 0..3 of sext(b[i][k]) * sext(c[i][k])
; where b[i] and c[i] are pointers loaded from the %b and %c arrays.
; The function returns immediately when N == 0.
;
; Expected output: each adjacent pair of i16 loads is replaced by one i32
; load, and the four multiply-adds become two chained llvm.arm.smlad calls,
; the first accumulating onto 0 and the second onto the first call's result.
define void @full_unroll(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c, i32 %N) {
; CHECK-LABEL: @full_unroll(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP29:%.*]] = icmp eq i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP29]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
; CHECK:       for.body:
; CHECK-NEXT:    [[I_030:%.*]] = phi i32 [ [[INC12:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I_030]]
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds ptr, ptr [[B:%.*]], i32 [[I_030]]
; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[C:%.*]], i32 [[I_030]]
; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 2
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 2
; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP5]], i32 [[TMP3]], i32 0)
; CHECK-NEXT:    [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX6_2]], align 2
; CHECK-NEXT:    [[ARRAYIDX8_2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 2
; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX8_2]], align 2
; CHECK-NEXT:    [[TMP11:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP10]], i32 [[TMP8]], i32 [[TMP6]])
; CHECK-NEXT:    store i32 [[TMP11]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[INC12]] = add nuw i32 [[I_030]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC12]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
;
entry:
  ; Skip the loop entirely for N == 0.
  %cmp29 = icmp eq i32 %N, 0
  br i1 %cmp29, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %entry, %for.body
  %i.030 = phi i32 [ %inc12, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.030
  ; %0 = b[i] and %1 = c[i]: the two i16 rows multiplied lane by lane below.
  %arrayidx5 = getelementptr inbounds ptr, ptr %b, i32 %i.030
  %0 = load ptr, ptr %arrayidx5, align 4
  %arrayidx7 = getelementptr inbounds ptr, ptr %c, i32 %i.030
  %1 = load ptr, ptr %arrayidx7, align 4
  ; Lane 0: c[i][0] * b[i][0].
  %2 = load i16, ptr %0, align 2
  %conv = sext i16 %2 to i32
  %3 = load i16, ptr %1, align 2
  %conv9 = sext i16 %3 to i32
  %mul = mul nsw i32 %conv9, %conv
  ; Lane 1, added to lane 0.
  %arrayidx6.1 = getelementptr inbounds i16, ptr %0, i32 1
  %4 = load i16, ptr %arrayidx6.1, align 2
  %conv.1 = sext i16 %4 to i32
  %arrayidx8.1 = getelementptr inbounds i16, ptr %1, i32 1
  %5 = load i16, ptr %arrayidx8.1, align 2
  %conv9.1 = sext i16 %5 to i32
  %mul.1 = mul nsw i32 %conv9.1, %conv.1
  %add.1 = add nsw i32 %mul.1, %mul
  ; Lane 2, added to the running sum.
  %arrayidx6.2 = getelementptr inbounds i16, ptr %0, i32 2
  %6 = load i16, ptr %arrayidx6.2, align 2
  %conv.2 = sext i16 %6 to i32
  %arrayidx8.2 = getelementptr inbounds i16, ptr %1, i32 2
  %7 = load i16, ptr %arrayidx8.2, align 2
  %conv9.2 = sext i16 %7 to i32
  %mul.2 = mul nsw i32 %conv9.2, %conv.2
  %add.2 = add nsw i32 %mul.2, %add.1
  ; Lane 3, added to the running sum.
  %arrayidx6.3 = getelementptr inbounds i16, ptr %0, i32 3
  %8 = load i16, ptr %arrayidx6.3, align 2
  %conv.3 = sext i16 %8 to i32
  %arrayidx8.3 = getelementptr inbounds i16, ptr %1, i32 3
  %9 = load i16, ptr %arrayidx8.3, align 2
  %conv9.3 = sext i16 %9 to i32
  %mul.3 = mul nsw i32 %conv9.3, %conv.3
  %add.3 = add nsw i32 %mul.3, %add.2
  ; a[i] = sum of the four lane products.
  store i32 %add.3, ptr %arrayidx, align 4
  %inc12 = add nuw i32 %i.030, 1
  %exitcond = icmp eq i32 %inc12, %N
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
; Variant of the four-lane unrolled loop in which lane 0 contributes a
; difference instead of a product. For every i in [0, N):
;   a[i] = (sext(c[i][0]) - sext(b[i][0])) + sum over k = 1..3 of sext(b[i][k]) * sext(c[i][k])
; where b[i] and c[i] are pointers loaded from the %b and %c arrays.
; The function returns immediately when N == 0.
;
; Expected output: lanes 0 and 1 keep their scalar i16 loads, sign extensions,
; sub, mul and add, while lanes 2 and 3 are replaced by two i32 loads feeding a
; single llvm.arm.smlad call that accumulates onto the lane 0 + lane 1 sum.
define void @full_unroll_sub(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c, i32 %N) {
; CHECK-LABEL: @full_unroll_sub(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP29:%.*]] = icmp eq i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP29]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
; CHECK:       for.body:
; CHECK-NEXT:    [[I_030:%.*]] = phi i32 [ [[INC12:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I_030]]
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds ptr, ptr [[B:%.*]], i32 [[I_030]]
; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[C:%.*]], i32 [[I_030]]
; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[TMP0]], align 2
; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[TMP1]], align 2
; CHECK-NEXT:    [[CONV9:%.*]] = sext i16 [[TMP3]] to i32
; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[CONV9]], [[CONV]]
; CHECK-NEXT:    [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX6_1]], align 2
; CHECK-NEXT:    [[CONV_1:%.*]] = sext i16 [[TMP4]] to i32
; CHECK-NEXT:    [[ARRAYIDX8_1:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 1
; CHECK-NEXT:    [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX8_1]], align 2
; CHECK-NEXT:    [[CONV9_1:%.*]] = sext i16 [[TMP5]] to i32
; CHECK-NEXT:    [[MUL_1:%.*]] = mul nsw i32 [[CONV9_1]], [[CONV_1]]
; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[MUL_1]], [[SUB]]
; CHECK-NEXT:    [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX6_2]], align 2
; CHECK-NEXT:    [[ARRAYIDX8_2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 2
; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX8_2]], align 2
; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP9]], i32 [[TMP7]], i32 [[ADD_1]])
; CHECK-NEXT:    store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[INC12]] = add nuw i32 [[I_030]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC12]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
;
entry:
  ; Skip the loop entirely for N == 0.
  %cmp29 = icmp eq i32 %N, 0
  br i1 %cmp29, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %entry, %for.body
  %i.030 = phi i32 [ %inc12, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.030
  ; %0 = b[i] and %1 = c[i]: the two i16 rows combined lane by lane below.
  %arrayidx5 = getelementptr inbounds ptr, ptr %b, i32 %i.030
  %0 = load ptr, ptr %arrayidx5, align 4
  %arrayidx7 = getelementptr inbounds ptr, ptr %c, i32 %i.030
  %1 = load ptr, ptr %arrayidx7, align 4
  ; Lane 0: c[i][0] - b[i][0] (a subtraction, not a multiply).
  %2 = load i16, ptr %0, align 2
  %conv = sext i16 %2 to i32
  %3 = load i16, ptr %1, align 2
  %conv9 = sext i16 %3 to i32
  %sub = sub nsw i32 %conv9, %conv
  ; Lane 1: product added to the lane 0 difference.
  %arrayidx6.1 = getelementptr inbounds i16, ptr %0, i32 1
  %4 = load i16, ptr %arrayidx6.1, align 2
  %conv.1 = sext i16 %4 to i32
  %arrayidx8.1 = getelementptr inbounds i16, ptr %1, i32 1
  %5 = load i16, ptr %arrayidx8.1, align 2
  %conv9.1 = sext i16 %5 to i32
  %mul.1 = mul nsw i32 %conv9.1, %conv.1
  %add.1 = add nsw i32 %mul.1, %sub
  ; Lane 2, added to the running sum.
  %arrayidx6.2 = getelementptr inbounds i16, ptr %0, i32 2
  %6 = load i16, ptr %arrayidx6.2, align 2
  %conv.2 = sext i16 %6 to i32
  %arrayidx8.2 = getelementptr inbounds i16, ptr %1, i32 2
  %7 = load i16, ptr %arrayidx8.2, align 2
  %conv9.2 = sext i16 %7 to i32
  %mul.2 = mul nsw i32 %conv9.2, %conv.2
  %add.2 = add nsw i32 %mul.2, %add.1
  ; Lane 3, added to the running sum.
  %arrayidx6.3 = getelementptr inbounds i16, ptr %0, i32 3
  %8 = load i16, ptr %arrayidx6.3, align 2
  %conv.3 = sext i16 %8 to i32
  %arrayidx8.3 = getelementptr inbounds i16, ptr %1, i32 3
  %9 = load i16, ptr %arrayidx8.3, align 2
  %conv9.3 = sext i16 %9 to i32
  %mul.3 = mul nsw i32 %conv9.3, %conv.3
  %add.3 = add nsw i32 %mul.3, %add.2
  ; a[i] = lane 0 difference plus the three lane products.
  store i32 %add.3, ptr %arrayidx, align 4
  %inc12 = add nuw i32 %i.030, 1
  %exitcond = icmp eq i32 %inc12, %N
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}