1 ; RUN: llc -O3 -mtriple=thumbv7em -mcpu=cortex-m4 %s -o - | FileCheck %s --check-prefix=CHECK-REG-PRESSURE
2 ; RUN: llc -O3 -mtriple=thumbv7eb %s -o - | FileCheck %s --check-prefix=CHECK-UNSUPPORTED
3 ; RUN: llc -O3 -mtriple=thumbv8m.main -mattr=+dsp %s -o - | FileCheck %s --check-prefix=CHECK
5 ; CHECK-UNSUPPORTED-LABEL: unroll_n_jam_smlad
6 ; CHECK-UNSUPPORTED-NOT: smlad r{{.}}
8 ; The duplicate loads must be removed before parallel DSP can find the parallel
9 ; operations; that is not done yet, so the checks currently expect no smlad.
11 ; CHECK-LABEL: unroll_n_jam_smlad
12 define void @unroll_n_jam_smlad(i32* %res, i16* %A, i16* %B, i32 %N, i32 %idx) {
14 %xtraiter306.i = and i32 %N, 3
15 %unroll_iter310.i = sub i32 %N, %xtraiter306.i
16 %arrayidx.us.i117.i = getelementptr inbounds i32, i32* %res, i32 %idx
17 store i32 0, i32* %arrayidx.us.i117.i, align 4
18 %mul.us.i118.i = mul i32 %idx, %N
19 %inc11.us.i.i = or i32 %idx, 1
20 %arrayidx.us.i117.1.i = getelementptr inbounds i32, i32* %res, i32 %inc11.us.i.i
21 store i32 0, i32* %arrayidx.us.i117.1.i, align 4
22 %mul.us.i118.1.i = mul i32 %inc11.us.i.i, %N
23 %inc11.us.i.1.i = or i32 %idx, 2
24 %arrayidx.us.i117.2.i = getelementptr inbounds i32, i32* %res, i32 %inc11.us.i.1.i
25 store i32 0, i32* %arrayidx.us.i117.2.i, align 4
26 %mul.us.i118.2.i = mul i32 %inc11.us.i.1.i, %N
27 %inc11.us.i.2.i = or i32 %idx, 3
28 %arrayidx.us.i117.3.i = getelementptr inbounds i32, i32* %res, i32 %inc11.us.i.2.i
29 store i32 0, i32* %arrayidx.us.i117.3.i, align 4
30 %mul.us.i118.3.i = mul i32 %inc11.us.i.2.i, %N
31 %inc11.us.i.3.i = add i32 %idx, 4
34 ; TODO: CSE, or something similar, is required to remove the duplicate loads.
38 ; CHECK-NOT: smlad r{{.*}}
40 ; CHECK-REG-PRESSURE: .LBB0_1:
41 ; CHECK-REG-PRESSURE: ldr{{.*}}, [sp
42 ; CHECK-REG-PRESSURE: ldr{{.*}}, [sp
43 ; CHECK-REG-PRESSURE: ldr{{.*}}, [sp
44 ; CHECK-REG-PRESSURE: ldr{{.*}}, [sp
45 ; CHECK-REG-PRESSURE: ldr{{.*}}, [sp
46 ; CHECK-REG-PRESSURE: ldr{{.*}}, [sp
47 ; CHECK-REG-PRESSURE: ldr{{.*}}, [sp
48 ; CHECK-REG-PRESSURE: bne .LBB0_1
51 %A3 = phi i32 [ %add9.us.i.3361.i, %for.body ], [ 0, %entry ]
52 %j.026.us.i.i = phi i32 [ %inc.us.i.3362.i, %for.body ], [ 0, %entry ]
53 %A4 = phi i32 [ %add9.us.i.1.3.i, %for.body ], [ 0, %entry ]
54 %A5 = phi i32 [ %add9.us.i.2.3.i, %for.body ], [ 0, %entry ]
55 %A6 = phi i32 [ %add9.us.i.3.3.i, %for.body ], [ 0, %entry ]
56 %niter335.i = phi i32 [ %niter335.nsub.3.i, %for.body ], [ %unroll_iter310.i, %entry ]
57 %add.us.i.i = add i32 %j.026.us.i.i, %mul.us.i118.i
58 %arrayidx4.us.i.i = getelementptr inbounds i16, i16* %A, i32 %add.us.i.i
59 %A7 = load i16, i16* %arrayidx4.us.i.i, align 2
60 %conv.us.i.i = sext i16 %A7 to i32
61 %arrayidx5.us.i.i = getelementptr inbounds i16, i16* %B, i32 %j.026.us.i.i
62 %A8 = load i16, i16* %arrayidx5.us.i.i, align 2
63 %conv6.us.i.i = sext i16 %A8 to i32
64 %mul7.us.i.i = mul nsw i32 %conv6.us.i.i, %conv.us.i.i
65 %add9.us.i.i = add nsw i32 %mul7.us.i.i, %A3
66 %inc.us.i.i = or i32 %j.026.us.i.i, 1
67 %add.us.i.1.i = add i32 %j.026.us.i.i, %mul.us.i118.1.i
68 %arrayidx4.us.i.1.i = getelementptr inbounds i16, i16* %A, i32 %add.us.i.1.i
69 %A9 = load i16, i16* %arrayidx4.us.i.1.i, align 2
70 %conv.us.i.1.i = sext i16 %A9 to i32
71 %arrayidx5.us.i.1.i = getelementptr inbounds i16, i16* %B, i32 %j.026.us.i.i
72 %B0 = load i16, i16* %arrayidx5.us.i.1.i, align 2
73 %conv6.us.i.1.i = sext i16 %B0 to i32
74 %mul7.us.i.1.i = mul nsw i32 %conv6.us.i.1.i, %conv.us.i.1.i
75 %add9.us.i.1.i = add nsw i32 %mul7.us.i.1.i, %A4
76 %inc.us.i.1.i = or i32 %j.026.us.i.i, 1
77 %add.us.i.2.i = add i32 %j.026.us.i.i, %mul.us.i118.2.i
78 %arrayidx4.us.i.2.i = getelementptr inbounds i16, i16* %A, i32 %add.us.i.2.i
79 %B1 = load i16, i16* %arrayidx4.us.i.2.i, align 2
80 %conv.us.i.2.i = sext i16 %B1 to i32
81 %arrayidx5.us.i.2.i = getelementptr inbounds i16, i16* %B, i32 %j.026.us.i.i
82 %B2 = load i16, i16* %arrayidx5.us.i.2.i, align 2
83 %conv6.us.i.2.i = sext i16 %B2 to i32
84 %mul7.us.i.2.i = mul nsw i32 %conv6.us.i.2.i, %conv.us.i.2.i
85 %add9.us.i.2.i = add nsw i32 %mul7.us.i.2.i, %A5
86 %inc.us.i.2.i = or i32 %j.026.us.i.i, 1
87 %add.us.i.3.i = add i32 %j.026.us.i.i, %mul.us.i118.3.i
88 %arrayidx4.us.i.3.i = getelementptr inbounds i16, i16* %A, i32 %add.us.i.3.i
89 %B3 = load i16, i16* %arrayidx4.us.i.3.i, align 2
90 %conv.us.i.3.i = sext i16 %B3 to i32
91 %arrayidx5.us.i.3.i = getelementptr inbounds i16, i16* %B, i32 %j.026.us.i.i
92 %B4 = load i16, i16* %arrayidx5.us.i.3.i, align 2
93 %conv6.us.i.3.i = sext i16 %B4 to i32
94 %mul7.us.i.3.i = mul nsw i32 %conv6.us.i.3.i, %conv.us.i.3.i
95 %add9.us.i.3.i = add nsw i32 %mul7.us.i.3.i, %A6
96 %inc.us.i.3.i = or i32 %j.026.us.i.i, 1
97 %add.us.i.1337.i = add i32 %inc.us.i.i, %mul.us.i118.i
98 %arrayidx4.us.i.1338.i = getelementptr inbounds i16, i16* %A, i32 %add.us.i.1337.i
99 %B5 = load i16, i16* %arrayidx4.us.i.1338.i, align 2
100 %conv.us.i.1339.i = sext i16 %B5 to i32
101 %arrayidx5.us.i.1340.i = getelementptr inbounds i16, i16* %B, i32 %inc.us.i.i
102 %B6 = load i16, i16* %arrayidx5.us.i.1340.i, align 2
103 %conv6.us.i.1341.i = sext i16 %B6 to i32
104 %mul7.us.i.1342.i = mul nsw i32 %conv6.us.i.1341.i, %conv.us.i.1339.i
105 %add9.us.i.1343.i = add nsw i32 %mul7.us.i.1342.i, %add9.us.i.i
106 %inc.us.i.1344.i = or i32 %j.026.us.i.i, 2
107 %add.us.i.1.1.i = add i32 %inc.us.i.1.i, %mul.us.i118.1.i
108 %arrayidx4.us.i.1.1.i = getelementptr inbounds i16, i16* %A, i32 %add.us.i.1.1.i
109 %B7 = load i16, i16* %arrayidx4.us.i.1.1.i, align 2
110 %conv.us.i.1.1.i = sext i16 %B7 to i32
111 %arrayidx5.us.i.1.1.i = getelementptr inbounds i16, i16* %B, i32 %inc.us.i.1.i
112 %B6.dup = load i16, i16* %arrayidx5.us.i.1.1.i, align 2
113 %conv6.us.i.1.1.i = sext i16 %B6.dup to i32
114 %mul7.us.i.1.1.i = mul nsw i32 %conv6.us.i.1.1.i, %conv.us.i.1.1.i
115 %add9.us.i.1.1.i = add nsw i32 %mul7.us.i.1.1.i, %add9.us.i.1.i
116 %inc.us.i.1.1.i = or i32 %j.026.us.i.i, 2
117 %add.us.i.2.1.i = add i32 %inc.us.i.2.i, %mul.us.i118.2.i
118 %arrayidx4.us.i.2.1.i = getelementptr inbounds i16, i16* %A, i32 %add.us.i.2.1.i
119 %B9 = load i16, i16* %arrayidx4.us.i.2.1.i, align 2
120 %conv.us.i.2.1.i = sext i16 %B9 to i32
121 %arrayidx5.us.i.2.1.i = getelementptr inbounds i16, i16* %B, i32 %inc.us.i.2.i
122 %B6.dup.i = load i16, i16* %arrayidx5.us.i.2.1.i, align 2
123 %conv6.us.i.2.1.i = sext i16 %B6.dup.i to i32
124 %mul7.us.i.2.1.i = mul nsw i32 %conv6.us.i.2.1.i, %conv.us.i.2.1.i
125 %add9.us.i.2.1.i = add nsw i32 %mul7.us.i.2.1.i, %add9.us.i.2.i
126 %inc.us.i.2.1.i = or i32 %j.026.us.i.i, 2
127 %add.us.i.3.1.i = add i32 %inc.us.i.3.i, %mul.us.i118.3.i
128 %arrayidx4.us.i.3.1.i = getelementptr inbounds i16, i16* %A, i32 %add.us.i.3.1.i
129 %B11 = load i16, i16* %arrayidx4.us.i.3.1.i, align 2
130 %conv.us.i.3.1.i = sext i16 %B11 to i32
131 %arrayidx5.us.i.3.1.i = getelementptr inbounds i16, i16* %B, i32 %inc.us.i.3.i
132 %B6.dup.i.i = load i16, i16* %arrayidx5.us.i.3.1.i, align 2
133 %conv6.us.i.3.1.i = sext i16 %B6.dup.i.i to i32
134 %mul7.us.i.3.1.i = mul nsw i32 %conv6.us.i.3.1.i, %conv.us.i.3.1.i
135 %add9.us.i.3.1.i = add nsw i32 %mul7.us.i.3.1.i, %add9.us.i.3.i
136 %inc.us.i.3.1.i = or i32 %j.026.us.i.i, 2
137 %add.us.i.2346.i = add i32 %inc.us.i.1344.i, %mul.us.i118.i
138 %arrayidx4.us.i.2347.i = getelementptr inbounds i16, i16* %A, i32 %add.us.i.2346.i
139 %B13 = load i16, i16* %arrayidx4.us.i.2347.i, align 2
140 %conv.us.i.2348.i = sext i16 %B13 to i32
141 %arrayidx5.us.i.2349.i = getelementptr inbounds i16, i16* %B, i32 %inc.us.i.1344.i
142 %B14 = load i16, i16* %arrayidx5.us.i.2349.i, align 2
143 %conv6.us.i.2350.i = sext i16 %B14 to i32
144 %mul7.us.i.2351.i = mul nsw i32 %conv6.us.i.2350.i, %conv.us.i.2348.i
145 %add9.us.i.2352.i = add nsw i32 %mul7.us.i.2351.i, %add9.us.i.1343.i
146 %inc.us.i.2353.i = or i32 %j.026.us.i.i, 3
147 %add.us.i.1.2.i = add i32 %inc.us.i.1.1.i, %mul.us.i118.1.i
148 %arrayidx4.us.i.1.2.i = getelementptr inbounds i16, i16* %A, i32 %add.us.i.1.2.i
149 %B15 = load i16, i16* %arrayidx4.us.i.1.2.i, align 2
150 %conv.us.i.1.2.i = sext i16 %B15 to i32
151 %arrayidx5.us.i.1.2.i = getelementptr inbounds i16, i16* %B, i32 %inc.us.i.1.1.i
152 %B14.dup = load i16, i16* %arrayidx5.us.i.1.2.i, align 2
153 %conv6.us.i.1.2.i = sext i16 %B14.dup to i32
154 %mul7.us.i.1.2.i = mul nsw i32 %conv6.us.i.1.2.i, %conv.us.i.1.2.i
155 %add9.us.i.1.2.i = add nsw i32 %mul7.us.i.1.2.i, %add9.us.i.1.1.i
156 %inc.us.i.1.2.i = or i32 %j.026.us.i.i, 3
157 %add.us.i.2.2.i = add i32 %inc.us.i.2.1.i, %mul.us.i118.2.i
158 %arrayidx4.us.i.2.2.i = getelementptr inbounds i16, i16* %A, i32 %add.us.i.2.2.i
159 %B17 = load i16, i16* %arrayidx4.us.i.2.2.i, align 2
160 %conv.us.i.2.2.i = sext i16 %B17 to i32
161 %arrayidx5.us.i.2.2.i = getelementptr inbounds i16, i16* %B, i32 %inc.us.i.2.1.i
162 %B14.dup.i = load i16, i16* %arrayidx5.us.i.2.2.i, align 2
163 %conv6.us.i.2.2.i = sext i16 %B14.dup.i to i32
164 %mul7.us.i.2.2.i = mul nsw i32 %conv6.us.i.2.2.i, %conv.us.i.2.2.i
165 %add9.us.i.2.2.i = add nsw i32 %mul7.us.i.2.2.i, %add9.us.i.2.1.i
166 %inc.us.i.2.2.i = or i32 %j.026.us.i.i, 3
167 %add.us.i.3.2.i = add i32 %inc.us.i.3.1.i, %mul.us.i118.3.i
168 %arrayidx4.us.i.3.2.i = getelementptr inbounds i16, i16* %A, i32 %add.us.i.3.2.i
169 %B19 = load i16, i16* %arrayidx4.us.i.3.2.i, align 2
170 %conv.us.i.3.2.i = sext i16 %B19 to i32
171 %arrayidx5.us.i.3.2.i = getelementptr inbounds i16, i16* %B, i32 %inc.us.i.3.1.i
172 %B14.dup.i.i = load i16, i16* %arrayidx5.us.i.3.2.i, align 2
173 %conv6.us.i.3.2.i = sext i16 %B14.dup.i.i to i32
174 %mul7.us.i.3.2.i = mul nsw i32 %conv6.us.i.3.2.i, %conv.us.i.3.2.i
175 %add9.us.i.3.2.i = add nsw i32 %mul7.us.i.3.2.i, %add9.us.i.3.1.i
176 %inc.us.i.3.2.i = or i32 %j.026.us.i.i, 3
177 %add.us.i.3355.i = add i32 %inc.us.i.2353.i, %mul.us.i118.i
178 %arrayidx4.us.i.3356.i = getelementptr inbounds i16, i16* %A, i32 %add.us.i.3355.i
179 %B21 = load i16, i16* %arrayidx4.us.i.3356.i, align 2
180 %conv.us.i.3357.i = sext i16 %B21 to i32
181 %arrayidx5.us.i.3358.i = getelementptr inbounds i16, i16* %B, i32 %inc.us.i.2353.i
182 %B22 = load i16, i16* %arrayidx5.us.i.3358.i, align 2
183 %conv6.us.i.3359.i = sext i16 %B22 to i32
184 %mul7.us.i.3360.i = mul nsw i32 %conv6.us.i.3359.i, %conv.us.i.3357.i
185 %add9.us.i.3361.i = add nsw i32 %mul7.us.i.3360.i, %add9.us.i.2352.i
186 %inc.us.i.3362.i = add i32 %j.026.us.i.i, 4
187 %add.us.i.1.3.i = add i32 %inc.us.i.1.2.i, %mul.us.i118.1.i
188 %arrayidx4.us.i.1.3.i = getelementptr inbounds i16, i16* %A, i32 %add.us.i.1.3.i
189 %B23 = load i16, i16* %arrayidx4.us.i.1.3.i, align 2
190 %conv.us.i.1.3.i = sext i16 %B23 to i32
191 %arrayidx5.us.i.1.3.i = getelementptr inbounds i16, i16* %B, i32 %inc.us.i.1.2.i
192 %B22.dup = load i16, i16* %arrayidx5.us.i.1.3.i, align 2
193 %conv6.us.i.1.3.i = sext i16 %B22.dup to i32
194 %mul7.us.i.1.3.i = mul nsw i32 %conv6.us.i.1.3.i, %conv.us.i.1.3.i
195 %add9.us.i.1.3.i = add nsw i32 %mul7.us.i.1.3.i, %add9.us.i.1.2.i
196 %add.us.i.2.3.i = add i32 %inc.us.i.2.2.i, %mul.us.i118.2.i
197 %arrayidx4.us.i.2.3.i = getelementptr inbounds i16, i16* %A, i32 %add.us.i.2.3.i
198 %B25 = load i16, i16* %arrayidx4.us.i.2.3.i, align 2
199 %conv.us.i.2.3.i = sext i16 %B25 to i32
200 %arrayidx5.us.i.2.3.i = getelementptr inbounds i16, i16* %B, i32 %inc.us.i.2.2.i
201 %B22.dup.i = load i16, i16* %arrayidx5.us.i.2.3.i, align 2
202 %conv6.us.i.2.3.i = sext i16 %B22.dup.i to i32
203 %mul7.us.i.2.3.i = mul nsw i32 %conv6.us.i.2.3.i, %conv.us.i.2.3.i
204 %add9.us.i.2.3.i = add nsw i32 %mul7.us.i.2.3.i, %add9.us.i.2.2.i
205 %add.us.i.3.3.i = add i32 %inc.us.i.3.2.i, %mul.us.i118.3.i
206 %arrayidx4.us.i.3.3.i = getelementptr inbounds i16, i16* %A, i32 %add.us.i.3.3.i
207 %B27 = load i16, i16* %arrayidx4.us.i.3.3.i, align 2
208 %conv.us.i.3.3.i = sext i16 %B27 to i32
209 %arrayidx5.us.i.3.3.i = getelementptr inbounds i16, i16* %B, i32 %inc.us.i.3.2.i
210 %B22.dup.i.i = load i16, i16* %arrayidx5.us.i.3.3.i, align 2
211 %conv6.us.i.3.3.i = sext i16 %B22.dup.i.i to i32
212 %mul7.us.i.3.3.i = mul nsw i32 %conv6.us.i.3.3.i, %conv.us.i.3.3.i
213 %add9.us.i.3.3.i = add nsw i32 %mul7.us.i.3.3.i, %add9.us.i.3.2.i
214 %niter335.nsub.3.i = add i32 %niter335.i, -4
215 %niter335.ncmp.3.i = icmp eq i32 %niter335.nsub.3.i, 0
216 br i1 %niter335.ncmp.3.i, label %exit, label %for.body
219 %arrayidx.out.i = getelementptr inbounds i32, i32* %res, i32 0
220 store i32 %add9.us.i.3361.i, i32* %arrayidx.out.i, align 4
221 %arrayidx.out.1.i = getelementptr inbounds i32, i32* %res, i32 1
222 store i32 %add9.us.i.1.3.i, i32* %arrayidx.out.1.i, align 4
223 %arrayidx.out.2.i = getelementptr inbounds i32, i32* %res, i32 2
224 store i32 %add9.us.i.2.3.i, i32* %arrayidx.out.2.i, align 4
225 %arrayidx.out.3.i = getelementptr inbounds i32, i32* %res, i32 3
226 store i32 %add9.us.i.3.3.i, i32* %arrayidx.out.3.i, align 4