; RUN: opt -arm-parallel-dsp -dce -mtriple=armv7-a -S %s -o - | FileCheck %s
; sext_acc_1: two-element i16 dot product of %a and %b. Each i32 product is
; sign-extended to i64, the two extended products are summed, and the
; sign-extended i32 accumulator %acc is added last.
; Expected output: %a and %b are each bitcast to i32* and read with a single
; i32 load, %acc is sign-extended to i64, and the whole sum becomes one call
; to @llvm.arm.smlald taking that extended accumulator.
; CHECK-LABEL: sext_acc_1
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[ACC:%[^ ]+]] = sext i32 %acc to i64
; CHECK: call i64 @llvm.arm.smlald(i32 [[A]], i32 [[B]], i64 [[ACC]])
define i64 @sext_acc_1(i16* %a, i16* %b, i32 %acc) {
entry:
  ; Element 0: a[0] * b[0], computed in i32.
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0

  ; Element 1: a[1] * b[1], computed in i32.
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.1 = load i16, i16* %addr.a.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %mul.1 = mul i32 %sext.a.1, %sext.b.1

  ; Widen both products to i64 before summing, then add the widened accumulator.
  %sext.mul.0 = sext i32 %mul.0 to i64
  %sext.mul.1 = sext i32 %mul.1 to i64
  %add = add i64 %sext.mul.0, %sext.mul.1
  %sext.acc = sext i32 %acc to i64
  %res = add i64 %add, %sext.acc

  ; The result must be used, otherwise the -dce step would erase the chain.
  ret i64 %res
}
; sext_acc_2: four-element i16 dot product of %a and %b where the
; sign-extended i32 accumulator %acc is added in the middle of the chain:
; (a[0]*b[0] + a[1]*b[1]) + sext(%acc), then + (a[2]*b[2] + a[3]*b[3]).
; Every product is computed in i32 and sign-extended to i64 before being added.
; Expected output: four i32 loads through bitcasts of %a, %b, %addr.a.2 and
; %addr.b.2, followed by two chained @llvm.arm.smlald calls; the first takes
; the sign-extended accumulator and the second takes the first call's result.
; The load patterns refer to the captured bitcast names rather than to
; literal value numbers, so they do not depend on how unnamed values are
; numbered.
; CHECK-LABEL: sext_acc_2
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* %addr.a.2 to i32*
; CHECK: [[A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
; CHECK: [[CAST_B_2:%[^ ]+]] = bitcast i16* %addr.b.2 to i32*
; CHECK: [[B_2:%[^ ]+]] = load i32, i32* [[CAST_B_2]]
; CHECK: [[ACC:%[^ ]+]] = sext i32 %acc to i64
; CHECK: [[SMLALD:%[^ ]+]] = call i64 @llvm.arm.smlald(i32 [[A]], i32 [[B]], i64 [[ACC]])
; CHECK: call i64 @llvm.arm.smlald(i32 [[A_2]], i32 [[B_2]], i64 [[SMLALD]])
define i64 @sext_acc_2(i16* %a, i16* %b, i32 %acc) {
entry:
  ; Element 0: a[0] * b[0], computed in i32.
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0

  ; Element 1: a[1] * b[1], computed in i32.
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.1 = load i16, i16* %addr.a.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %mul.1 = mul i32 %sext.a.1, %sext.b.1

  ; First pair widened and summed, then the widened accumulator is added.
  %sext.mul.0 = sext i32 %mul.0 to i64
  %sext.mul.1 = sext i32 %mul.1 to i64
  %add = add i64 %sext.mul.0, %sext.mul.1
  %sext.acc = sext i32 %acc to i64
  %add.1 = add i64 %add, %sext.acc

  ; Element 2: a[2] * b[2], widened to i64.
  %addr.a.2 = getelementptr i16, i16* %a, i32 2
  %addr.b.2 = getelementptr i16, i16* %b, i32 2
  %ld.a.2 = load i16, i16* %addr.a.2
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %ld.b.2 = load i16, i16* %addr.b.2
  %sext.b.2 = sext i16 %ld.b.2 to i32
  %mul.2 = mul i32 %sext.a.2, %sext.b.2
  %sext.mul.2 = sext i32 %mul.2 to i64

  ; Element 3: a[3] * b[3], widened to i64.
  %addr.a.3 = getelementptr i16, i16* %a, i32 3
  %addr.b.3 = getelementptr i16, i16* %b, i32 3
  %ld.a.3 = load i16, i16* %addr.a.3
  %sext.a.3 = sext i16 %ld.a.3 to i32
  %ld.b.3 = load i16, i16* %addr.b.3
  %sext.b.3 = sext i16 %ld.b.3 to i32
  %mul.3 = mul i32 %sext.a.3, %sext.b.3
  %sext.mul.3 = sext i32 %mul.3 to i64

  ; Second pair summed on its own, then folded into the running total.
  %add.2 = add i64 %sext.mul.2, %sext.mul.3
  %add.3 = add i64 %add.1, %add.2

  ; The result must be used, otherwise the -dce step would erase the chain.
  ret i64 %add.3
}
; sext_acc_3: four-element i16 dot product of %a and %b where the
; sign-extended i32 accumulator %acc is the last operand added:
; ((a[0]*b[0] + a[1]*b[1]) + (a[2]*b[2] + a[3]*b[3])) + sext(%acc).
; Every product is computed in i32 and sign-extended to i64 before being added.
; Expected output: four i32 loads through bitcasts of %a, %b, %addr.a.2 and
; %addr.b.2, followed by two chained @llvm.arm.smlald calls; the first takes
; the sign-extended accumulator and the second takes the first call's result.
; The load patterns refer to the captured bitcast names rather than to
; literal value numbers, so they do not depend on how unnamed values are
; numbered.
; CHECK-LABEL: sext_acc_3
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* %addr.a.2 to i32*
; CHECK: [[A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
; CHECK: [[CAST_B_2:%[^ ]+]] = bitcast i16* %addr.b.2 to i32*
; CHECK: [[B_2:%[^ ]+]] = load i32, i32* [[CAST_B_2]]
; CHECK: [[ACC:%[^ ]+]] = sext i32 %acc to i64
; CHECK: [[SMLALD:%[^ ]+]] = call i64 @llvm.arm.smlald(i32 [[A]], i32 [[B]], i64 [[ACC]])
; CHECK: call i64 @llvm.arm.smlald(i32 [[A_2]], i32 [[B_2]], i64 [[SMLALD]])
define i64 @sext_acc_3(i16* %a, i16* %b, i32 %acc) {
entry:
  ; Element 0: a[0] * b[0], computed in i32.
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0

  ; Element 1: a[1] * b[1], computed in i32.
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.1 = load i16, i16* %addr.a.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %mul.1 = mul i32 %sext.a.1, %sext.b.1

  ; First pair widened to i64 and summed.
  %sext.mul.0 = sext i32 %mul.0 to i64
  %sext.mul.1 = sext i32 %mul.1 to i64
  %add = add i64 %sext.mul.0, %sext.mul.1

  ; Element 2: a[2] * b[2], widened to i64.
  %addr.a.2 = getelementptr i16, i16* %a, i32 2
  %addr.b.2 = getelementptr i16, i16* %b, i32 2
  %ld.a.2 = load i16, i16* %addr.a.2
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %ld.b.2 = load i16, i16* %addr.b.2
  %sext.b.2 = sext i16 %ld.b.2 to i32
  %mul.2 = mul i32 %sext.a.2, %sext.b.2
  %sext.mul.2 = sext i32 %mul.2 to i64

  ; Element 3: a[3] * b[3], widened to i64.
  %addr.a.3 = getelementptr i16, i16* %a, i32 3
  %addr.b.3 = getelementptr i16, i16* %b, i32 3
  %ld.a.3 = load i16, i16* %addr.a.3
  %sext.a.3 = sext i16 %ld.a.3 to i32
  %ld.b.3 = load i16, i16* %addr.b.3
  %sext.b.3 = sext i16 %ld.b.3 to i32
  %mul.3 = mul i32 %sext.a.3, %sext.b.3
  %sext.mul.3 = sext i32 %mul.3 to i64

  ; Second pair summed, both pairs combined, and the widened accumulator
  ; added as the final step.
  %add.1 = add i64 %sext.mul.2, %sext.mul.3
  %add.2 = add i64 %add, %add.1
  %sext.acc = sext i32 %acc to i64
  %add.3 = add i64 %add.2, %sext.acc

  ; The result must be used, otherwise the -dce step would erase the chain.
  ret i64 %add.3
}
; sext_acc_4: four-element i16 dot product of %a and %b with mixed widths.
; The first pair of products is added in i32 and only the sum is
; sign-extended to i64 (%sext.add); the products for elements 2 and 3 are
; sign-extended individually. The sign-extended i32 accumulator %acc is the
; last operand added.
; Expected output: four i32 loads through bitcasts of %a, %b, %addr.a.2 and
; %addr.b.2, followed by two chained @llvm.arm.smlald calls; the first takes
; the sign-extended accumulator and the second takes the first call's result.
; The load patterns refer to the captured bitcast names rather than to
; literal value numbers, so they do not depend on how unnamed values are
; numbered.
; CHECK-LABEL: sext_acc_4
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* %addr.a.2 to i32*
; CHECK: [[A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
; CHECK: [[CAST_B_2:%[^ ]+]] = bitcast i16* %addr.b.2 to i32*
; CHECK: [[B_2:%[^ ]+]] = load i32, i32* [[CAST_B_2]]
; CHECK: [[ACC:%[^ ]+]] = sext i32 %acc to i64
; CHECK: [[SMLALD:%[^ ]+]] = call i64 @llvm.arm.smlald(i32 [[A]], i32 [[B]], i64 [[ACC]])
; CHECK: call i64 @llvm.arm.smlald(i32 [[A_2]], i32 [[B_2]], i64 [[SMLALD]])
define i64 @sext_acc_4(i16* %a, i16* %b, i32 %acc) {
entry:
  ; Element 0: a[0] * b[0], computed in i32.
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0

  ; Element 1: a[1] * b[1], computed in i32.
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.1 = load i16, i16* %addr.a.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %mul.1 = mul i32 %sext.a.1, %sext.b.1

  ; Unlike the other tests, the first pair is summed in i32 and the sum is
  ; widened afterwards.
  %add = add i32 %mul.0, %mul.1
  %sext.add = sext i32 %add to i64

  ; Element 2: a[2] * b[2], widened to i64.
  %addr.a.2 = getelementptr i16, i16* %a, i32 2
  %addr.b.2 = getelementptr i16, i16* %b, i32 2
  %ld.a.2 = load i16, i16* %addr.a.2
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %ld.b.2 = load i16, i16* %addr.b.2
  %sext.b.2 = sext i16 %ld.b.2 to i32
  %mul.2 = mul i32 %sext.a.2, %sext.b.2
  %sext.mul.2 = sext i32 %mul.2 to i64

  ; Element 3: a[3] * b[3], widened to i64.
  %addr.a.3 = getelementptr i16, i16* %a, i32 3
  %addr.b.3 = getelementptr i16, i16* %b, i32 3
  %ld.a.3 = load i16, i16* %addr.a.3
  %sext.a.3 = sext i16 %ld.a.3 to i32
  %ld.b.3 = load i16, i16* %addr.b.3
  %sext.b.3 = sext i16 %ld.b.3 to i32
  %mul.3 = mul i32 %sext.a.3, %sext.b.3
  %sext.mul.3 = sext i32 %mul.3 to i64

  %sext.acc = sext i32 %acc to i64

  ; NOTE(review): %sext.add is an operand of both %add.1 and %add.2, so the
  ; widened first-pair sum is added twice in this input. The FileCheck
  ; patterns above do not distinguish that from adding it once - confirm
  ; whether the duplicate use is intentional.
  %add.1 = add i64 %sext.mul.2, %sext.add
  %add.2 = add i64 %sext.add, %add.1
  %add.3 = add i64 %add.2, %sext.mul.3
  %add.4 = add i64 %add.3, %sext.acc

  ; The result must be used, otherwise the -dce step would erase the chain.
  ret i64 %add.4
}