1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -arm-parallel-dsp -dce -mtriple=armv7-a -S %s -o - | FileCheck %s
; Two-element i16 dot product whose i32 accumulator is added last:
;   %res = sext(p0) + sext(p1) + sext(%acc)
; where p0/p1 are the i32 products of the i16 values at index 0 and index 1
; of %a and %b. Each product is sign-extended to i64 before any addition.
; The assertions below expect one i32 load from each of %a and %b feeding a
; single @llvm.arm.smlald call whose i64 operand is sext(%acc).
4 define i64 @sext_acc_1(ptr %a, ptr %b, i32 %acc) {
5 ; CHECK-LABEL: @sext_acc_1(
7 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2
8 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[B:%.*]], align 2
9 ; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[ACC:%.*]] to i64
10 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP1]], i32 [[TMP3]], i64 [[TMP4]])
11 ; CHECK-NEXT: ret i64 [[TMP5]]
; Index 0: load the i16 at %a and %b, widen each to i32, multiply.
14 %ld.a.0 = load i16, ptr %a
15 %sext.a.0 = sext i16 %ld.a.0 to i32
16 %ld.b.0 = load i16, ptr %b
17 %sext.b.0 = sext i16 %ld.b.0 to i32
18 %mul.0 = mul i32 %sext.a.0, %sext.b.0
; Index 1: same load / widen / multiply pattern one i16 element further on.
19 %addr.a.1 = getelementptr i16, ptr %a, i32 1
20 %addr.b.1 = getelementptr i16, ptr %b, i32 1
21 %ld.a.1 = load i16, ptr %addr.a.1
22 %sext.a.1 = sext i16 %ld.a.1 to i32
23 %ld.b.1 = load i16, ptr %addr.b.1
24 %sext.b.1 = sext i16 %ld.b.1 to i32
25 %mul.1 = mul i32 %sext.a.1, %sext.b.1
; Widen both i32 products to i64 and sum them.
26 %sext.mul.0 = sext i32 %mul.0 to i64
27 %sext.mul.1 = sext i32 %mul.1 to i64
28 %add = add i64 %sext.mul.0, %sext.mul.1
; The sign-extended accumulator is the final operand added to the sum.
29 %sext.acc = sext i32 %acc to i64
30 %res = add i64 %add, %sext.acc
; Four-element i16 dot product where the accumulator is added in the middle
; of the i64 add chain rather than at the end:
;   %add.1 = (sext(p0) + sext(p1)) + sext(%acc)
;   %add.2 =  sext(p2) + sext(p3)
;   %add.3 = %add.1 + %add.2
; where pN is the i32 product of the i16 values at index N of %a and %b.
; The assertions below expect four i32 loads (from %a, %b, and index 2 of
; each) and two chained @llvm.arm.smlald calls: the first takes sext(%acc)
; as its i64 operand, the second takes the result of the first.
34 define i64 @sext_acc_2(ptr %a, ptr %b, i32 %acc) {
35 ; CHECK-LABEL: @sext_acc_2(
37 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2
38 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[B:%.*]], align 2
39 ; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2
40 ; CHECK-NEXT: [[ADDR_B_2:%.*]] = getelementptr i16, ptr [[B]], i32 2
41 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ADDR_A_2]], align 2
42 ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ADDR_B_2]], align 2
43 ; CHECK-NEXT: [[TMP8:%.*]] = sext i32 [[ACC:%.*]] to i64
44 ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP1]], i32 [[TMP3]], i64 [[TMP8]])
45 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP5]], i32 [[TMP7]], i64 [[TMP9]])
46 ; CHECK-NEXT: ret i64 [[TMP10]]
; Index 0: load the i16 at %a and %b, widen each to i32, multiply.
49 %ld.a.0 = load i16, ptr %a
50 %sext.a.0 = sext i16 %ld.a.0 to i32
51 %ld.b.0 = load i16, ptr %b
52 %sext.b.0 = sext i16 %ld.b.0 to i32
53 %mul.0 = mul i32 %sext.a.0, %sext.b.0
; Index 1: same load / widen / multiply pattern.
54 %addr.a.1 = getelementptr i16, ptr %a, i32 1
55 %addr.b.1 = getelementptr i16, ptr %b, i32 1
56 %ld.a.1 = load i16, ptr %addr.a.1
57 %sext.a.1 = sext i16 %ld.a.1 to i32
58 %ld.b.1 = load i16, ptr %addr.b.1
59 %sext.b.1 = sext i16 %ld.b.1 to i32
60 %mul.1 = mul i32 %sext.a.1, %sext.b.1
; Sum of the first product pair in i64, then the accumulator is added
; before the second pair has been computed.
61 %sext.mul.0 = sext i32 %mul.0 to i64
62 %sext.mul.1 = sext i32 %mul.1 to i64
63 %add = add i64 %sext.mul.0, %sext.mul.1
64 %sext.acc = sext i32 %acc to i64
65 %add.1 = add i64 %add, %sext.acc
; Index 2: load / widen / multiply, product widened to i64.
66 %addr.a.2 = getelementptr i16, ptr %a, i32 2
67 %addr.b.2 = getelementptr i16, ptr %b, i32 2
68 %ld.a.2 = load i16, ptr %addr.a.2
69 %sext.a.2 = sext i16 %ld.a.2 to i32
70 %ld.b.2 = load i16, ptr %addr.b.2
71 %sext.b.2 = sext i16 %ld.b.2 to i32
72 %mul.2 = mul i32 %sext.a.2, %sext.b.2
73 %sext.mul.2 = sext i32 %mul.2 to i64
; Index 3: load / widen / multiply, product widened to i64.
74 %addr.a.3 = getelementptr i16, ptr %a, i32 3
75 %addr.b.3 = getelementptr i16, ptr %b, i32 3
76 %ld.a.3 = load i16, ptr %addr.a.3
77 %sext.a.3 = sext i16 %ld.a.3 to i32
78 %ld.b.3 = load i16, ptr %addr.b.3
79 %sext.b.3 = sext i16 %ld.b.3 to i32
80 %mul.3 = mul i32 %sext.a.3, %sext.b.3
81 %sext.mul.3 = sext i32 %mul.3 to i64
; Second product pair summed on its own, then joined with the
; accumulator-carrying partial sum.
82 %add.2 = add i64 %sext.mul.2, %sext.mul.3
83 %add.3 = add i64 %add.1, %add.2
; Four-element i16 dot product where the accumulator is the last operand
; added, after both product pairs have been combined:
;   %add   = sext(p0) + sext(p1)
;   %add.1 = sext(p2) + sext(p3)
;   %add.2 = %add + %add.1
;   %add.3 = %add.2 + sext(%acc)
; where pN is the i32 product of the i16 values at index N of %a and %b.
; The assertions below expect four i32 loads (from %a, %b, and index 2 of
; each) and two chained @llvm.arm.smlald calls: the first takes sext(%acc)
; as its i64 operand, the second takes the result of the first.
87 define i64 @sext_acc_3(ptr %a, ptr %b, i32 %acc) {
88 ; CHECK-LABEL: @sext_acc_3(
90 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2
91 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[B:%.*]], align 2
92 ; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2
93 ; CHECK-NEXT: [[ADDR_B_2:%.*]] = getelementptr i16, ptr [[B]], i32 2
94 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ADDR_A_2]], align 2
95 ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ADDR_B_2]], align 2
96 ; CHECK-NEXT: [[TMP8:%.*]] = sext i32 [[ACC:%.*]] to i64
97 ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP1]], i32 [[TMP3]], i64 [[TMP8]])
98 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP5]], i32 [[TMP7]], i64 [[TMP9]])
99 ; CHECK-NEXT: ret i64 [[TMP10]]
; Index 0: load the i16 at %a and %b, widen each to i32, multiply.
102 %ld.a.0 = load i16, ptr %a
103 %sext.a.0 = sext i16 %ld.a.0 to i32
104 %ld.b.0 = load i16, ptr %b
105 %sext.b.0 = sext i16 %ld.b.0 to i32
106 %mul.0 = mul i32 %sext.a.0, %sext.b.0
; Index 1: same load / widen / multiply pattern.
107 %addr.a.1 = getelementptr i16, ptr %a, i32 1
108 %addr.b.1 = getelementptr i16, ptr %b, i32 1
109 %ld.a.1 = load i16, ptr %addr.a.1
110 %sext.a.1 = sext i16 %ld.a.1 to i32
111 %ld.b.1 = load i16, ptr %addr.b.1
112 %sext.b.1 = sext i16 %ld.b.1 to i32
113 %mul.1 = mul i32 %sext.a.1, %sext.b.1
; First product pair widened to i64 and summed.
114 %sext.mul.0 = sext i32 %mul.0 to i64
115 %sext.mul.1 = sext i32 %mul.1 to i64
116 %add = add i64 %sext.mul.0, %sext.mul.1
; Index 2: load / widen / multiply, product widened to i64.
117 %addr.a.2 = getelementptr i16, ptr %a, i32 2
118 %addr.b.2 = getelementptr i16, ptr %b, i32 2
119 %ld.a.2 = load i16, ptr %addr.a.2
120 %sext.a.2 = sext i16 %ld.a.2 to i32
121 %ld.b.2 = load i16, ptr %addr.b.2
122 %sext.b.2 = sext i16 %ld.b.2 to i32
123 %mul.2 = mul i32 %sext.a.2, %sext.b.2
124 %sext.mul.2 = sext i32 %mul.2 to i64
; Index 3: load / widen / multiply, product widened to i64.
125 %addr.a.3 = getelementptr i16, ptr %a, i32 3
126 %addr.b.3 = getelementptr i16, ptr %b, i32 3
127 %ld.a.3 = load i16, ptr %addr.a.3
128 %sext.a.3 = sext i16 %ld.a.3 to i32
129 %ld.b.3 = load i16, ptr %addr.b.3
130 %sext.b.3 = sext i16 %ld.b.3 to i32
131 %mul.3 = mul i32 %sext.a.3, %sext.b.3
132 %sext.mul.3 = sext i32 %mul.3 to i64
; Second pair summed, both pairs combined, and only then the
; sign-extended accumulator is added.
133 %add.1 = add i64 %sext.mul.2, %sext.mul.3
134 %add.2 = add i64 %add, %add.1
135 %sext.acc = sext i32 %acc to i64
136 %add.3 = add i64 %add.2, %sext.acc
; Four-element i16 dot product mixing add widths: the first product pair is
; summed in i32 and the sum is sign-extended, while products 2 and 3 are
; sign-extended individually and added one at a time in i64:
;   %add      = p0 + p1                 (i32)
;   %sext.add = sext(%add)
;   %add.1    = sext(p2) + %sext.add
;   %add.2    = %sext.add + %add.1
;   %add.3    = %add.2 + sext(p3)
;   %add.4    = %add.3 + sext(%acc)
; where pN is the i32 product of the i16 values at index N of %a and %b.
; The assertions below expect four i32 loads (from %a, %b, and index 2 of
; each) and two chained @llvm.arm.smlald calls: the first takes sext(%acc)
; as its i64 operand, the second takes the result of the first.
; NOTE(review): %sext.add is an operand of both %add.1 and %add.2, so the
; input sum includes it twice - confirm this is the intended test input.
140 define i64 @sext_acc_4(ptr %a, ptr %b, i32 %acc) {
141 ; CHECK-LABEL: @sext_acc_4(
143 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2
144 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[B:%.*]], align 2
145 ; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2
146 ; CHECK-NEXT: [[ADDR_B_2:%.*]] = getelementptr i16, ptr [[B]], i32 2
147 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ADDR_A_2]], align 2
148 ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ADDR_B_2]], align 2
149 ; CHECK-NEXT: [[TMP8:%.*]] = sext i32 [[ACC:%.*]] to i64
150 ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP1]], i32 [[TMP3]], i64 [[TMP8]])
151 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP5]], i32 [[TMP7]], i64 [[TMP9]])
152 ; CHECK-NEXT: ret i64 [[TMP10]]
; Index 0: load the i16 at %a and %b, widen each to i32, multiply.
155 %ld.a.0 = load i16, ptr %a
156 %sext.a.0 = sext i16 %ld.a.0 to i32
157 %ld.b.0 = load i16, ptr %b
158 %sext.b.0 = sext i16 %ld.b.0 to i32
159 %mul.0 = mul i32 %sext.a.0, %sext.b.0
; Index 1: same load / widen / multiply pattern.
160 %addr.a.1 = getelementptr i16, ptr %a, i32 1
161 %addr.b.1 = getelementptr i16, ptr %b, i32 1
162 %ld.a.1 = load i16, ptr %addr.a.1
163 %sext.a.1 = sext i16 %ld.a.1 to i32
164 %ld.b.1 = load i16, ptr %addr.b.1
165 %sext.b.1 = sext i16 %ld.b.1 to i32
166 %mul.1 = mul i32 %sext.a.1, %sext.b.1
; Unlike products 2 and 3 below, this pair is added while still i32 and
; only the sum is sign-extended to i64.
167 %add = add i32 %mul.0, %mul.1
168 %sext.add = sext i32 %add to i64
; Index 2: load / widen / multiply, product widened to i64.
169 %addr.a.2 = getelementptr i16, ptr %a, i32 2
170 %addr.b.2 = getelementptr i16, ptr %b, i32 2
171 %ld.a.2 = load i16, ptr %addr.a.2
172 %sext.a.2 = sext i16 %ld.a.2 to i32
173 %ld.b.2 = load i16, ptr %addr.b.2
174 %sext.b.2 = sext i16 %ld.b.2 to i32
175 %mul.2 = mul i32 %sext.a.2, %sext.b.2
176 %sext.mul.2 = sext i32 %mul.2 to i64
; Index 3: load / widen / multiply, product widened to i64.
177 %addr.a.3 = getelementptr i16, ptr %a, i32 3
178 %addr.b.3 = getelementptr i16, ptr %b, i32 3
179 %ld.a.3 = load i16, ptr %addr.a.3
180 %sext.a.3 = sext i16 %ld.a.3 to i32
181 %ld.b.3 = load i16, ptr %addr.b.3
182 %sext.b.3 = sext i16 %ld.b.3 to i32
183 %mul.3 = mul i32 %sext.a.3, %sext.b.3
184 %sext.mul.3 = sext i32 %mul.3 to i64
; Linear i64 add chain; %sext.add feeds both of the first two adds, and
; the sign-extended accumulator is the last operand added.
185 %sext.acc = sext i32 %acc to i64
186 %add.1 = add i64 %sext.mul.2, %sext.add
187 %add.2 = add i64 %sext.add, %add.1
188 %add.3 = add i64 %add.2, %sext.mul.3
189 %add.4 = add i64 %add.3, %sext.acc