1 ; RUN: opt -arm-parallel-dsp -dce -mtriple=armv7-a -S %s -o - | FileCheck %s
3 ; CHECK-LABEL: single_block
4 ; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
5 ; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
6 ; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
7 ; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
8 ; CHECK: call i32 @llvm.arm.smlad(i32 [[A]], i32 [[B]], i32 %acc)
; single_block: sums two sign-extended i16 x i16 products taken from %a and %b
; and adds the i32 accumulator %acc. The FileCheck directives preceding this
; function expect both i16 pointers to be bitcast to i32*, loaded as i32, and
; passed to @llvm.arm.smlad together with %acc.
9 define i32 @single_block(i16* %a, i16* %b, i32 %acc) {
; Element 0: load the i16 at %a and at %b, sign-extend both to i32, multiply.
11 %ld.a.0 = load i16, i16* %a
12 %sext.a.0 = sext i16 %ld.a.0 to i32
13 %ld.b.0 = load i16, i16* %b
14 %sext.b.0 = sext i16 %ld.b.0 to i32
15 %mul.0 = mul i32 %sext.a.0, %sext.b.0
; Element 1: same sequence on the i16 one element past %a and %b.
16 %addr.a.1 = getelementptr i16, i16* %a, i32 1
17 %addr.b.1 = getelementptr i16, i16* %b, i32 1
18 %ld.a.1 = load i16, i16* %addr.a.1
19 %sext.a.1 = sext i16 %ld.a.1 to i32
20 %ld.b.1 = load i16, i16* %addr.b.1
21 %sext.b.1 = sext i16 %ld.b.1 to i32
22 %mul.1 = mul i32 %sext.a.1, %sext.b.1
; Add the two products, then fold in the incoming accumulator.
23 %add = add i32 %mul.0, %mul.1
24 %res = add i32 %add, %acc
28 ; CHECK-LABEL: single_block_64
29 ; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
30 ; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
31 ; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
32 ; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
33 ; CHECK: call i64 @llvm.arm.smlald(i32 [[A]], i32 [[B]], i64 %acc)
; single_block_64: like the i32 variant, but each i32 product is sign-extended
; to i64 before the additions and the accumulator %acc is i64. The FileCheck
; directives preceding this function expect the paired i32 loads to feed
; @llvm.arm.smlald together with %acc.
34 define i64 @single_block_64(i16* %a, i16* %b, i64 %acc) {
; Element 0: load the i16 at %a and at %b, sign-extend both to i32, multiply.
36 %ld.a.0 = load i16, i16* %a
37 %sext.a.0 = sext i16 %ld.a.0 to i32
38 %ld.b.0 = load i16, i16* %b
39 %sext.b.0 = sext i16 %ld.b.0 to i32
40 %mul.0 = mul i32 %sext.a.0, %sext.b.0
; Element 1: same sequence on the i16 one element past %a and %b.
41 %addr.a.1 = getelementptr i16, i16* %a, i32 1
42 %addr.b.1 = getelementptr i16, i16* %b, i32 1
43 %ld.a.1 = load i16, i16* %addr.a.1
44 %sext.a.1 = sext i16 %ld.a.1 to i32
45 %ld.b.1 = load i16, i16* %addr.b.1
46 %sext.b.1 = sext i16 %ld.b.1 to i32
47 %mul.1 = mul i32 %sext.a.1, %sext.b.1
; Widen both i32 products to i64 so the accumulation is done in 64 bits.
48 %sext.mul.0 = sext i32 %mul.0 to i64
49 %sext.mul.1 = sext i32 %mul.1 to i64
; Add the two widened products, then fold in the incoming i64 accumulator.
50 %add = add i64 %sext.mul.0, %sext.mul.1
51 %res = add i64 %add, %acc
55 ; CHECK-LABEL: multi_block
56 ; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
57 ; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
58 ; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
59 ; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
60 ; CHECK: call i32 @llvm.arm.smlad(i32 [[A]], i32 [[B]], i32 0)
; multi_block: same two-element i16 multiply-add as single_block. The FileCheck
; directives preceding this function expect @llvm.arm.smlad to be called with
; an accumulator operand of 0 rather than %acc.
61 define i32 @multi_block(i16* %a, i16* %b, i32 %acc) {
; Element 0: load the i16 at %a and at %b, sign-extend both to i32, multiply.
63 %ld.a.0 = load i16, i16* %a
64 %sext.a.0 = sext i16 %ld.a.0 to i32
65 %ld.b.0 = load i16, i16* %b
66 %sext.b.0 = sext i16 %ld.b.0 to i32
67 %mul.0 = mul i32 %sext.a.0, %sext.b.0
; Element 1: same sequence on the i16 one element past %a and %b.
68 %addr.a.1 = getelementptr i16, i16* %a, i32 1
69 %addr.b.1 = getelementptr i16, i16* %b, i32 1
70 %ld.a.1 = load i16, i16* %addr.a.1
71 %sext.a.1 = sext i16 %ld.a.1 to i32
72 %ld.b.1 = load i16, i16* %addr.b.1
73 %sext.b.1 = sext i16 %ld.b.1 to i32
74 %mul.1 = mul i32 %sext.a.1, %sext.b.1
; Sum of the two products.
75 %add = add i32 %mul.0, %mul.1
; NOTE(review): the expected accumulator operand is 0, presumably because the
; add of %acc below sits in a different basic block from the multiplies; the
; branch and label that would separate them are not shown here - confirm.
79 %res = add i32 %add, %acc
83 ; CHECK-LABEL: multi_block_64
84 ; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
85 ; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
86 ; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
87 ; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
88 ; CHECK: call i64 @llvm.arm.smlald(i32 [[A]], i32 [[B]], i64 0)
; multi_block_64: i64 counterpart of multi_block. The i32 products are
; sign-extended to i64 before being added. The FileCheck directives preceding
; this function expect @llvm.arm.smlald to be called with an accumulator
; operand of 0 rather than %acc.
89 define i64 @multi_block_64(i16* %a, i16* %b, i64 %acc) {
; Element 0: load the i16 at %a and at %b, sign-extend both to i32, multiply.
91 %ld.a.0 = load i16, i16* %a
92 %sext.a.0 = sext i16 %ld.a.0 to i32
93 %ld.b.0 = load i16, i16* %b
94 %sext.b.0 = sext i16 %ld.b.0 to i32
95 %mul.0 = mul i32 %sext.a.0, %sext.b.0
; Element 1: same sequence on the i16 one element past %a and %b.
96 %addr.a.1 = getelementptr i16, i16* %a, i32 1
97 %addr.b.1 = getelementptr i16, i16* %b, i32 1
98 %ld.a.1 = load i16, i16* %addr.a.1
99 %sext.a.1 = sext i16 %ld.a.1 to i32
100 %ld.b.1 = load i16, i16* %addr.b.1
101 %sext.b.1 = sext i16 %ld.b.1 to i32
102 %mul.1 = mul i32 %sext.a.1, %sext.b.1
; Widen both i32 products to i64, then add them.
103 %sext.mul.0 = sext i32 %mul.0 to i64
104 %sext.mul.1 = sext i32 %mul.1 to i64
105 %add = add i64 %sext.mul.0, %sext.mul.1
; NOTE(review): the expected accumulator operand is 0, presumably because the
; add of %acc below sits in a different basic block from the multiplies; the
; branch and label that would separate them are not shown here - confirm.
109 %res = add i64 %add, %acc
113 ; CHECK-LABEL: multi_block_1
114 ; CHECK-NOT: call i32 @llvm.arm.smlad
; multi_block_1: negative test. The FileCheck directive preceding this function
; requires that no call to i32 @llvm.arm.smlad appears in the output for it.
115 define i32 @multi_block_1(i16* %a, i16* %b, i32 %acc) {
; Element 0: load the i16 at %a and at %b, sign-extend both to i32, multiply.
117 %ld.a.0 = load i16, i16* %a
118 %sext.a.0 = sext i16 %ld.a.0 to i32
119 %ld.b.0 = load i16, i16* %b
120 %sext.b.0 = sext i16 %ld.b.0 to i32
121 %mul.0 = mul i32 %sext.a.0, %sext.b.0
; NOTE(review): presumably a branch and a new basic-block label separate the
; first multiply above from the second one below, which would be why no smlad
; is expected; those lines are not shown here - confirm.
; Element 1: same sequence on the i16 one element past %a and %b.
125 %addr.a.1 = getelementptr i16, i16* %a, i32 1
126 %addr.b.1 = getelementptr i16, i16* %b, i32 1
127 %ld.a.1 = load i16, i16* %addr.a.1
128 %sext.a.1 = sext i16 %ld.a.1 to i32
129 %ld.b.1 = load i16, i16* %addr.b.1
130 %sext.b.1 = sext i16 %ld.b.1 to i32
131 %mul.1 = mul i32 %sext.a.1, %sext.b.1
; Add the two products, then fold in the incoming accumulator.
132 %add = add i32 %mul.0, %mul.1
133 %res = add i32 %add, %acc