1 ; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp < %s -codegenprepare -S | FileCheck -check-prefix=CHECK %s
; Test input: a loop over <4 x i32> vectors that performs
;   d[i..i+3] += s1[i..i+3] * splat(%x)
; The splat (insertelement into lane 0 + shufflevector with an all-zero mask)
; is created in vector.ph and consumed as the second operand of the mul in
; vector.body.
; NOTE(review): the "preds =" annotations on vector.ph and for.cond.cleanup
; name blocks (%for.body.preheader, %for.body, %middle.block) that do not
; appear in the visible function body; they look stale.
3 define void @sink_add_mul(ptr %s1, i32 %x, ptr %d, i32 %n) {
4 ; CHECK-LABEL: @sink_add_mul(
; Negative patterns: a splat-shaped insertelement/shufflevector pair must not
; be matched ahead of the positive patterns that follow.
6 ; CHECK-NOT: %{{.*}} = insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
7 ; CHECK-NOT: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
; Positive patterns: an insertelement of a scalar into lane 0, then a
; shufflevector of that same value (TMP2) with an all-zero mask.
9 ; CHECK: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
10 ; CHECK: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer
; Bypass the vector loop when n <= 0.
13 %cmp6 = icmp sgt i32 %n, 0
14 br i1 %cmp6, label %vector.ph, label %for.cond.cleanup
16 vector.ph: ; preds = %for.body.preheader
; Loop bound: n with its two low bits cleared.
17 %n.vec = and i32 %n, -4
; Broadcast %x to all four lanes.
18 %broadcast.splatinsert8 = insertelement <4 x i32> undef, i32 %x, i32 0
19 %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer
22 vector.body: ; preds = %vector.body, %vector.ph
23 %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; Load four elements of s1 and multiply them by the splat.
24 %0 = getelementptr inbounds i32, ptr %s1, i32 %index
25 %wide.load = load <4 x i32>, ptr %0, align 4
26 %1 = mul nsw <4 x i32> %wide.load, %broadcast.splat9
; Accumulate the product into the matching four elements of d.
27 %2 = getelementptr inbounds i32, ptr %d, i32 %index
28 %wide.load10 = load <4 x i32>, ptr %2, align 4
29 %3 = add nsw <4 x i32> %wide.load10, %1
30 store <4 x i32> %3, ptr %2, align 4
; Advance by one vector (4 elements) and loop until the bound is reached.
31 %index.next = add i32 %index, 4
32 %4 = icmp eq i32 %index.next, %n.vec
33 br i1 %4, label %for.cond.cleanup, label %vector.body
35 for.cond.cleanup: ; preds = %for.body, %middle.block, %entry
; Test input: one splat of %x feeding two multiplies in the same loop body:
;   d[i..i+3]  += s1[i..i+3] * splat(%x)
;   d2[i..i+3] += s2[i..i+3] * splat(%x)
; The positive patterns below require two separate insertelement/shufflevector
; pairs in the output, each one feeding one of the two mul instructions.
; NOTE(review): the "preds =" annotations on vector.ph and for.cond.cleanup
; name blocks (%for.body.preheader, %for.body, %middle.block) that do not
; appear in the visible function body; they look stale.
39 define void @sink_add_mul_multiple(ptr %s1, ptr %s2, i32 %x, ptr %d, ptr %d2, i32 %n) {
40 ; CHECK-LABEL: @sink_add_mul_multiple(
; Negative patterns: a splat-shaped insertelement/shufflevector pair must not
; be matched ahead of the positive patterns that follow.
42 ; CHECK-NOT: %{{.*}} = insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
43 ; CHECK-NOT: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
; First copy of the splat, used by the mul on %wide.load.
45 ; CHECK: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 %x, i32 0
46 ; CHECK: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer
47 ; CHECK: mul nsw <4 x i32> %wide.load, [[TMP3]]
; Second copy of the splat, used by the mul on %wide.load18.
48 ; CHECK: [[TMP2b:%.*]] = insertelement <4 x i32> undef, i32 %x, i32 0
49 ; CHECK: [[TMP3b:%.*]] = shufflevector <4 x i32> [[TMP2b]], <4 x i32> undef, <4 x i32> zeroinitializer
50 ; CHECK: mul nsw <4 x i32> %wide.load18, [[TMP3b]]
; Bypass the vector loop when n <= 0.
53 %cmp13 = icmp sgt i32 %n, 0
54 br i1 %cmp13, label %vector.ph, label %for.cond.cleanup
56 vector.ph: ; preds = %for.body.preheader
; Loop bound: n with its two low bits cleared.
57 %n.vec = and i32 %n, -4
; Broadcast %x to all four lanes.
58 %broadcast.splatinsert15 = insertelement <4 x i32> undef, i32 %x, i32 0
59 %broadcast.splat16 = shufflevector <4 x i32> %broadcast.splatinsert15, <4 x i32> undef, <4 x i32> zeroinitializer
62 vector.body: ; preds = %vector.body, %vector.ph
63 %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; First use of the splat: s1 * splat, accumulated into d.
64 %0 = getelementptr inbounds i32, ptr %s1, i32 %index
65 %wide.load = load <4 x i32>, ptr %0, align 4
66 %1 = mul nsw <4 x i32> %wide.load, %broadcast.splat16
67 %2 = getelementptr inbounds i32, ptr %d, i32 %index
68 %wide.load17 = load <4 x i32>, ptr %2, align 4
69 %3 = add nsw <4 x i32> %wide.load17, %1
70 store <4 x i32> %3, ptr %2, align 4
; Second use of the same splat: s2 * splat, accumulated into d2.
71 %4 = getelementptr inbounds i32, ptr %s2, i32 %index
72 %wide.load18 = load <4 x i32>, ptr %4, align 4
73 %5 = mul nsw <4 x i32> %wide.load18, %broadcast.splat16
74 %6 = getelementptr inbounds i32, ptr %d2, i32 %index
75 %wide.load19 = load <4 x i32>, ptr %6, align 4
76 %7 = add nsw <4 x i32> %wide.load19, %5
77 store <4 x i32> %7, ptr %6, align 4
; Advance by one vector (4 elements) and loop until the bound is reached.
78 %index.next = add i32 %index, 4
79 %8 = icmp eq i32 %index.next, %n.vec
80 br i1 %8, label %for.cond.cleanup, label %vector.body
82 for.cond.cleanup: ; preds = %for.body, %middle.block, %entry
; Test input: one splat of %x with two different kinds of use in the loop:
;   d[i..i+3]  += s1[i..i+3] * splat(%x)     (splat is the second mul operand)
;   d2[i..i+3] += splat(%x) - s2[i..i+3]     (splat is the first sub operand)
; The consecutive-line patterns below mirror the input's icmp/br and then its
; and/insertelement/shufflevector/br sequence, i.e. they expect the splat
; definition to stay next to the %n.vec computation, ahead of the branch into
; the loop.
; NOTE(review): the "preds =" annotations on vector.ph and for.cond.cleanup
; name blocks (%for.body.preheader, %for.body, %middle.block) that do not
; appear in the visible function body; they look stale.
87 define void @sink_add_sub_unsinkable(ptr %s1, ptr %s2, i32 %x, ptr %d, ptr %d2, i32 %n) {
88 ; CHECK-LABEL: @sink_add_sub_unsinkable(
; Guard comparison and branch; binds N to the trip-count argument.
90 ; CHECK-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[N:%.*]], 0
91 ; CHECK-NEXT: br i1 [[CMP13]], label [[VECTOR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; Loop bound, splat definition, and the unconditional branch to the loop.
93 ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N]], -4
94 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
95 ; CHECK-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT15]], <4 x i32> undef, <4 x i32> zeroinitializer
96 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; Bypass the vector loop when n <= 0.
99 %cmp13 = icmp sgt i32 %n, 0
100 br i1 %cmp13, label %vector.ph, label %for.cond.cleanup
102 vector.ph: ; preds = %for.body.preheader
; Loop bound: n with its two low bits cleared.
103 %n.vec = and i32 %n, -4
; Broadcast %x to all four lanes.
104 %broadcast.splatinsert15 = insertelement <4 x i32> undef, i32 %x, i32 0
105 %broadcast.splat16 = shufflevector <4 x i32> %broadcast.splatinsert15, <4 x i32> undef, <4 x i32> zeroinitializer
106 br label %vector.body
108 vector.body: ; preds = %vector.body, %vector.ph
109 %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; Use 1: s1 * splat, accumulated into d.
110 %0 = getelementptr inbounds i32, ptr %s1, i32 %index
111 %wide.load = load <4 x i32>, ptr %0, align 4
112 %1 = mul nsw <4 x i32> %wide.load, %broadcast.splat16
113 %2 = getelementptr inbounds i32, ptr %d, i32 %index
114 %wide.load17 = load <4 x i32>, ptr %2, align 4
115 %3 = add nsw <4 x i32> %wide.load17, %1
116 store <4 x i32> %3, ptr %2, align 4
; Use 2: splat - s2 (the splat is the FIRST operand of the sub), accumulated
; into d2.
117 %4 = getelementptr inbounds i32, ptr %s2, i32 %index
118 %wide.load18 = load <4 x i32>, ptr %4, align 4
119 %5 = sub nsw <4 x i32> %broadcast.splat16, %wide.load18
120 %6 = getelementptr inbounds i32, ptr %d2, i32 %index
121 %wide.load19 = load <4 x i32>, ptr %6, align 4
122 %7 = add nsw <4 x i32> %wide.load19, %5
123 store <4 x i32> %7, ptr %6, align 4
; Advance by one vector (4 elements) and loop until the bound is reached.
124 %index.next = add i32 %index, 4
125 %8 = icmp eq i32 %index.next, %n.vec
126 br i1 %8, label %for.cond.cleanup, label %vector.body
128 for.cond.cleanup: ; preds = %for.body, %middle.block, %entry
; Test input: a loop over <4 x i32> vectors that performs
;   d[i..i+3] = s1[i..i+3] - splat(%x)
; The splat is the SECOND operand of the sub. The patterns below require the
; insertelement/shufflevector pair to be matched after the vector.body label
; in the output, and forbid a match for that pair before it.
; NOTE(review): the "preds =" annotations on vector.ph and for.cond.cleanup
; name blocks (%for.body.preheader, %for.body, %middle.block) that do not
; appear in the visible function body; they look stale.
132 define void @sink_sub(ptr %s1, i32 %x, ptr %d, i32 %n) {
133 ; CHECK-LABEL: @sink_sub(
; Negative patterns: no splat may be matched before the vector.body label.
135 ; CHECK-NOT: %{{.*}} = insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
136 ; CHECK-NOT: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
; Positive patterns: the splat must appear after the vector.body label.
137 ; CHECK: vector.body:
138 ; CHECK: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
139 ; CHECK: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer
; Bypass the vector loop when n <= 0.
142 %cmp6 = icmp sgt i32 %n, 0
143 br i1 %cmp6, label %vector.ph, label %for.cond.cleanup
145 vector.ph: ; preds = %for.body.preheader
; Loop bound: n with its two low bits cleared.
146 %n.vec = and i32 %n, -4
; Broadcast %x to all four lanes.
147 %broadcast.splatinsert8 = insertelement <4 x i32> undef, i32 %x, i32 0
148 %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer
149 br label %vector.body
151 vector.body: ; preds = %vector.body, %vector.ph
152 %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; Load four elements of s1, subtract the splat, and store the result to d.
153 %0 = getelementptr inbounds i32, ptr %s1, i32 %index
154 %wide.load = load <4 x i32>, ptr %0, align 4
155 %1 = sub nsw <4 x i32> %wide.load, %broadcast.splat9
156 %2 = getelementptr inbounds i32, ptr %d, i32 %index
157 store <4 x i32> %1, ptr %2, align 4
; Advance by one vector (4 elements) and loop until the bound is reached.
158 %index.next = add i32 %index, 4
159 %3 = icmp eq i32 %index.next, %n.vec
160 br i1 %3, label %for.cond.cleanup, label %vector.body
162 for.cond.cleanup: ; preds = %for.body, %middle.block, %entry
; Test input: a loop over <4 x i32> vectors that performs
;   d[i..i+3] = splat(%x) - s1[i..i+3]
; The splat is the FIRST operand of the sub. The patterns below expect the
; and/insertelement/shufflevector/br sequence to stay together on consecutive
; lines, and forbid any splat-shaped insertelement/shufflevector after the
; vector.body label.
; NOTE(review): the "preds =" annotations on vector.ph and for.cond.cleanup
; name blocks (%for.body.preheader, %for.body, %middle.block) that do not
; appear in the visible function body; they look stale.
166 define void @sink_sub_unsinkable(ptr %s1, i32 %x, ptr %d, i32 %n) {
168 ; CHECK-LABEL: @sink_sub_unsinkable(
; N is bound here instead of being reused, so this function's patterns do not
; depend on a FileCheck variable captured while matching an earlier function.
170 ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N:%.*]], -4
171 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
172 ; CHECK-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT15]], <4 x i32> undef, <4 x i32> zeroinitializer
173 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; Nothing splat-shaped may show up once the loop body has started.
174 ; CHECK: vector.body:
175 ; CHECK-NOT: %{{.*}} = insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
176 ; CHECK-NOT: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
; Bypass the vector loop when n <= 0.
178 %cmp6 = icmp sgt i32 %n, 0
179 br i1 %cmp6, label %vector.ph, label %for.cond.cleanup
181 vector.ph: ; preds = %for.body.preheader
; Loop bound: n with its two low bits cleared.
182 %n.vec = and i32 %n, -4
; Broadcast %x to all four lanes.
183 %broadcast.splatinsert8 = insertelement <4 x i32> undef, i32 %x, i32 0
184 %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer
185 br label %vector.body
187 vector.body: ; preds = %vector.body, %vector.ph
188 %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; Load four elements of s1, subtract them FROM the splat, and store to d.
189 %0 = getelementptr inbounds i32, ptr %s1, i32 %index
190 %wide.load = load <4 x i32>, ptr %0, align 4
191 %1 = sub nsw <4 x i32> %broadcast.splat9, %wide.load
192 %2 = getelementptr inbounds i32, ptr %d, i32 %index
193 store <4 x i32> %1, ptr %2, align 4
; Advance by one vector (4 elements) and loop until the bound is reached.
194 %index.next = add i32 %index, 4
195 %3 = icmp eq i32 %index.next, %n.vec
196 br i1 %3, label %for.cond.cleanup, label %vector.body
198 for.cond.cleanup: ; preds = %for.body, %middle.block, %entry