1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -o - -S %s | FileCheck %s
4 ; Test case with a large number of pointer groups that require runtime
5 ; memory-conflict checks; many of the checks are redundant and can be simplified.
6 define void @test_large_number_of_group(ptr %dst, i64 %off, i64 %N) {
7 ; CHECK-LABEL: @test_large_number_of_group(
9 ; CHECK-NEXT: [[OFF_MUL_2:%.*]] = shl i64 [[OFF:%.*]], 1
10 ; CHECK-NEXT: [[OFF_MUL_3:%.*]] = mul i64 [[OFF]], 3
11 ; CHECK-NEXT: [[OFF_MUL_4:%.*]] = shl i64 [[OFF]], 2
12 ; CHECK-NEXT: [[OFF_MUL_5:%.*]] = mul i64 [[OFF]], 5
13 ; CHECK-NEXT: [[OFF_MUL_6:%.*]] = mul i64 [[OFF]], 6
14 ; CHECK-NEXT: [[OFF_MUL_7:%.*]] = mul i64 [[OFF]], 7
15 ; CHECK-NEXT: [[OFF_MUL_8:%.*]] = shl i64 [[OFF]], 3
16 ; CHECK-NEXT: [[OFF_MUL_9:%.*]] = mul i64 [[OFF]], 9
17 ; CHECK-NEXT: [[OFF_MUL_10:%.*]] = mul i64 [[OFF]], 10
18 ; CHECK-NEXT: [[OFF_MUL_11:%.*]] = mul i64 [[OFF]], 11
19 ; CHECK-NEXT: [[OFF_MUL_12:%.*]] = mul i64 [[OFF]], 12
20 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
21 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
22 ; CHECK: vector.memcheck:
23 ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
24 ; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[OFF]], 4
25 ; CHECK-NEXT: [[DIFF_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 32
26 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK1]]
27 ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[OFF]], 24
28 ; CHECK-NEXT: [[DIFF_CHECK2:%.*]] = icmp ult i64 [[TMP1]], 32
29 ; CHECK-NEXT: [[CONFLICT_RDX3:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK2]]
30 ; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[OFF]], 5
31 ; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP2]], 32
32 ; CHECK-NEXT: [[CONFLICT_RDX5:%.*]] = or i1 [[CONFLICT_RDX3]], [[DIFF_CHECK4]]
33 ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[OFF]], 40
34 ; CHECK-NEXT: [[DIFF_CHECK6:%.*]] = icmp ult i64 [[TMP3]], 32
35 ; CHECK-NEXT: [[CONFLICT_RDX7:%.*]] = or i1 [[CONFLICT_RDX5]], [[DIFF_CHECK6]]
36 ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[OFF]], 48
37 ; CHECK-NEXT: [[DIFF_CHECK8:%.*]] = icmp ult i64 [[TMP4]], 32
38 ; CHECK-NEXT: [[CONFLICT_RDX9:%.*]] = or i1 [[CONFLICT_RDX7]], [[DIFF_CHECK8]]
39 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[OFF]], 56
40 ; CHECK-NEXT: [[DIFF_CHECK10:%.*]] = icmp ult i64 [[TMP5]], 32
41 ; CHECK-NEXT: [[CONFLICT_RDX11:%.*]] = or i1 [[CONFLICT_RDX9]], [[DIFF_CHECK10]]
42 ; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[OFF]], 6
43 ; CHECK-NEXT: [[DIFF_CHECK12:%.*]] = icmp ult i64 [[TMP6]], 32
44 ; CHECK-NEXT: [[CONFLICT_RDX13:%.*]] = or i1 [[CONFLICT_RDX11]], [[DIFF_CHECK12]]
45 ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[OFF]], 72
46 ; CHECK-NEXT: [[DIFF_CHECK14:%.*]] = icmp ult i64 [[TMP7]], 32
47 ; CHECK-NEXT: [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX13]], [[DIFF_CHECK14]]
48 ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[OFF]], 80
49 ; CHECK-NEXT: [[DIFF_CHECK16:%.*]] = icmp ult i64 [[TMP8]], 32
50 ; CHECK-NEXT: [[CONFLICT_RDX17:%.*]] = or i1 [[CONFLICT_RDX15]], [[DIFF_CHECK16]]
51 ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[OFF]], 88
52 ; CHECK-NEXT: [[DIFF_CHECK18:%.*]] = icmp ult i64 [[TMP9]], 32
53 ; CHECK-NEXT: [[CONFLICT_RDX19:%.*]] = or i1 [[CONFLICT_RDX17]], [[DIFF_CHECK18]]
54 ; CHECK-NEXT: br i1 [[CONFLICT_RDX19]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
56 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
57 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
58 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
60 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
61 ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0
62 ; CHECK-NEXT: [[TMP11:%.*]] = add nsw i64 [[TMP10]], -5
63 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], [[OFF]]
64 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[DST:%.*]], i64 [[TMP12]]
65 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP13]], i32 0
66 ; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP14]], align 8
67 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP11]], [[OFF_MUL_2]]
68 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP15]]
69 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP16]], i32 0
70 ; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP17]], align 8
71 ; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP11]], [[OFF_MUL_3]]
72 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP18]]
73 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP19]], i32 0
74 ; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP20]], align 8
75 ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP11]], [[OFF_MUL_4]]
76 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP21]]
77 ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[TMP22]], i32 0
78 ; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP23]], align 8
79 ; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[TMP11]], [[OFF_MUL_5]]
80 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP24]]
81 ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP25]], i32 0
82 ; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP26]], align 8
83 ; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[TMP11]], [[OFF_MUL_6]]
84 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP27]]
85 ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 0
86 ; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP29]], align 8
87 ; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[TMP11]], [[OFF_MUL_7]]
88 ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP30]]
89 ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[TMP31]], i32 0
90 ; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP32]], align 8
91 ; CHECK-NEXT: [[TMP33:%.*]] = add i64 [[TMP11]], [[OFF_MUL_8]]
92 ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP33]]
93 ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP34]], i32 0
94 ; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP35]], align 8
95 ; CHECK-NEXT: [[TMP36:%.*]] = add i64 [[TMP11]], [[OFF_MUL_9]]
96 ; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP36]]
97 ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP37]], i32 0
98 ; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP38]], align 8
99 ; CHECK-NEXT: [[TMP39:%.*]] = add i64 [[TMP11]], [[OFF_MUL_10]]
100 ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP39]]
101 ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr double, ptr [[TMP40]], i32 0
102 ; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP41]], align 8
103 ; CHECK-NEXT: [[TMP42:%.*]] = add i64 [[TMP11]], [[OFF_MUL_11]]
104 ; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP42]]
105 ; CHECK-NEXT: [[TMP44:%.*]] = getelementptr double, ptr [[TMP43]], i32 0
106 ; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP44]], align 8
107 ; CHECK-NEXT: [[TMP45:%.*]] = add i64 [[TMP11]], [[OFF_MUL_12]]
108 ; CHECK-NEXT: [[TMP46:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP45]]
109 ; CHECK-NEXT: [[TMP47:%.*]] = getelementptr double, ptr [[TMP46]], i32 0
110 ; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP47]], align 8
111 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
112 ; CHECK-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
113 ; CHECK-NEXT: br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
114 ; CHECK: middle.block:
115 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
116 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
118 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
119 ; CHECK-NEXT: br label [[LOOP:%.*]]
121 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
122 ; CHECK-NEXT: [[IV_SUB_5:%.*]] = add nsw i64 [[IV]], -5
123 ; CHECK-NEXT: [[IDX_1:%.*]] = add i64 [[IV_SUB_5]], [[OFF]]
124 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_1]]
125 ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_1]], align 8
126 ; CHECK-NEXT: [[IDX_2:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_2]]
127 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_2]]
128 ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_2]], align 8
129 ; CHECK-NEXT: [[IDX_3:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_3]]
130 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_3]]
131 ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_3]], align 8
132 ; CHECK-NEXT: [[IDX_4:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_4]]
133 ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_4]]
134 ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_4]], align 8
135 ; CHECK-NEXT: [[IDX_5:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_5]]
136 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_5]]
137 ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_5]], align 8
138 ; CHECK-NEXT: [[IDX_6:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_6]]
139 ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_6]]
140 ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_6]], align 8
141 ; CHECK-NEXT: [[IDX_7:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_7]]
142 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_7]]
143 ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_7]], align 8
144 ; CHECK-NEXT: [[IDX_8:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_8]]
145 ; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_8]]
146 ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_8]], align 8
147 ; CHECK-NEXT: [[IDX_9:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_9]]
148 ; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_9]]
149 ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_9]], align 8
150 ; CHECK-NEXT: [[IDX_10:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_10]]
151 ; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_10]]
152 ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_10]], align 8
153 ; CHECK-NEXT: [[IDX_11:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_11]]
154 ; CHECK-NEXT: [[GEP_11:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_11]]
155 ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_11]], align 8
156 ; CHECK-NEXT: [[IDX_12:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_12]]
157 ; CHECK-NEXT: [[GEP_12:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_12]]
158 ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_12]], align 8
159 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
160 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
161 ; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
163 ; CHECK-NEXT: ret void
166 %off.mul.2 = shl i64 %off, 1
167 %off.mul.3 = mul i64 %off, 3
168 %off.mul.4 = shl i64 %off, 2
169 %off.mul.5 = mul i64 %off, 5
170 %off.mul.6 = mul i64 %off, 6
171 %off.mul.7 = mul i64 %off, 7
172 %off.mul.8 = shl i64 %off, 3
173 %off.mul.9 = mul i64 %off, 9
174 %off.mul.10 = mul i64 %off, 10
175 %off.mul.11 = mul i64 %off, 11
176 %off.mul.12 = mul i64 %off, 12
180 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
181 %iv.sub.5 = add nsw i64 %iv, -5
182 %idx.1 = add i64 %iv.sub.5, %off
183 %gep.1 = getelementptr i64, ptr %dst, i64 %idx.1
184 store double 0.000000e+00, ptr %gep.1, align 8
185 %idx.2 = add i64 %iv.sub.5, %off.mul.2
186 %gep.2 = getelementptr i64, ptr %dst, i64 %idx.2
187 store double 0.000000e+00, ptr %gep.2, align 8
188 %idx.3 = add i64 %iv.sub.5, %off.mul.3
189 %gep.3 = getelementptr i64, ptr %dst, i64 %idx.3
190 store double 0.000000e+00, ptr %gep.3, align 8
191 %idx.4 = add i64 %iv.sub.5, %off.mul.4
192 %gep.4 = getelementptr i64, ptr %dst, i64 %idx.4
193 store double 0.000000e+00, ptr %gep.4, align 8
194 %idx.5 = add i64 %iv.sub.5, %off.mul.5
195 %gep.5 = getelementptr i64, ptr %dst, i64 %idx.5
196 store double 0.000000e+00, ptr %gep.5, align 8
197 %idx.6 = add i64 %iv.sub.5, %off.mul.6
198 %gep.6 = getelementptr i64, ptr %dst, i64 %idx.6
199 store double 0.000000e+00, ptr %gep.6, align 8
200 %idx.7 = add i64 %iv.sub.5, %off.mul.7
201 %gep.7 = getelementptr i64, ptr %dst, i64 %idx.7
202 store double 0.000000e+00, ptr %gep.7, align 8
203 %idx.8 = add i64 %iv.sub.5, %off.mul.8
204 %gep.8 = getelementptr i64, ptr %dst, i64 %idx.8
205 store double 0.000000e+00, ptr %gep.8, align 8
206 %idx.9 = add i64 %iv.sub.5, %off.mul.9
207 %gep.9 = getelementptr i64, ptr %dst, i64 %idx.9
208 store double 0.000000e+00, ptr %gep.9, align 8
209 %idx.10 = add i64 %iv.sub.5, %off.mul.10
210 %gep.10 = getelementptr i64, ptr %dst, i64 %idx.10
211 store double 0.000000e+00, ptr %gep.10, align 8
212 %idx.11 = add i64 %iv.sub.5, %off.mul.11
213 %gep.11 = getelementptr i64, ptr %dst, i64 %idx.11
214 store double 0.000000e+00, ptr %gep.11, align 8
215 %idx.12 = add i64 %iv.sub.5, %off.mul.12
216 %gep.12 = getelementptr i64, ptr %dst, i64 %idx.12
217 store double 0.000000e+00, ptr %gep.12, align 8
218 %iv.next = add nuw nsw i64 %iv, 1
219 %ec = icmp eq i64 %iv.next, %N
220 br i1 %ec, label %exit, label %loop