1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
4 ; void foo(float *data, float d) {
6 ; for (i = 0; i < 8000; i++)
10 ; This loop will be unrolled by 96 and vectorized on Power9.
11 ; The icmp between the loop iteration index and the loop trip count (384) has an LSRUse for 'reg({0,+,384})'.
12 ; Make sure that this icmp does not prevent LSR from choosing the best formulae set, which is based on 'reg({(192 + %0),+,384})'.
14 define void @foo(float* nocapture %data, float %d) {
16 ; CHECK: .LBB0_1: # %vector.body
17 ; CHECK: stxv 0, -192(4)
18 ; CHECK-NEXT: stxv 0, -176(4)
19 ; CHECK-NEXT: stxv 0, -160(4)
20 ; CHECK-NEXT: stxv 0, -144(4)
21 ; CHECK-NEXT: stxv 0, -128(4)
22 ; CHECK-NEXT: stxv 0, -112(4)
23 ; CHECK-NEXT: stxv 0, -96(4)
24 ; CHECK-NEXT: stxv 0, -80(4)
25 ; CHECK-NEXT: stxv 0, -64(4)
26 ; CHECK-NEXT: stxv 0, -48(4)
27 ; CHECK-NEXT: stxv 0, -32(4)
28 ; CHECK-NEXT: stxv 0, -16(4)
29 ; CHECK-NEXT: stxv 0, 0(4)
30 ; CHECK-NEXT: stxv 0, 16(4)
31 ; CHECK-NEXT: stxv 0, 32(4)
32 ; CHECK-NEXT: stxv 0, 48(4)
33 ; CHECK-NEXT: stxv 0, 64(4)
34 ; CHECK-NEXT: stxv 0, 80(4)
35 ; CHECK-NEXT: stxv 0, 96(4)
36 ; CHECK-NEXT: stxv 0, 112(4)
37 ; CHECK-NEXT: stxv 0, 128(4)
38 ; CHECK-NEXT: stxv 0, 144(4)
39 ; CHECK-NEXT: stxv 0, 160(4)
40 ; CHECK-NEXT: stxv 0, 176(4)
41 ; CHECK-NEXT: addi 4, 4, 384
42 ; CHECK-NEXT: bdnz .LBB0_1
45 %broadcast.splatinsert16 = insertelement <4 x float> undef, float %d, i32 0
46 %broadcast.splat17 = shufflevector <4 x float> %broadcast.splatinsert16, <4 x float> undef, <4 x i32> zeroinitializer
47 %broadcast.splatinsert18 = insertelement <4 x float> undef, float %d, i32 0
48 %broadcast.splat19 = shufflevector <4 x float> %broadcast.splatinsert18, <4 x float> undef, <4 x i32> zeroinitializer
49 %broadcast.splatinsert20 = insertelement <4 x float> undef, float %d, i32 0
50 %broadcast.splat21 = shufflevector <4 x float> %broadcast.splatinsert20, <4 x float> undef, <4 x i32> zeroinitializer
51 %broadcast.splatinsert22 = insertelement <4 x float> undef, float %d, i32 0
52 %broadcast.splat23 = shufflevector <4 x float> %broadcast.splatinsert22, <4 x float> undef, <4 x i32> zeroinitializer
53 %broadcast.splatinsert24 = insertelement <4 x float> undef, float %d, i32 0
54 %broadcast.splat25 = shufflevector <4 x float> %broadcast.splatinsert24, <4 x float> undef, <4 x i32> zeroinitializer
55 %broadcast.splatinsert26 = insertelement <4 x float> undef, float %d, i32 0
56 %broadcast.splat27 = shufflevector <4 x float> %broadcast.splatinsert26, <4 x float> undef, <4 x i32> zeroinitializer
57 %broadcast.splatinsert28 = insertelement <4 x float> undef, float %d, i32 0
58 %broadcast.splat29 = shufflevector <4 x float> %broadcast.splatinsert28, <4 x float> undef, <4 x i32> zeroinitializer
59 %broadcast.splatinsert30 = insertelement <4 x float> undef, float %d, i32 0
60 %broadcast.splat31 = shufflevector <4 x float> %broadcast.splatinsert30, <4 x float> undef, <4 x i32> zeroinitializer
61 %broadcast.splatinsert32 = insertelement <4 x float> undef, float %d, i32 0
62 %broadcast.splat33 = shufflevector <4 x float> %broadcast.splatinsert32, <4 x float> undef, <4 x i32> zeroinitializer
63 %broadcast.splatinsert34 = insertelement <4 x float> undef, float %d, i32 0
64 %broadcast.splat35 = shufflevector <4 x float> %broadcast.splatinsert34, <4 x float> undef, <4 x i32> zeroinitializer
65 %broadcast.splatinsert36 = insertelement <4 x float> undef, float %d, i32 0
66 %broadcast.splat37 = shufflevector <4 x float> %broadcast.splatinsert36, <4 x float> undef, <4 x i32> zeroinitializer
67 %broadcast.splatinsert38 = insertelement <4 x float> undef, float %d, i32 0
68 %broadcast.splat39 = shufflevector <4 x float> %broadcast.splatinsert38, <4 x float> undef, <4 x i32> zeroinitializer
71 vector.body: ; preds = %vector.body, %entry
72 %index = phi i64 [ 0, %entry ], [ %index.next.1, %vector.body ]
73 %0 = getelementptr inbounds float, float* %data, i64 %index
74 %1 = bitcast float* %0 to <4 x float>*
75 store <4 x float> %broadcast.splat17, <4 x float>* %1, align 4
76 %2 = getelementptr inbounds float, float* %0, i64 4
77 %3 = bitcast float* %2 to <4 x float>*
78 store <4 x float> %broadcast.splat19, <4 x float>* %3, align 4
79 %4 = getelementptr inbounds float, float* %0, i64 8
80 %5 = bitcast float* %4 to <4 x float>*
81 store <4 x float> %broadcast.splat21, <4 x float>* %5, align 4
82 %6 = getelementptr inbounds float, float* %0, i64 12
83 %7 = bitcast float* %6 to <4 x float>*
84 store <4 x float> %broadcast.splat23, <4 x float>* %7, align 4
85 %8 = getelementptr inbounds float, float* %0, i64 16
86 %9 = bitcast float* %8 to <4 x float>*
87 store <4 x float> %broadcast.splat25, <4 x float>* %9, align 4
88 %10 = getelementptr inbounds float, float* %0, i64 20
89 %11 = bitcast float* %10 to <4 x float>*
90 store <4 x float> %broadcast.splat27, <4 x float>* %11, align 4
91 %12 = getelementptr inbounds float, float* %0, i64 24
92 %13 = bitcast float* %12 to <4 x float>*
93 store <4 x float> %broadcast.splat29, <4 x float>* %13, align 4
94 %14 = getelementptr inbounds float, float* %0, i64 28
95 %15 = bitcast float* %14 to <4 x float>*
96 store <4 x float> %broadcast.splat31, <4 x float>* %15, align 4
97 %16 = getelementptr inbounds float, float* %0, i64 32
98 %17 = bitcast float* %16 to <4 x float>*
99 store <4 x float> %broadcast.splat33, <4 x float>* %17, align 4
100 %18 = getelementptr inbounds float, float* %0, i64 36
101 %19 = bitcast float* %18 to <4 x float>*
102 store <4 x float> %broadcast.splat35, <4 x float>* %19, align 4
103 %20 = getelementptr inbounds float, float* %0, i64 40
104 %21 = bitcast float* %20 to <4 x float>*
105 store <4 x float> %broadcast.splat37, <4 x float>* %21, align 4
106 %22 = getelementptr inbounds float, float* %0, i64 44
107 %23 = bitcast float* %22 to <4 x float>*
108 store <4 x float> %broadcast.splat39, <4 x float>* %23, align 4
109 %index.next = add nuw nsw i64 %index, 48
110 %24 = getelementptr inbounds float, float* %data, i64 %index.next
111 %25 = bitcast float* %24 to <4 x float>*
112 store <4 x float> %broadcast.splat17, <4 x float>* %25, align 4
113 %26 = getelementptr inbounds float, float* %24, i64 4
114 %27 = bitcast float* %26 to <4 x float>*
115 store <4 x float> %broadcast.splat19, <4 x float>* %27, align 4
116 %28 = getelementptr inbounds float, float* %24, i64 8
117 %29 = bitcast float* %28 to <4 x float>*
118 store <4 x float> %broadcast.splat21, <4 x float>* %29, align 4
119 %30 = getelementptr inbounds float, float* %24, i64 12
120 %31 = bitcast float* %30 to <4 x float>*
121 store <4 x float> %broadcast.splat23, <4 x float>* %31, align 4
122 %32 = getelementptr inbounds float, float* %24, i64 16
123 %33 = bitcast float* %32 to <4 x float>*
124 store <4 x float> %broadcast.splat25, <4 x float>* %33, align 4
125 %34 = getelementptr inbounds float, float* %24, i64 20
126 %35 = bitcast float* %34 to <4 x float>*
127 store <4 x float> %broadcast.splat27, <4 x float>* %35, align 4
128 %36 = getelementptr inbounds float, float* %24, i64 24
129 %37 = bitcast float* %36 to <4 x float>*
130 store <4 x float> %broadcast.splat29, <4 x float>* %37, align 4
131 %38 = getelementptr inbounds float, float* %24, i64 28
132 %39 = bitcast float* %38 to <4 x float>*
133 store <4 x float> %broadcast.splat31, <4 x float>* %39, align 4
134 %40 = getelementptr inbounds float, float* %24, i64 32
135 %41 = bitcast float* %40 to <4 x float>*
136 store <4 x float> %broadcast.splat33, <4 x float>* %41, align 4
137 %42 = getelementptr inbounds float, float* %24, i64 36
138 %43 = bitcast float* %42 to <4 x float>*
139 store <4 x float> %broadcast.splat35, <4 x float>* %43, align 4
140 %44 = getelementptr inbounds float, float* %24, i64 40
141 %45 = bitcast float* %44 to <4 x float>*
142 store <4 x float> %broadcast.splat37, <4 x float>* %45, align 4
143 %46 = getelementptr inbounds float, float* %24, i64 44
144 %47 = bitcast float* %46 to <4 x float>*
145 store <4 x float> %broadcast.splat39, <4 x float>* %47, align 4
146 %index.next.1 = add nuw nsw i64 %index, 96
147 %48 = icmp eq i64 %index.next.1, 7968
148 br i1 %48, label %for.body, label %vector.body
150 for.body: ; preds = %vector.body
151 %arrayidx = getelementptr inbounds float, float* %data, i64 7968
152 store float %d, float* %arrayidx, align 4
153 %arrayidx.1 = getelementptr inbounds float, float* %data, i64 7969
154 store float %d, float* %arrayidx.1, align 4
155 %arrayidx.2 = getelementptr inbounds float, float* %data, i64 7970
156 store float %d, float* %arrayidx.2, align 4
157 %arrayidx.3 = getelementptr inbounds float, float* %data, i64 7971
158 store float %d, float* %arrayidx.3, align 4
159 %arrayidx.4 = getelementptr inbounds float, float* %data, i64 7972
160 store float %d, float* %arrayidx.4, align 4
161 %arrayidx.5 = getelementptr inbounds float, float* %data, i64 7973
162 store float %d, float* %arrayidx.5, align 4
163 %arrayidx.6 = getelementptr inbounds float, float* %data, i64 7974
164 store float %d, float* %arrayidx.6, align 4
165 %arrayidx.7 = getelementptr inbounds float, float* %data, i64 7975
166 store float %d, float* %arrayidx.7, align 4
167 %arrayidx.8 = getelementptr inbounds float, float* %data, i64 7976
168 store float %d, float* %arrayidx.8, align 4
169 %arrayidx.9 = getelementptr inbounds float, float* %data, i64 7977
170 store float %d, float* %arrayidx.9, align 4
171 %arrayidx.10 = getelementptr inbounds float, float* %data, i64 7978
172 store float %d, float* %arrayidx.10, align 4
173 %arrayidx.11 = getelementptr inbounds float, float* %data, i64 7979
174 store float %d, float* %arrayidx.11, align 4
175 %arrayidx.12 = getelementptr inbounds float, float* %data, i64 7980
176 store float %d, float* %arrayidx.12, align 4
177 %arrayidx.13 = getelementptr inbounds float, float* %data, i64 7981
178 store float %d, float* %arrayidx.13, align 4
179 %arrayidx.14 = getelementptr inbounds float, float* %data, i64 7982
180 store float %d, float* %arrayidx.14, align 4
181 %arrayidx.15 = getelementptr inbounds float, float* %data, i64 7983
182 store float %d, float* %arrayidx.15, align 4
183 %arrayidx.16 = getelementptr inbounds float, float* %data, i64 7984
184 store float %d, float* %arrayidx.16, align 4
185 %arrayidx.17 = getelementptr inbounds float, float* %data, i64 7985
186 store float %d, float* %arrayidx.17, align 4
187 %arrayidx.18 = getelementptr inbounds float, float* %data, i64 7986
188 store float %d, float* %arrayidx.18, align 4
189 %arrayidx.19 = getelementptr inbounds float, float* %data, i64 7987
190 store float %d, float* %arrayidx.19, align 4
191 %arrayidx.20 = getelementptr inbounds float, float* %data, i64 7988
192 store float %d, float* %arrayidx.20, align 4
193 %arrayidx.21 = getelementptr inbounds float, float* %data, i64 7989
194 store float %d, float* %arrayidx.21, align 4
195 %arrayidx.22 = getelementptr inbounds float, float* %data, i64 7990
196 store float %d, float* %arrayidx.22, align 4
197 %arrayidx.23 = getelementptr inbounds float, float* %data, i64 7991
198 store float %d, float* %arrayidx.23, align 4
199 %arrayidx.24 = getelementptr inbounds float, float* %data, i64 7992
200 store float %d, float* %arrayidx.24, align 4
201 %arrayidx.25 = getelementptr inbounds float, float* %data, i64 7993
202 store float %d, float* %arrayidx.25, align 4
203 %arrayidx.26 = getelementptr inbounds float, float* %data, i64 7994
204 store float %d, float* %arrayidx.26, align 4
205 %arrayidx.27 = getelementptr inbounds float, float* %data, i64 7995
206 store float %d, float* %arrayidx.27, align 4
207 %arrayidx.28 = getelementptr inbounds float, float* %data, i64 7996
208 store float %d, float* %arrayidx.28, align 4
209 %arrayidx.29 = getelementptr inbounds float, float* %data, i64 7997
210 store float %d, float* %arrayidx.29, align 4
211 %arrayidx.30 = getelementptr inbounds float, float* %data, i64 7998
212 store float %d, float* %arrayidx.30, align 4
213 %arrayidx.31 = getelementptr inbounds float, float* %data, i64 7999
214 store float %d, float* %arrayidx.31, align 4