; Module header for the Polly-optimized build of matmul.c: target description,
; the FILE type needed to type @stdout, and the global matrices.
2 source_filename = "matmul.c"
3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
4 target triple = "x86_64-unknown-linux-gnu"
; Struct types used only so that @stdout and the fprintf/fputc calls are typed.
6 %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
7 %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
; A and B: 1536 x 1536 float matrices, zero-initialized, 16-byte aligned.
; They are written by @init_array_polly_subfn and read by @main_polly_subfn_1.
9 @A = common dso_local local_unnamed_addr global [1536 x [1536 x float]] zeroinitializer, align 16
10 @B = common dso_local local_unnamed_addr global [1536 x [1536 x float]] zeroinitializer, align 16
11 @stdout = external dso_local local_unnamed_addr global %struct._IO_FILE*, align 8
; fprintf format used by @print_array: "%lf " plus the NUL terminator (5 bytes).
12 @.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1
; C: 1536 x 1536 float matrix; cleared by @main_polly_subfn, accumulated into
; by @main_polly_subfn_1 and printed by @print_array.
13 @C = common dso_local local_unnamed_addr global [1536 x [1536 x float]] zeroinitializer, align 16
; init_array: launches the parallel initialization loop whose body is
; @init_array_polly_subfn.
;
; The context passed to the worker is an empty struct ({}), i.e. the worker
; receives no captured values. After starting the parallel loop, the calling
; thread also runs the worker itself, then closes the region with
; @GOMP_parallel_end.
;
; NOTE(review): the trailing arguments (i32 0, i64 0, i64 1536, i64 1) are
; presumably libgomp's (num_threads, start, end, incr), i.e. default thread
; count over iterations [0, 1536) with step 1 - confirm against the libgomp ABI.
; NOTE(review): the entry label, terminating `ret` and closing brace are not
; present in this listing (embedded line numbers skip); confirm against the
; full file.
15 ; Function Attrs: noinline nounwind uwtable
16 define dso_local void @init_array() local_unnamed_addr #0 {
; Empty context object, passed to the worker as an opaque i8*.
18 %polly.par.userContext = alloca {}, align 8
19 %polly.par.userContext1 = bitcast {}* %polly.par.userContext to i8*
20 call void @GOMP_parallel_loop_runtime_start(void (i8*)* nonnull @init_array_polly_subfn, i8* nonnull %polly.par.userContext1, i32 0, i64 0, i64 1536, i64 1) #3
; The calling thread runs the worker body as well.
21 call void @init_array_polly_subfn(i8* nonnull %polly.par.userContext1) #3
22 call void @GOMP_parallel_end() #3
; print_array: writes every element of @C to stdout.
;
; Two nested loops run over rows (%indvars.iv6) and columns (%indvars.iv),
; each from 0 to 1535. Every element is widened from float to double and
; printed with the "%lf " format. A newline (character 10) is written after
; every column whose index is 79 modulo 80, and again at the end of each row.
;
; NOTE(review): several terminators (the branch into for.body3, the branch
; from if.then to for.inc, and the final `ret`) plus the entry label and
; closing brace are not present in this listing; the `preds` annotations imply
; them. Confirm against the full file.
26 ; Function Attrs: noinline nounwind uwtable
27 define dso_local void @print_array() local_unnamed_addr #1 {
29 br label %for.cond1.preheader
; ---- Row loop header: %indvars.iv6 is the row index. ----
31 for.cond1.preheader: ; preds = %for.end, %entry
32 %indvars.iv6 = phi i64 [ 0, %entry ], [ %indvars.iv.next7, %for.end ]
33 %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
; ---- Column loop body: %indvars.iv is the column index. ----
36 for.body3: ; preds = %for.inc, %for.cond1.preheader
37 %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.inc ]
; %1 is the stdout pointer to print to; it is re-read from @stdout in for.inc
; on every iteration rather than cached across calls.
38 %1 = phi %struct._IO_FILE* [ %0, %for.cond1.preheader ], [ %5, %for.inc ]
39 %arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %indvars.iv6, i64 %indvars.iv
40 %2 = load float, float* %arrayidx5, align 4
; Widen to double before passing it through the variadic fprintf call.
41 %conv = fpext float %2 to double
42 %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #3
; Line-wrap test: column % 80 == 79.
43 %3 = trunc i64 %indvars.iv to i32
44 %rem = urem i32 %3, 80
45 %cmp6 = icmp eq i32 %rem, 79
46 br i1 %cmp6, label %if.then, label %for.inc
; ---- Emit a newline after every 80th value. ----
48 if.then: ; preds = %for.body3
49 %4 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
50 %fputc3 = tail call i32 @fputc(i32 10, %struct._IO_FILE* %4)
; ---- Column loop latch. ----
53 for.inc: ; preds = %if.then, %for.body3
54 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
55 %5 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
56 %exitcond = icmp eq i64 %indvars.iv.next, 1536
57 br i1 %exitcond, label %for.end, label %for.body3
; ---- End of row: newline, then advance to the next row. ----
59 for.end: ; preds = %for.inc
60 %fputc = tail call i32 @fputc(i32 10, %struct._IO_FILE* %5)
61 %indvars.iv.next7 = add nuw nsw i64 %indvars.iv6, 1
62 %exitcond8 = icmp eq i64 %indvars.iv.next7, 1536
63 br i1 %exitcond8, label %for.end12, label %for.cond1.preheader
; ---- Function exit (its terminator is not visible in this listing). ----
65 for.end12: ; preds = %for.end
69 ; Function Attrs: nounwind
70 declare dso_local i32 @fprintf(%struct._IO_FILE* nocapture, i8* nocapture readonly, ...) local_unnamed_addr #2
; main: initializes the input matrices, then runs two parallel regions.
;
;   1. @init_array            - fills @A and @B.
;   2. @main_polly_subfn      - parallel loop started with step 1; the worker
;                               zeroes rows of @C.
;   3. @main_polly_subfn_1    - parallel loop started with step 64; the worker
;                               accumulates products of @A and @B into @C.
;
; For each region the calling thread also executes the worker directly and
; then calls @GOMP_parallel_end. Both regions share one empty context object.
;
; NOTE(review): the trailing arguments to GOMP_parallel_loop_runtime_start are
; presumably (num_threads, start, end, incr) per libgomp - confirm.
; NOTE(review): the entry label, the return instruction and the closing brace
; are not present in this listing; confirm against the full file.
72 ; Function Attrs: noinline nounwind uwtable
73 define dso_local i32 @main() local_unnamed_addr #0 {
; Empty context struct: the workers capture no values.
75 %polly.par.userContext3 = alloca {}, align 8
76 tail call void @init_array()
77 %polly.par.userContext1 = bitcast {}* %polly.par.userContext3 to i8*
; Region 1: clear C.
78 call void @GOMP_parallel_loop_runtime_start(void (i8*)* nonnull @main_polly_subfn, i8* nonnull %polly.par.userContext1, i32 0, i64 0, i64 1536, i64 1) #3
79 call void @main_polly_subfn(i8* nonnull %polly.par.userContext1) #3
80 call void @GOMP_parallel_end() #3
; Region 2: tiled multiply-accumulate; the last argument (64) matches the
; 64-row step the worker uses for its outer loop.
81 call void @GOMP_parallel_loop_runtime_start(void (i8*)* nonnull @main_polly_subfn_1, i8* nonnull %polly.par.userContext1, i32 0, i64 0, i64 1536, i64 64) #3
82 call void @main_polly_subfn_1(i8* nonnull %polly.par.userContext1) #3
83 call void @GOMP_parallel_end() #3
87 ; Function Attrs: nounwind
88 declare i32 @fputc(i32, %struct._IO_FILE* nocapture) local_unnamed_addr #3
; init_array_polly_subfn: worker body of the parallel initialization loop.
;
; Repeatedly asks @GOMP_loop_runtime_next for a chunk [LB, UB) of row indices.
; For every row i in the chunk and every column j in 0..1535 it computes
;     v = (float)(((%6 & 1023) + 1) * 0.5)
; and stores v to both A[i][j] and B[i][j]. When no chunk is returned it calls
; @GOMP_loop_end_nowait.
;
; Parameter: %polly.par.userContext is unused (marked readnone).
;
; NOTE(review): the definitions of %1 (tested by the first branch) and %6
; (input to the `and`) are not present in this listing. %1 is presumably
; `icmp eq i8 %0, 0`, mirroring the checkNext block; %6 is presumably derived
; from the truncated indices %4 and %5 - confirm against the full file.
; NOTE(review): the entry label (named polly.par.setup in the preds
; annotations), the `ret` after GOMP_loop_end_nowait and the closing brace are
; likewise not visible here.
90 define internal void @init_array_polly_subfn(i8* nocapture readnone %polly.par.userContext) #4 {
; Stack slots that receive the chunk bounds from the runtime.
92 %polly.par.LBPtr = alloca i64, align 8
93 %polly.par.UBPtr = alloca i64, align 8
94 %0 = call i8 @GOMP_loop_runtime_next(i64* nonnull %polly.par.LBPtr, i64* nonnull %polly.par.UBPtr)
96 br i1 %1, label %polly.par.exit, label %polly.par.loadIVBounds
; ---- No more work for this thread. ----
98 polly.par.exit: ; preds = %polly.par.checkNext.loopexit, %polly.par.setup
99 call void @GOMP_loop_end_nowait()
; ---- Current chunk finished: request the next one; a zero result means done. ----
102 polly.par.checkNext.loopexit: ; preds = %polly.loop_exit4
103 %2 = call i8 @GOMP_loop_runtime_next(i64* nonnull %polly.par.LBPtr, i64* nonnull %polly.par.UBPtr)
104 %3 = icmp eq i8 %2, 0
105 br i1 %3, label %polly.par.exit, label %polly.par.loadIVBounds
; ---- Load chunk bounds; UBAdjusted = UB - 1 is used as an inclusive bound. ----
107 polly.par.loadIVBounds: ; preds = %polly.par.setup, %polly.par.checkNext.loopexit
108 %polly.par.LB = load i64, i64* %polly.par.LBPtr, align 8
109 %polly.par.UB = load i64, i64* %polly.par.UBPtr, align 8
110 %polly.par.UBAdjusted = add i64 %polly.par.UB, -1
111 br label %polly.loop_header
; ---- Row loop: %polly.indvar is the row index i, starting at LB. ----
113 polly.loop_header: ; preds = %polly.par.loadIVBounds, %polly.loop_exit4
114 %polly.indvar = phi i64 [ %polly.par.LB, %polly.par.loadIVBounds ], [ %polly.indvar_next, %polly.loop_exit4 ]
115 %4 = trunc i64 %polly.indvar to i32
116 br label %polly.loop_header2
; ---- Row latch: continue while the row just finished is below UB - 1. ----
118 polly.loop_exit4: ; preds = %polly.loop_header2
119 %polly.indvar_next = add nsw i64 %polly.indvar, 1
120 %polly.loop_cond = icmp slt i64 %polly.indvar, %polly.par.UBAdjusted
121 br i1 %polly.loop_cond, label %polly.loop_header, label %polly.par.checkNext.loopexit
; ---- Column loop: %polly.indvar5 is the column index j, 0..1535. ----
123 polly.loop_header2: ; preds = %polly.loop_header2, %polly.loop_header
124 %polly.indvar5 = phi i64 [ 0, %polly.loop_header ], [ %polly.indvar_next6, %polly.loop_header2 ]
125 %5 = trunc i64 %polly.indvar5 to i32
; Keep the low 10 bits of %6, add 1, then halve in double precision and
; narrow the result to float.
127 %7 = and i32 %6, 1023
128 %8 = add nuw nsw i32 %7, 1
129 %p_conv = sitofp i32 %8 to double
130 %p_div = fmul double %p_conv, 5.000000e-01
131 %p_conv4 = fptrunc double %p_div to float
; The same value is stored to A[i][j] and B[i][j].
132 %scevgep8 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %polly.indvar, i64 %polly.indvar5
133 store float %p_conv4, float* %scevgep8, align 4, !alias.scope !2, !noalias !4
134 %scevgep10 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %polly.indvar, i64 %polly.indvar5
135 store float %p_conv4, float* %scevgep10, align 4, !alias.scope !5, !noalias !6
136 %polly.indvar_next6 = add nuw nsw i64 %polly.indvar5, 1
137 %exitcond = icmp eq i64 %polly.indvar_next6, 1536
138 br i1 %exitcond, label %polly.loop_exit4, label %polly.loop_header2
; External runtime entry points used by the Polly-generated parallel code in
; this module. NOTE(review): these are presumably provided by libgomp (GNU
; OpenMP runtime); confirm the linked runtime and its ABI.
;
; GOMP_loop_runtime_next(lb*, ub*): the callers treat a zero result as "no
; more chunks" and otherwise read the chunk bounds from the two out-pointers.
141 declare i8 @GOMP_loop_runtime_next(i64*, i64*) local_unnamed_addr
; Called by each worker once it has no further chunks to process.
143 declare void @GOMP_loop_end_nowait() local_unnamed_addr
; Starts a parallel loop: (worker fn, context, i32, i64, i64, i64).
; NOTE(review): the last four arguments are presumably
; (num_threads, start, end, incr) - confirm.
145 declare void @GOMP_parallel_loop_runtime_start(void (i8*)*, i8*, i32, i64, i64, i64) local_unnamed_addr
; Closes a parallel region opened by GOMP_parallel_loop_runtime_start.
147 declare void @GOMP_parallel_end() local_unnamed_addr
; main_polly_subfn: worker body of the first parallel region in @main.
;
; For every chunk [LB, UB) of row indices handed out by
; @GOMP_loop_runtime_next, zero-fills the corresponding rows of @C with a
; single memset starting at C[LB][0]. When the runtime returns 0 (no more
; chunks) it calls @GOMP_loop_end_nowait.
;
; Parameter: %polly.par.userContext is unused (marked readnone).
;
; NOTE(review): the entry label (polly.par.setup in the preds annotations),
; the `ret` after GOMP_loop_end_nowait and the closing brace are not present
; in this listing; confirm against the full file.
149 define internal void @main_polly_subfn(i8* nocapture readnone %polly.par.userContext) #4 {
; Stack slots that receive the chunk bounds from the runtime.
151 %polly.par.LBPtr = alloca i64, align 8
152 %polly.par.UBPtr = alloca i64, align 8
153 %0 = call i8 @GOMP_loop_runtime_next(i64* nonnull %polly.par.LBPtr, i64* nonnull %polly.par.UBPtr)
154 %1 = icmp eq i8 %0, 0
155 br i1 %1, label %polly.par.exit, label %polly.par.loadIVBounds
; ---- No more work for this thread. ----
157 polly.par.exit: ; preds = %polly.par.loadIVBounds, %polly.par.setup
158 call void @GOMP_loop_end_nowait()
; ---- Process one chunk, then loop back here while chunks remain. ----
161 polly.par.loadIVBounds: ; preds = %polly.par.setup, %polly.par.loadIVBounds
162 %polly.par.LB = load i64, i64* %polly.par.LBPtr, align 8
163 %polly.par.UB = load i64, i64* %polly.par.UBPtr, align 8
164 %polly.par.UBAdjusted = add i64 %polly.par.UB, -1
; Byte pointer to the first element of row LB of C.
165 %scevgep2 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %polly.par.LB, i64 0
166 %scevgep23 = bitcast float* %scevgep2 to i8*
; Row count = max(LB, UB - 1) + 1 - LB, so at least one row is always cleared
; even if UB - 1 < LB.
167 %2 = icmp sgt i64 %polly.par.LB, %polly.par.UBAdjusted
168 %smax = select i1 %2, i64 %polly.par.LB, i64 %polly.par.UBAdjusted
169 %3 = add i64 %smax, 1
170 %4 = sub i64 %3, %polly.par.LB
; 6144 bytes = one row of C (1536 floats * 4 bytes).
171 %5 = mul i64 %4, 6144
172 call void @llvm.memset.p0i8.i64(i8* align 16 %scevgep23, i8 0, i64 %5, i1 false)
173 %6 = call i8 @GOMP_loop_runtime_next(i64* nonnull %polly.par.LBPtr, i64* nonnull %polly.par.UBPtr)
174 %7 = icmp eq i8 %6, 0
175 br i1 %7, label %polly.par.exit, label %polly.par.loadIVBounds
; main_polly_subfn_1: worker body of the second parallel region in @main.
; Performs a tiled, vectorized multiply-accumulate  C[i][j] += A[i][k] * B[k][j].
;
; Loop nest, outermost first:
;   * chunk loop      - chunks [LB, UB) obtained from @GOMP_loop_runtime_next.
;   * polly.indvar    - first row of a row tile; advances by 64 while the next
;                       tile start is <= UB - 1.
;   * polly.indvar5   - column tile start, 0..1535 step 64 (%indvar counts
;                       tiles, %5 = %indvar * 64 is the tile's first column).
;   * polly.indvar11  - k tile start, 0..1535 step 64 (%indvars.iv3 is the
;                       exclusive end of the k tile).
;   * polly.indvar17  - row i inside the row tile, polly.indvar .. polly.indvar+63.
;   * polly.indvar23  - k inside the k tile (block vector.ph).
;
; For one row i and one 64-column tile the four <16 x float> slices of C
; (columns %5+0, +16, +32, +48) are loaded once, kept in phi nodes across the
; k loop, and stored back when the k tile is finished.
;
; Parameter: %polly.par.userContext is unused (marked readnone).
;
; NOTE(review): the entry label (polly.par.setup in the preds annotations),
; the `ret` after GOMP_loop_end_nowait, the branch from polly.loop_header14
; into vector.ph and the closing brace are not present in this listing;
; confirm against the full file.
178 define internal void @main_polly_subfn_1(i8* nocapture readnone %polly.par.userContext) #4 {
; Stack slots that receive the chunk bounds from the runtime.
180 %polly.par.LBPtr = alloca i64, align 8
181 %polly.par.UBPtr = alloca i64, align 8
182 %0 = call i8 @GOMP_loop_runtime_next(i64* nonnull %polly.par.LBPtr, i64* nonnull %polly.par.UBPtr)
183 %1 = icmp eq i8 %0, 0
184 br i1 %1, label %polly.par.exit, label %polly.par.loadIVBounds
; ---- No more work for this thread. ----
186 polly.par.exit: ; preds = %polly.par.checkNext.loopexit, %polly.par.setup
187 call void @GOMP_loop_end_nowait()
; ---- Chunk finished: request the next one; a zero result means done. ----
190 polly.par.checkNext.loopexit: ; preds = %polly.loop_exit4
191 %2 = call i8 @GOMP_loop_runtime_next(i64* nonnull %polly.par.LBPtr, i64* nonnull %polly.par.UBPtr)
192 %3 = icmp eq i8 %2, 0
193 br i1 %3, label %polly.par.exit, label %polly.par.loadIVBounds
; ---- Load chunk bounds; UBAdjusted = UB - 1 is used as an inclusive bound. ----
195 polly.par.loadIVBounds: ; preds = %polly.par.setup, %polly.par.checkNext.loopexit
196 %polly.par.LB = load i64, i64* %polly.par.LBPtr, align 8
197 %polly.par.UB = load i64, i64* %polly.par.UBPtr, align 8
198 %polly.par.UBAdjusted = add i64 %polly.par.UB, -1
199 br label %polly.loop_header
; ---- Row-tile loop: %polly.indvar is the tile's first row. ----
201 polly.loop_header: ; preds = %polly.loop_exit4, %polly.par.loadIVBounds
202 %polly.indvar = phi i64 [ %polly.par.LB, %polly.par.loadIVBounds ], [ %polly.indvar_next, %polly.loop_exit4 ]
; %4 = last row of this tile (inclusive bound for the row loop below).
203 %4 = add nsw i64 %polly.indvar, 63
204 br label %polly.loop_header2
; ---- Row-tile latch: step 64 rows, stop once past UB - 1. ----
206 polly.loop_exit4: ; preds = %polly.loop_exit10
207 %polly.indvar_next = add nsw i64 %polly.indvar, 64
208 %polly.loop_cond = icmp sgt i64 %polly.indvar_next, %polly.par.UBAdjusted
209 br i1 %polly.loop_cond, label %polly.par.checkNext.loopexit, label %polly.loop_header
; ---- Column-tile loop: compute the four 16-column slice offsets. ----
211 polly.loop_header2: ; preds = %polly.loop_header, %polly.loop_exit10
212 %indvar = phi i64 [ 0, %polly.loop_header ], [ %indvar.next, %polly.loop_exit10 ]
213 %polly.indvar5 = phi i64 [ 0, %polly.loop_header ], [ %polly.indvar_next6, %polly.loop_exit10 ]
; %5 = tile index * 64; its low six bits are zero, so `or` with 16/32/48
; is equivalent to adding those offsets.
214 %5 = shl i64 %indvar, 6
215 %offset.idx.1 = or i64 %5, 16
216 %offset.idx.2 = or i64 %5, 32
217 %offset.idx.3 = or i64 %5, 48
218 br label %polly.loop_header8
; ---- Column-tile latch. ----
220 polly.loop_exit10: ; preds = %polly.loop_exit16
221 %polly.indvar_next6 = add nuw nsw i64 %polly.indvar5, 64
222 %polly.loop_cond7 = icmp ult i64 %polly.indvar_next6, 1536
223 %indvar.next = add i64 %indvar, 1
224 br i1 %polly.loop_cond7, label %polly.loop_header2, label %polly.loop_exit4
; ---- k-tile loop: %polly.indvar11 = tile start, %indvars.iv3 = tile end (exclusive). ----
226 polly.loop_header8: ; preds = %polly.loop_header2, %polly.loop_exit16
227 %indvars.iv3 = phi i64 [ 64, %polly.loop_header2 ], [ %indvars.iv.next4, %polly.loop_exit16 ]
228 %polly.indvar11 = phi i64 [ 0, %polly.loop_header2 ], [ %polly.indvar_next12, %polly.loop_exit16 ]
229 br label %polly.loop_header14
; ---- k-tile latch. ----
231 polly.loop_exit16: ; preds = %polly.loop_exit22
232 %polly.indvar_next12 = add nuw nsw i64 %polly.indvar11, 64
233 %polly.loop_cond13 = icmp ult i64 %polly.indvar_next12, 1536
234 %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 64
235 br i1 %polly.loop_cond13, label %polly.loop_header8, label %polly.loop_exit10
; ---- Row loop: load the four 16-float slices of C[i][tile] as accumulators. ----
237 polly.loop_header14: ; preds = %polly.loop_header8, %polly.loop_exit22
238 %polly.indvar17 = phi i64 [ %polly.indvar_next18, %polly.loop_exit22 ], [ %polly.indvar, %polly.loop_header8 ]
239 %6 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %polly.indvar17, i64 %5
240 %7 = bitcast float* %6 to <16 x float>*
241 %8 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %polly.indvar17, i64 %offset.idx.1
242 %9 = bitcast float* %8 to <16 x float>*
243 %10 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %polly.indvar17, i64 %offset.idx.2
244 %11 = bitcast float* %10 to <16 x float>*
245 %12 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %polly.indvar17, i64 %offset.idx.3
246 %13 = bitcast float* %12 to <16 x float>*
247 %.promoted = load <16 x float>, <16 x float>* %7, align 4, !alias.scope !7, !noalias !9
248 %.promoted16 = load <16 x float>, <16 x float>* %9, align 4, !alias.scope !7, !noalias !9
249 %.promoted18 = load <16 x float>, <16 x float>* %11, align 4, !alias.scope !7, !noalias !9
250 %.promoted20 = load <16 x float>, <16 x float>* %13, align 4, !alias.scope !7, !noalias !9
; ---- k loop finished for this row: write the accumulators back to C, next row. ----
253 polly.loop_exit22: ; preds = %vector.ph
254 store <16 x float> %interleaved.vec, <16 x float>* %7, align 4, !alias.scope !7, !noalias !9
255 store <16 x float> %interleaved.vec.1, <16 x float>* %9, align 4, !alias.scope !7, !noalias !9
256 store <16 x float> %interleaved.vec.2, <16 x float>* %11, align 4, !alias.scope !7, !noalias !9
257 store <16 x float> %interleaved.vec.3, <16 x float>* %13, align 4, !alias.scope !7, !noalias !9
258 %polly.indvar_next18 = add nsw i64 %polly.indvar17, 1
; Continue while the row just processed is below the tile's last row (%4).
259 %polly.loop_cond19 = icmp slt i64 %polly.indvar17, %4
260 br i1 %polly.loop_cond19, label %polly.loop_header14, label %polly.loop_exit16
; ---- k loop body (self-looping block). The four phis carry the running
; ---- C accumulators; %polly.indvar23 is k. Each slice follows one pattern:
; ----   1. split the 16-float accumulator into four stride-4 <4 x float> lanes,
; ----   2. load 16 floats of B[k][...] and split them the same way,
; ----   3. lane += splat(A[i][k]) * B-lane (separate fmul and fadd),
; ----   4. re-interleave the four lanes into element order.
262 vector.ph: ; preds = %polly.loop_header14, %vector.ph
263 %wide.vec.321 = phi <16 x float> [ %.promoted20, %polly.loop_header14 ], [ %interleaved.vec.3, %vector.ph ]
264 %wide.vec.219 = phi <16 x float> [ %.promoted18, %polly.loop_header14 ], [ %interleaved.vec.2, %vector.ph ]
265 %wide.vec.117 = phi <16 x float> [ %.promoted16, %polly.loop_header14 ], [ %interleaved.vec.1, %vector.ph ]
266 %wide.vec15 = phi <16 x float> [ %.promoted, %polly.loop_header14 ], [ %interleaved.vec, %vector.ph ]
267 %polly.indvar23 = phi i64 [ %polly.indvar11, %polly.loop_header14 ], [ %polly.indvar_next24, %vector.ph ]
; Load the scalar A[i][k] and broadcast it to all four lanes.
268 %scevgep40 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %polly.indvar17, i64 %polly.indvar23
269 %_p_scalar_41 = load float, float* %scevgep40, align 4, !alias.scope !10, !noalias !12
270 %broadcast.splatinsert13 = insertelement <4 x float> undef, float %_p_scalar_41, i32 0
271 %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x float> undef, <4 x i32> zeroinitializer
; -- Slice 0: columns %5 .. %5+15 --
272 %strided.vec = shufflevector <16 x float> %wide.vec15, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
273 %strided.vec5 = shufflevector <16 x float> %wide.vec15, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
274 %strided.vec6 = shufflevector <16 x float> %wide.vec15, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
275 %strided.vec7 = shufflevector <16 x float> %wide.vec15, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
276 %14 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %polly.indvar23, i64 %5
277 %15 = bitcast float* %14 to <16 x float>*
278 %wide.vec8 = load <16 x float>, <16 x float>* %15, align 16, !alias.scope !11, !noalias !13
279 %strided.vec9 = shufflevector <16 x float> %wide.vec8, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
280 %strided.vec10 = shufflevector <16 x float> %wide.vec8, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
281 %strided.vec11 = shufflevector <16 x float> %wide.vec8, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
282 %strided.vec12 = shufflevector <16 x float> %wide.vec8, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
283 %16 = fmul <4 x float> %broadcast.splat14, %strided.vec9
284 %17 = fadd <4 x float> %strided.vec, %16
285 %18 = fmul <4 x float> %broadcast.splat14, %strided.vec10
286 %19 = fadd <4 x float> %strided.vec5, %18
287 %20 = fmul <4 x float> %broadcast.splat14, %strided.vec11
288 %21 = fadd <4 x float> %strided.vec6, %20
289 %22 = fmul <4 x float> %broadcast.splat14, %strided.vec12
290 %23 = fadd <4 x float> %strided.vec7, %22
; Concatenate the four lanes pairwise, then interleave back to element order.
291 %24 = shufflevector <4 x float> %17, <4 x float> %19, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
292 %25 = shufflevector <4 x float> %21, <4 x float> %23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
293 %interleaved.vec = shufflevector <8 x float> %24, <8 x float> %25, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
; -- Slice 1: columns %5+16 .. %5+31 --
294 %strided.vec.1 = shufflevector <16 x float> %wide.vec.117, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
295 %strided.vec5.1 = shufflevector <16 x float> %wide.vec.117, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
296 %strided.vec6.1 = shufflevector <16 x float> %wide.vec.117, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
297 %strided.vec7.1 = shufflevector <16 x float> %wide.vec.117, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
298 %26 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %polly.indvar23, i64 %offset.idx.1
299 %27 = bitcast float* %26 to <16 x float>*
300 %wide.vec8.1 = load <16 x float>, <16 x float>* %27, align 16, !alias.scope !11, !noalias !13
301 %strided.vec9.1 = shufflevector <16 x float> %wide.vec8.1, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
302 %strided.vec10.1 = shufflevector <16 x float> %wide.vec8.1, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
303 %strided.vec11.1 = shufflevector <16 x float> %wide.vec8.1, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
304 %strided.vec12.1 = shufflevector <16 x float> %wide.vec8.1, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
305 %28 = fmul <4 x float> %broadcast.splat14, %strided.vec9.1
306 %29 = fadd <4 x float> %strided.vec.1, %28
307 %30 = fmul <4 x float> %broadcast.splat14, %strided.vec10.1
308 %31 = fadd <4 x float> %strided.vec5.1, %30
309 %32 = fmul <4 x float> %broadcast.splat14, %strided.vec11.1
310 %33 = fadd <4 x float> %strided.vec6.1, %32
311 %34 = fmul <4 x float> %broadcast.splat14, %strided.vec12.1
312 %35 = fadd <4 x float> %strided.vec7.1, %34
313 %36 = shufflevector <4 x float> %29, <4 x float> %31, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
314 %37 = shufflevector <4 x float> %33, <4 x float> %35, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
315 %interleaved.vec.1 = shufflevector <8 x float> %36, <8 x float> %37, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
; -- Slice 2: columns %5+32 .. %5+47 --
316 %strided.vec.2 = shufflevector <16 x float> %wide.vec.219, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
317 %strided.vec5.2 = shufflevector <16 x float> %wide.vec.219, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
318 %strided.vec6.2 = shufflevector <16 x float> %wide.vec.219, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
319 %strided.vec7.2 = shufflevector <16 x float> %wide.vec.219, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
320 %38 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %polly.indvar23, i64 %offset.idx.2
321 %39 = bitcast float* %38 to <16 x float>*
322 %wide.vec8.2 = load <16 x float>, <16 x float>* %39, align 16, !alias.scope !11, !noalias !13
323 %strided.vec9.2 = shufflevector <16 x float> %wide.vec8.2, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
324 %strided.vec10.2 = shufflevector <16 x float> %wide.vec8.2, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
325 %strided.vec11.2 = shufflevector <16 x float> %wide.vec8.2, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
326 %strided.vec12.2 = shufflevector <16 x float> %wide.vec8.2, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
327 %40 = fmul <4 x float> %broadcast.splat14, %strided.vec9.2
328 %41 = fadd <4 x float> %strided.vec.2, %40
329 %42 = fmul <4 x float> %broadcast.splat14, %strided.vec10.2
330 %43 = fadd <4 x float> %strided.vec5.2, %42
331 %44 = fmul <4 x float> %broadcast.splat14, %strided.vec11.2
332 %45 = fadd <4 x float> %strided.vec6.2, %44
333 %46 = fmul <4 x float> %broadcast.splat14, %strided.vec12.2
334 %47 = fadd <4 x float> %strided.vec7.2, %46
335 %48 = shufflevector <4 x float> %41, <4 x float> %43, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
336 %49 = shufflevector <4 x float> %45, <4 x float> %47, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
337 %interleaved.vec.2 = shufflevector <8 x float> %48, <8 x float> %49, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
; -- Slice 3: columns %5+48 .. %5+63 --
338 %strided.vec.3 = shufflevector <16 x float> %wide.vec.321, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
339 %strided.vec5.3 = shufflevector <16 x float> %wide.vec.321, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
340 %strided.vec6.3 = shufflevector <16 x float> %wide.vec.321, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
341 %strided.vec7.3 = shufflevector <16 x float> %wide.vec.321, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
342 %50 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %polly.indvar23, i64 %offset.idx.3
343 %51 = bitcast float* %50 to <16 x float>*
344 %wide.vec8.3 = load <16 x float>, <16 x float>* %51, align 16, !alias.scope !11, !noalias !13
345 %strided.vec9.3 = shufflevector <16 x float> %wide.vec8.3, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
346 %strided.vec10.3 = shufflevector <16 x float> %wide.vec8.3, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
347 %strided.vec11.3 = shufflevector <16 x float> %wide.vec8.3, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
348 %strided.vec12.3 = shufflevector <16 x float> %wide.vec8.3, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
349 %52 = fmul <4 x float> %broadcast.splat14, %strided.vec9.3
350 %53 = fadd <4 x float> %strided.vec.3, %52
351 %54 = fmul <4 x float> %broadcast.splat14, %strided.vec10.3
352 %55 = fadd <4 x float> %strided.vec5.3, %54
353 %56 = fmul <4 x float> %broadcast.splat14, %strided.vec11.3
354 %57 = fadd <4 x float> %strided.vec6.3, %56
355 %58 = fmul <4 x float> %broadcast.splat14, %strided.vec12.3
356 %59 = fadd <4 x float> %strided.vec7.3, %58
357 %60 = shufflevector <4 x float> %53, <4 x float> %55, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
358 %61 = shufflevector <4 x float> %57, <4 x float> %59, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
359 %interleaved.vec.3 = shufflevector <8 x float> %60, <8 x float> %61, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
; k latch: advance k; leave the loop when k reaches the end of the k tile.
360 %polly.indvar_next24 = add nuw nsw i64 %polly.indvar23, 1
361 %exitcond = icmp eq i64 %polly.indvar_next24, %indvars.iv3
362 br i1 %exitcond, label %polly.loop_exit22, label %vector.ph
365 ; Function Attrs: argmemonly nounwind
366 declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #5
368 attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "polly-optimized" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
369 attributes #1 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
370 attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
371 attributes #3 = { nounwind }
372 attributes #4 = { "polly.skip.fn" }
373 attributes #5 = { argmemonly nounwind }
375 !llvm.module.flags = !{!0}
378 !0 = !{i32 1, !"wchar_size", i32 4}
379 !1 = !{!"clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"}
380 !2 = distinct !{!2, !3, !"polly.alias.scope.MemRef_A"}
381 !3 = distinct !{!3, !"polly.alias.scope.domain"}
383 !5 = distinct !{!5, !3, !"polly.alias.scope.MemRef_B"}
385 !7 = distinct !{!7, !8, !"polly.alias.scope.MemRef_C"}
386 !8 = distinct !{!8, !"polly.alias.scope.domain"}
388 !10 = distinct !{!10, !8, !"polly.alias.scope.MemRef_A"}
389 !11 = distinct !{!11, !8, !"polly.alias.scope.MemRef_B"}