; Module header: originating source file, data layout string and target triple.
2 source_filename = "matmul.c"
3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
4 target triple = "x86_64-unknown-linux-gnu"
; Struct layouts for the stream type that @stdout points to; the two types
; refer to each other through pointers.
6 %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
7 %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
; @A, @B and @C are 1536 x 1536 float matrices, zero-initialized and 16-byte
; aligned. @stdout is an external pointer to the output stream. @.str is the
; 5-byte, NUL-terminated format string "%lf " handed to fprintf in
; @print_array.
9 @A = common dso_local local_unnamed_addr global [1536 x [1536 x float]] zeroinitializer, align 16
10 @B = common dso_local local_unnamed_addr global [1536 x [1536 x float]] zeroinitializer, align 16
11 @stdout = external dso_local local_unnamed_addr global %struct._IO_FILE*, align 8
12 @.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1
13 @C = common dso_local local_unnamed_addr global [1536 x [1536 x float]] zeroinitializer, align 16
; init_array: stores the same computed float into @A and @B for every index
; pair of a 1536 x 1536 iteration space. Takes no arguments, returns void.
; NOTE(review): several lines seem to be absent from this listing (no entry
; label, no definitions for %3, %4 or %7, no visible return); the comments
; below describe only the instructions that are shown.
15 ; Function Attrs: noinline norecurse nounwind uwtable writeonly
16 define dso_local void @init_array() local_unnamed_addr #0 {
18 br label %polly.loop_header
; Exit target of the outer loop; no instructions are visible for it here.
20 polly.exiting: ; preds = %polly.loop_exit3
; Outer loop header: %polly.indvar is the first array index (starts at 0) and
; %0 is its 32-bit truncation, reused by both halves of the inner body.
23 polly.loop_header: ; preds = %polly.loop_exit3, %entry
24 %polly.indvar = phi i64 [ 0, %entry ], [ %polly.indvar_next, %polly.loop_exit3 ]
25 %0 = trunc i64 %polly.indvar to i32
26 br label %polly.loop_header1
; Outer latch: advance the outer counter by 1 and leave once it equals 1536.
28 polly.loop_exit3: ; preds = %polly.loop_header1
29 %polly.indvar_next = add nuw nsw i64 %polly.indvar, 1
30 %exitcond1 = icmp eq i64 %polly.indvar_next, 1536
31 br i1 %exitcond1, label %polly.exiting, label %polly.loop_header, !llvm.loop !2
; Inner loop: a single block that handles two consecutive second indices per
; trip (%polly.indvar4 and %polly.indvar4 | 1) and then steps by 2.
33 polly.loop_header1: ; preds = %polly.loop_header1, %polly.loop_header
34 %polly.indvar4 = phi i64 [ 0, %polly.loop_header ], [ %polly.indvar_next5.1, %polly.loop_header1 ]
35 %1 = trunc i64 %polly.indvar4 to i32
; %2 is the 32-bit product of the two truncated indices.
36 %2 = mul nuw nsw i32 %1, %0
; %4 is defined on a line not shown here (presumably derived from %2 - TODO
; confirm). It is converted to double, multiplied by 0.5 and narrowed to float.
39 %p_conv = sitofp i32 %4 to double
40 %p_div = fmul double %p_conv, 5.000000e-01
41 %p_conv4 = fptrunc double %p_div to float
; First element of the pair: the same value goes to A[i][j] and B[i][j].
42 %scevgep7 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %polly.indvar, i64 %polly.indvar4
43 store float %p_conv4, float* %scevgep7, align 8, !alias.scope !3, !noalias !5, !llvm.mem.parallel_loop_access !2
44 %scevgep9 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %polly.indvar, i64 %polly.indvar4
45 store float %p_conv4, float* %scevgep9, align 8, !alias.scope !6, !noalias !7, !llvm.mem.parallel_loop_access !2
; Second element of the pair. %polly.indvar4 starts at 0 and only ever grows
; by 2, so it is even and `or ..., 1` yields the next index.
46 %polly.indvar_next5 = or i64 %polly.indvar4, 1
47 %5 = trunc i64 %polly.indvar_next5 to i32
48 %6 = mul nuw nsw i32 %5, %0
; %7 is defined on a line not shown here (presumably derived from %6 - TODO
; confirm).
50 %8 = add nuw nsw i32 %7, 1
51 %p_conv.1 = sitofp i32 %8 to double
52 %p_div.1 = fmul double %p_conv.1, 5.000000e-01
53 %p_conv4.1 = fptrunc double %p_div.1 to float
54 %scevgep7.1 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %polly.indvar, i64 %polly.indvar_next5
55 store float %p_conv4.1, float* %scevgep7.1, align 4, !alias.scope !3, !noalias !5, !llvm.mem.parallel_loop_access !2
56 %scevgep9.1 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %polly.indvar, i64 %polly.indvar_next5
57 store float %p_conv4.1, float* %scevgep9.1, align 4, !alias.scope !6, !noalias !7, !llvm.mem.parallel_loop_access !2
; Inner latch: step the second index by 2 and leave once it equals 1536.
58 %polly.indvar_next5.1 = add nuw nsw i64 %polly.indvar4, 2
59 %exitcond.1 = icmp eq i64 %polly.indvar_next5.1, 1536
60 br i1 %exitcond.1, label %polly.loop_exit3, label %polly.loop_header1
; print_array: prints every element of @C to the stream loaded from @stdout
; using the "%lf " format string (@.str). A newline (character code 10) is
; written after each element whose second index modulo 80 equals 79, and once
; more after each complete pass over the second index.
; Takes no arguments, returns void.
; NOTE(review): some lines seem to be absent from this listing (no entry
; label, and no visible terminators for %for.cond1.preheader, %if.then or
; %for.end12); the comments describe only the instructions that are shown.
63 ; Function Attrs: noinline nounwind uwtable
64 define dso_local void @print_array() local_unnamed_addr #1 {
66 br label %for.cond1.preheader
; Outer loop header: %indvars.iv6 is the first array index. The stream
; pointer is loaded from @stdout for the first inner iteration.
68 for.cond1.preheader: ; preds = %for.end, %entry
69 %indvars.iv6 = phi i64 [ 0, %entry ], [ %indvars.iv.next7, %for.end ]
70 %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
; Inner loop body: %indvars.iv is the second array index. %1 is the stream
; to print to - %0 on the first trip, otherwise the value reloaded in %for.inc.
73 for.body3: ; preds = %for.inc, %for.cond1.preheader
74 %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.inc ]
75 %1 = phi %struct._IO_FILE* [ %0, %for.cond1.preheader ], [ %5, %for.inc ]
76 %arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %indvars.iv6, i64 %indvars.iv
77 %2 = load float, float* %arrayidx5, align 4
; The float is widened to double before being passed as the variadic argument.
78 %conv = fpext float %2 to double
79 %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #4
; Line break check: take %if.then when (second index mod 80) == 79.
80 %3 = trunc i64 %indvars.iv to i32
81 %rem = urem i32 %3, 80
82 %cmp6 = icmp eq i32 %rem, 79
83 br i1 %cmp6, label %if.then, label %for.inc
; Writes character 10 (newline) to a freshly loaded @stdout.
85 if.then: ; preds = %for.body3
86 %4 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
87 %fputc3 = tail call i32 @fputc(i32 10, %struct._IO_FILE* %4)
; Inner latch: advance the second index, reload @stdout for the next use, and
; leave the inner loop once the index equals 1536.
90 for.inc: ; preds = %if.then, %for.body3
91 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
92 %5 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
93 %exitcond = icmp eq i64 %indvars.iv.next, 1536
94 br i1 %exitcond, label %for.end, label %for.body3
; Outer latch: write a newline after the inner loop, advance the first index,
; and leave the outer loop once it equals 1536.
96 for.end: ; preds = %for.inc
97 %fputc = tail call i32 @fputc(i32 10, %struct._IO_FILE* %5)
98 %indvars.iv.next7 = add nuw nsw i64 %indvars.iv6, 1
99 %exitcond8 = icmp eq i64 %indvars.iv.next7, 1536
100 br i1 %exitcond8, label %for.end12, label %for.cond1.preheader
; Exit target of the outer loop; no instructions are visible for it here.
102 for.end12: ; preds = %for.end
; Declaration only (no body in this module) of the variadic fprintf: takes a
; stream pointer and a read-only format pointer, both marked nocapture, and
; returns i32.
106 ; Function Attrs: nounwind
107 declare dso_local i32 @fprintf(%struct._IO_FILE* nocapture, i8* nocapture readonly, ...) local_unnamed_addr #2
; main: calls @init_array, zero-fills @C with a 9437184-byte memset
; (1536 * 1536 * 4 bytes), then runs a loop nest that performs
;     C[i][j] += A[i][k] * B[k][j]
; Three outer loops step their counters by 64 up to 1536. Inside them, a loop
; walks first indices of C one at a time and an innermost loop walks k one at
; a time, updating 64 consecutive floats of C per trip as four <16 x float>
; chunks processed with <4 x float> arithmetic.
; Takes no arguments; declared to return i32.
; NOTE(review): some lines seem to be absent from this listing (no entry
; label, no visible terminator for %polly.loop_header26, no visible return);
; the comments describe only the instructions that are shown.
109 ; Function Attrs: noinline norecurse nounwind uwtable
110 define dso_local i32 @main() local_unnamed_addr #3 {
112 tail call void @init_array()
113 call void @llvm.memset.p0i8.i64(i8* align 16 bitcast ([1536 x [1536 x float]]* @C to i8*), i8 0, i64 9437184, i1 false)
114 br label %polly.loop_header8
; Exit target of the outermost loop; no instructions are visible for it here.
116 polly.exiting: ; preds = %polly.loop_exit16
; Outermost loop: %polly.indvar11 is the starting first index of C/A for this
; trip (0, 64, ...). %indvars.iv5 starts at 64 and advances in lockstep; it is
; the exclusive end of that index range (compared against in %polly.loop_exit34).
119 polly.loop_header8: ; preds = %entry, %polly.loop_exit16
120 %indvars.iv5 = phi i64 [ 64, %entry ], [ %indvars.iv.next6, %polly.loop_exit16 ]
121 %polly.indvar11 = phi i64 [ 0, %entry ], [ %polly.indvar_next12, %polly.loop_exit16 ]
122 br label %polly.loop_header14
; Outermost latch: step by 64 and continue while the counter is below 1536.
124 polly.loop_exit16: ; preds = %polly.loop_exit22
125 %polly.indvar_next12 = add nuw nsw i64 %polly.indvar11, 64
126 %polly.loop_cond13 = icmp ult i64 %polly.indvar_next12, 1536
127 %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 64
128 br i1 %polly.loop_cond13, label %polly.loop_header8, label %polly.exiting, !llvm.loop !8
; Second loop: %polly.indvar17 steps by 64 while %indvar counts trips by 1.
; %0 = %indvar * 64 is the second index (into C and B) of the first 16-float
; chunk; %offset.idx.1/.2/.3 are the second indices of the next three chunks
; (+16, +32, +48, expressed with `or` since the low 6 bits of %0 are zero).
130 polly.loop_header14: ; preds = %polly.loop_header8, %polly.loop_exit22
131 %indvar = phi i64 [ 0, %polly.loop_header8 ], [ %indvar.next, %polly.loop_exit22 ]
132 %polly.indvar17 = phi i64 [ 0, %polly.loop_header8 ], [ %polly.indvar_next18, %polly.loop_exit22 ]
133 %0 = shl i64 %indvar, 6
134 %offset.idx.1 = or i64 %0, 16
135 %offset.idx.2 = or i64 %0, 32
136 %offset.idx.3 = or i64 %0, 48
137 br label %polly.loop_header20
; Second-loop latch: step by 64 and continue while the counter is below 1536.
139 polly.loop_exit22: ; preds = %polly.loop_exit28
140 %polly.indvar_next18 = add nuw nsw i64 %polly.indvar17, 64
141 %polly.loop_cond19 = icmp ult i64 %polly.indvar_next18, 1536
142 %indvar.next = add i64 %indvar, 1
143 br i1 %polly.loop_cond19, label %polly.loop_header14, label %polly.loop_exit16
; Third loop: %polly.indvar23 is the starting k for this trip (0, 64, ...).
; %indvars.iv3 starts at 64 and advances in lockstep; it is the exclusive end
; of the k range (compared against at the bottom of %vector.ph).
145 polly.loop_header20: ; preds = %polly.loop_header14, %polly.loop_exit28
146 %indvars.iv3 = phi i64 [ 64, %polly.loop_header14 ], [ %indvars.iv.next4, %polly.loop_exit28 ]
147 %polly.indvar23 = phi i64 [ 0, %polly.loop_header14 ], [ %polly.indvar_next24, %polly.loop_exit28 ]
148 br label %polly.loop_header26
; Third-loop latch: step by 64 and continue while the counter is below 1536.
150 polly.loop_exit28: ; preds = %polly.loop_exit34
151 %polly.indvar_next24 = add nuw nsw i64 %polly.indvar23, 64
152 %polly.loop_cond25 = icmp ult i64 %polly.indvar_next24, 1536
153 %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 64
154 br i1 %polly.loop_cond25, label %polly.loop_header20, label %polly.loop_exit22
; Fourth loop header: %polly.indvar29 is the first index of C and A, running
; from %polly.indvar11 up to (not including) %indvars.iv5. Pointers %2, %4, %6
; and %8 address four 16-float chunks of C at that first index; their current
; contents are loaded once here and carried through %vector.ph as accumulators.
156 polly.loop_header26: ; preds = %polly.loop_exit34, %polly.loop_header20
157 %polly.indvar29 = phi i64 [ %polly.indvar11, %polly.loop_header20 ], [ %polly.indvar_next30, %polly.loop_exit34 ]
158 %1 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %polly.indvar29, i64 %0
159 %2 = bitcast float* %1 to <16 x float>*
160 %3 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %polly.indvar29, i64 %offset.idx.1
161 %4 = bitcast float* %3 to <16 x float>*
162 %5 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %polly.indvar29, i64 %offset.idx.2
163 %6 = bitcast float* %5 to <16 x float>*
164 %7 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %polly.indvar29, i64 %offset.idx.3
165 %8 = bitcast float* %7 to <16 x float>*
166 %.promoted = load <16 x float>, <16 x float>* %2, align 4, !alias.scope !9, !noalias !11
167 %.promoted22 = load <16 x float>, <16 x float>* %4, align 4, !alias.scope !9, !noalias !11
168 %.promoted24 = load <16 x float>, <16 x float>* %6, align 4, !alias.scope !9, !noalias !11
169 %.promoted26 = load <16 x float>, <16 x float>* %8, align 4, !alias.scope !9, !noalias !11
; Fourth-loop latch: write the four accumulated chunks back to C, advance the
; first index by 1 and leave once it equals %indvars.iv5.
172 polly.loop_exit34: ; preds = %vector.ph
173 store <16 x float> %interleaved.vec, <16 x float>* %2, align 4, !alias.scope !9, !noalias !11
174 store <16 x float> %interleaved.vec.1, <16 x float>* %4, align 4, !alias.scope !9, !noalias !11
175 store <16 x float> %interleaved.vec.2, <16 x float>* %6, align 4, !alias.scope !9, !noalias !11
176 store <16 x float> %interleaved.vec.3, <16 x float>* %8, align 4, !alias.scope !9, !noalias !11
177 %polly.indvar_next30 = add nuw nsw i64 %polly.indvar29, 1
178 %exitcond7 = icmp eq i64 %polly.indvar_next30, %indvars.iv5
179 br i1 %exitcond7, label %polly.loop_exit28, label %polly.loop_header26
; Innermost loop (single block): %polly.indvar35 is k, running from
; %polly.indvar23 up to (not including) %indvars.iv3. The four %wide.vec*
; phis carry the running C chunks: the values loaded in %polly.loop_header26
; on entry, the updated %interleaved.vec* values on later trips.
181 vector.ph: ; preds = %polly.loop_header26, %vector.ph
182 %wide.vec.327 = phi <16 x float> [ %.promoted26, %polly.loop_header26 ], [ %interleaved.vec.3, %vector.ph ]
183 %wide.vec.225 = phi <16 x float> [ %.promoted24, %polly.loop_header26 ], [ %interleaved.vec.2, %vector.ph ]
184 %wide.vec.123 = phi <16 x float> [ %.promoted22, %polly.loop_header26 ], [ %interleaved.vec.1, %vector.ph ]
185 %wide.vec21 = phi <16 x float> [ %.promoted, %polly.loop_header26 ], [ %interleaved.vec, %vector.ph ]
186 %polly.indvar35 = phi i64 [ %polly.indvar23, %polly.loop_header26 ], [ %polly.indvar_next36, %vector.ph ]
; Load the scalar A[i][k] and splat it across a <4 x float>.
187 %scevgep53 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %polly.indvar29, i64 %polly.indvar35
188 %_p_scalar_54 = load float, float* %scevgep53, align 4, !alias.scope !12, !noalias !14, !llvm.mem.parallel_loop_access !8
189 %broadcast.splatinsert19 = insertelement <4 x float> undef, float %_p_scalar_54, i32 0
190 %broadcast.splat20 = shufflevector <4 x float> %broadcast.splatinsert19, <4 x float> undef, <4 x i32> zeroinitializer
; Chunk 0: split the running C chunk into four stride-4 <4 x float> lanes
; (elements 0,4,8,12 / 1,5,9,13 / 2,6,10,14 / 3,7,11,15).
191 %strided.vec = shufflevector <16 x float> %wide.vec21, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
192 %strided.vec11 = shufflevector <16 x float> %wide.vec21, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
193 %strided.vec12 = shufflevector <16 x float> %wide.vec21, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
194 %strided.vec13 = shufflevector <16 x float> %wide.vec21, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
; Load the matching 16 floats of B at first index k and split them the same way.
195 %9 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %polly.indvar35, i64 %0
196 %10 = bitcast float* %9 to <16 x float>*
197 %wide.vec14 = load <16 x float>, <16 x float>* %10, align 16, !alias.scope !13, !noalias !15
198 %strided.vec15 = shufflevector <16 x float> %wide.vec14, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
199 %strided.vec16 = shufflevector <16 x float> %wide.vec14, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
200 %strided.vec17 = shufflevector <16 x float> %wide.vec14, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
201 %strided.vec18 = shufflevector <16 x float> %wide.vec14, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
; Per lane: C_lane + splat(A[i][k]) * B_lane.
202 %11 = fmul <4 x float> %broadcast.splat20, %strided.vec15
203 %12 = fadd <4 x float> %strided.vec, %11
204 %13 = fmul <4 x float> %broadcast.splat20, %strided.vec16
205 %14 = fadd <4 x float> %strided.vec11, %13
206 %15 = fmul <4 x float> %broadcast.splat20, %strided.vec17
207 %16 = fadd <4 x float> %strided.vec12, %15
208 %17 = fmul <4 x float> %broadcast.splat20, %strided.vec18
209 %18 = fadd <4 x float> %strided.vec13, %17
; Concatenate the four lane results and interleave them back into the
; original element order of the 16-float chunk.
210 %19 = shufflevector <4 x float> %12, <4 x float> %14, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
211 %20 = shufflevector <4 x float> %16, <4 x float> %18, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
212 %interleaved.vec = shufflevector <8 x float> %19, <8 x float> %20, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
; Chunk 1: same split / multiply-add / re-interleave sequence, using
; %wide.vec.123 and B at second index %offset.idx.1.
213 %strided.vec.1 = shufflevector <16 x float> %wide.vec.123, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
214 %strided.vec11.1 = shufflevector <16 x float> %wide.vec.123, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
215 %strided.vec12.1 = shufflevector <16 x float> %wide.vec.123, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
216 %strided.vec13.1 = shufflevector <16 x float> %wide.vec.123, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
217 %21 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %polly.indvar35, i64 %offset.idx.1
218 %22 = bitcast float* %21 to <16 x float>*
219 %wide.vec14.1 = load <16 x float>, <16 x float>* %22, align 16, !alias.scope !13, !noalias !15
220 %strided.vec15.1 = shufflevector <16 x float> %wide.vec14.1, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
221 %strided.vec16.1 = shufflevector <16 x float> %wide.vec14.1, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
222 %strided.vec17.1 = shufflevector <16 x float> %wide.vec14.1, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
223 %strided.vec18.1 = shufflevector <16 x float> %wide.vec14.1, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
224 %23 = fmul <4 x float> %broadcast.splat20, %strided.vec15.1
225 %24 = fadd <4 x float> %strided.vec.1, %23
226 %25 = fmul <4 x float> %broadcast.splat20, %strided.vec16.1
227 %26 = fadd <4 x float> %strided.vec11.1, %25
228 %27 = fmul <4 x float> %broadcast.splat20, %strided.vec17.1
229 %28 = fadd <4 x float> %strided.vec12.1, %27
230 %29 = fmul <4 x float> %broadcast.splat20, %strided.vec18.1
231 %30 = fadd <4 x float> %strided.vec13.1, %29
232 %31 = shufflevector <4 x float> %24, <4 x float> %26, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
233 %32 = shufflevector <4 x float> %28, <4 x float> %30, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
234 %interleaved.vec.1 = shufflevector <8 x float> %31, <8 x float> %32, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
; Chunk 2: same sequence, using %wide.vec.225 and B at second index
; %offset.idx.2.
235 %strided.vec.2 = shufflevector <16 x float> %wide.vec.225, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
236 %strided.vec11.2 = shufflevector <16 x float> %wide.vec.225, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
237 %strided.vec12.2 = shufflevector <16 x float> %wide.vec.225, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
238 %strided.vec13.2 = shufflevector <16 x float> %wide.vec.225, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
239 %33 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %polly.indvar35, i64 %offset.idx.2
240 %34 = bitcast float* %33 to <16 x float>*
241 %wide.vec14.2 = load <16 x float>, <16 x float>* %34, align 16, !alias.scope !13, !noalias !15
242 %strided.vec15.2 = shufflevector <16 x float> %wide.vec14.2, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
243 %strided.vec16.2 = shufflevector <16 x float> %wide.vec14.2, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
244 %strided.vec17.2 = shufflevector <16 x float> %wide.vec14.2, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
245 %strided.vec18.2 = shufflevector <16 x float> %wide.vec14.2, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
246 %35 = fmul <4 x float> %broadcast.splat20, %strided.vec15.2
247 %36 = fadd <4 x float> %strided.vec.2, %35
248 %37 = fmul <4 x float> %broadcast.splat20, %strided.vec16.2
249 %38 = fadd <4 x float> %strided.vec11.2, %37
250 %39 = fmul <4 x float> %broadcast.splat20, %strided.vec17.2
251 %40 = fadd <4 x float> %strided.vec12.2, %39
252 %41 = fmul <4 x float> %broadcast.splat20, %strided.vec18.2
253 %42 = fadd <4 x float> %strided.vec13.2, %41
254 %43 = shufflevector <4 x float> %36, <4 x float> %38, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
255 %44 = shufflevector <4 x float> %40, <4 x float> %42, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
256 %interleaved.vec.2 = shufflevector <8 x float> %43, <8 x float> %44, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
; Chunk 3: same sequence, using %wide.vec.327 and B at second index
; %offset.idx.3.
257 %strided.vec.3 = shufflevector <16 x float> %wide.vec.327, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
258 %strided.vec11.3 = shufflevector <16 x float> %wide.vec.327, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
259 %strided.vec12.3 = shufflevector <16 x float> %wide.vec.327, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
260 %strided.vec13.3 = shufflevector <16 x float> %wide.vec.327, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
261 %45 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %polly.indvar35, i64 %offset.idx.3
262 %46 = bitcast float* %45 to <16 x float>*
263 %wide.vec14.3 = load <16 x float>, <16 x float>* %46, align 16, !alias.scope !13, !noalias !15
264 %strided.vec15.3 = shufflevector <16 x float> %wide.vec14.3, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
265 %strided.vec16.3 = shufflevector <16 x float> %wide.vec14.3, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
266 %strided.vec17.3 = shufflevector <16 x float> %wide.vec14.3, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
267 %strided.vec18.3 = shufflevector <16 x float> %wide.vec14.3, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
268 %47 = fmul <4 x float> %broadcast.splat20, %strided.vec15.3
269 %48 = fadd <4 x float> %strided.vec.3, %47
270 %49 = fmul <4 x float> %broadcast.splat20, %strided.vec16.3
271 %50 = fadd <4 x float> %strided.vec11.3, %49
272 %51 = fmul <4 x float> %broadcast.splat20, %strided.vec17.3
273 %52 = fadd <4 x float> %strided.vec12.3, %51
274 %53 = fmul <4 x float> %broadcast.splat20, %strided.vec18.3
275 %54 = fadd <4 x float> %strided.vec13.3, %53
276 %55 = shufflevector <4 x float> %48, <4 x float> %50, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
277 %56 = shufflevector <4 x float> %52, <4 x float> %54, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
278 %interleaved.vec.3 = shufflevector <8 x float> %55, <8 x float> %56, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
; Innermost latch: advance k by 1 and leave once it equals %indvars.iv3.
279 %polly.indvar_next36 = add nuw nsw i64 %polly.indvar35, 1
280 %exitcond = icmp eq i64 %polly.indvar_next36, %indvars.iv3
281 br i1 %exitcond, label %polly.loop_exit34, label %vector.ph
; Declaration only (no body in this module) of fputc: takes an i32 character
; and a nocapture stream pointer, returns i32. Called from @print_array with
; character code 10.
284 ; Function Attrs: nounwind
285 declare i32 @fputc(i32, %struct._IO_FILE* nocapture) local_unnamed_addr #4
; Declaration of the memset intrinsic taking a destination pointer, a fill
; byte, an i64 length and an i1 flag. @main calls it to zero-fill @C.
287 ; Function Attrs: argmemonly nounwind
288 declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #5
; Attribute groups referenced by the definitions and declarations above:
;   #0 - @init_array      #1 - @print_array     #2 - @fprintf (declaration)
;   #3 - @main            #4 - @fputc (declaration) and the fprintf call site
;   #5 - @llvm.memset.p0i8.i64 (declaration)
; Groups #0 and #3 additionally carry the "polly-optimized" string attribute.
290 attributes #0 = { noinline norecurse nounwind uwtable writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "polly-optimized" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
291 attributes #1 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
292 attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
293 attributes #3 = { noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "polly-optimized" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
294 attributes #4 = { nounwind }
295 attributes #5 = { argmemonly nounwind }
; Module-level metadata. !0 is the single module flag ("wchar_size" = 4) and
; !1 holds the producing compiler's version string.
; NOTE(review): the code above also references !2, !5, !7, !8, !11, !14 and
; !15, whose definitions are not visible in this listing.
297 !llvm.module.flags = !{!0}
300 !0 = !{i32 1, !"wchar_size", i32 4}
301 !1 = !{!"clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"}
; Alias scopes for the A and B accesses in @init_array, sharing domain !4.
303 !3 = distinct !{!3, !4, !"polly.alias.scope.MemRef_A"}
304 !4 = distinct !{!4, !"polly.alias.scope.domain"}
306 !6 = distinct !{!6, !4, !"polly.alias.scope.MemRef_B"}
; Alias scopes for the C, A and B accesses in @main, sharing domain !10.
309 !9 = distinct !{!9, !10, !"polly.alias.scope.MemRef_C"}
310 !10 = distinct !{!10, !"polly.alias.scope.domain"}
312 !12 = distinct !{!12, !10, !"polly.alias.scope.MemRef_A"}
313 !13 = distinct !{!13, !10, !"polly.alias.scope.MemRef_B"}