# init_array -- NOTE(review): this listing is a garbled extract. The leading
# numbers are residue of the original line numbering, operands are split
# across lines, and several instructions (prologue, branches, ret) are not
# present, so the comments below describe only what is visible.
# Visible shape: a two-level loop nest (.LBB0_1 outer, .LBB0_2 inner) that
# stores the float held in %xmm1 into two buffers addressed via %rax and %rcx.
3 .section .rodata.cst8,"aM",@progbits,8
4 .p2align 3 # -- Begin function init_array
# 8-byte read-only constant; presumably the object behind .LCPI0_0 (its label
# line is not visible here -- TODO confirm).
6 .quad 4602678819172646912 # double 0.5
10 .type init_array,@function
11 init_array
: # @init_array
15 .cfi_def_cfa_offset 16
18 .cfi_def_cfa_register %rbp
# xmm0 = double loaded RIP-relative from .LCPI0_0.
22 movsd
.LCPI0_0(%rip), %xmm0 # xmm0 = mem[0],zero
25 .LBB0_1: # %polly.loop_header
26 # =>This Loop Header: Depth=1
27 # Child Loop BB0_2 Depth 2
31 .LBB0_2: # %polly.loop_header1
32 # Parent Loop BB0_1 Depth=1
33 # => This Inner Loop Header: Depth=2
# %esi &= 0x3FE. How %esi and %xmm1 are produced is not visible here.
35 andl $
1022, %esi
# imm = 0x3FE
# Store %xmm1 as a 4-byte float at byte offset 4*%rdi - 4 in both buffers
# (%rcx-based first, then %rax-based), i.e. element index %rdi - 1.
41 movss
%xmm1
, -4(%rcx
,%rdi
,4)
42 movss
%xmm1
, -4(%rax
,%rdi
,4)
# %esi &= 0x3FF.
44 andl $
1023, %esi
# imm = 0x3FF
# Store %xmm1 at element index %rdi of both buffers, so each inner iteration
# writes two adjacent elements per buffer.
50 movss
%xmm1
, (%rcx
,%rdi
,4)
51 movss
%xmm1
, (%rax
,%rdi
,4)
# Inner loop bound check against 1537; the conditional branch is not visible.
54 cmpq $
1537, %rdi
# imm = 0x601
56 # %bb.3: # %polly.loop_exit3
57 # in Loop: Header=BB0_1 Depth=1
# Advance both base pointers by 6144 bytes (= 1536 * 4; presumably one row of
# 1536 floats -- TODO confirm against the array declarations).
59 addq $
6144, %rax
# imm = 0x1800
60 addq $
6144, %rcx
# imm = 0x1800
# Outer loop bound check: %r9 against 1536.
62 cmpq $
1536, %r9 # imm = 0x600
64 # %bb.4: # %polly.exiting
69 .size init_array, .Lfunc_end0-init_array
# print_array -- NOTE(review): garbled extract; calls, branches and the
# prologue/epilogue are not present in this listing, so only the visible
# instructions are described.
# Visible shape: a two-level loop nest (.LBB1_1 outer, .LBB1_2 inner) that
# loads 4-byte floats from %r13 + 4*%rbx, widens them to double, and keeps
# reloading stdout into %rsi.
72 .globl print_array # -- Begin function print_array
74 .type print_array,@function
75 print_array
: # @print_array
79 .cfi_def_cfa_offset 16
82 .cfi_def_cfa_register %rbp
# NOTE(review): 0xCCCCCCCD looks like a multiply-by-reciprocal constant for an
# unsigned division; the multiply itself is not visible here -- confirm.
96 movl $
3435973837, %r12d
# imm = 0xCCCCCCCD
# r14 = address of .L.str (presumably the output format string -- TODO confirm).
97 leaq
.L.str(%rip), %r14
99 .LBB1_1: # %for.cond1.preheader
100 # =>This Loop Header: Depth=1
101 # Child Loop BB1_2 Depth 2
# %rax is spilled to -48(%rbp) here and reloaded before the outer bound check.
102 movq
%rax
, -48(%rbp
) # 8-byte Spill
# rsi = current value of stdout.
103 movq stdout
(%rip
), %rsi
106 .LBB1_2: # %for.body3
107 # Parent Loop BB1_1 Depth=1
108 # => This Inner Loop Header: Depth=2
# r15d = %rax + 4*%rax (low 32 bits), i.e. %rax * 5.
112 leal
(%rax
,%rax
,4), %r15d
# Load one float from %r13 + 4*%rbx and convert it to double in %xmm0.
115 movss
(%r13,%rbx
,4), %xmm0
# xmm0 = mem[0],zero,zero,zero
116 cvtss2sd
%xmm0
, %xmm0
124 # in Loop: Header=BB1_2 Depth=2
125 movq stdout
(%rip
), %rsi
129 # in Loop: Header=BB1_2 Depth=2
131 movq stdout
(%rip
), %rsi
# Inner loop bound check: %rbx against 1536.
132 cmpq $
1536, %rbx
# imm = 0x600
135 # in Loop: Header=BB1_1 Depth=1
# Restore the value spilled at the top of the outer loop.
138 movq
-48(%rbp
), %rax
# 8-byte Reload
# Advance %r13 by 6144 bytes (= 1536 * 4), then check %rax against 1536.
140 addq $
6144, %r13 # imm = 0x1800
141 cmpq $
1536, %rax
# imm = 0x600
143 # %bb.6: # %for.end12
154 .size print_array, .Lfunc_end1-print_array
# main (prologue and outer loop headers) -- NOTE(review): garbled extract; the
# `main:` label, register pushes, calls and all branches are not present in
# this listing, so only the visible instructions are described.
# Visible shape: a five-deep loop nest (.LBB2_1 .. .LBB2_5). This part sets up
# the frame, spills loop state to %rbp-relative slots, and in .LBB2_4 loads
# sixteen 16-byte vectors from four addresses before the innermost loop.
157 .globl main # -- Begin function main
164 .cfi_def_cfa_offset 16
165 .cfi_offset %rbp, -16
167 .cfi_def_cfa_register %rbp
# Reserve 264 bytes of stack for spill slots.
173 subq $
264, %rsp
# imm = 0x108
# CFI records for %rbx and %r12-%r15; the instructions that save them are not
# visible here.
174 .cfi_offset %rbx, -56
175 .cfi_offset %r12, -48
176 .cfi_offset %r13, -40
177 .cfi_offset %r14, -32
178 .cfi_offset %r15, -24
182 movq
%rax
, -48(%rbp
) # 8-byte Spill
# 0x900000 = 1536 * 1536 * 4. NOTE(review): looks like a byte count passed as a
# third argument to a call that is not visible here -- confirm.
184 movl $
9437184, %edx
# imm = 0x900000
187 movq
%rax
, -80(%rbp
) # 8-byte Spill
189 movq
%rax
, -72(%rbp
) # 8-byte Spill
191 .LBB2_1: # %polly.loop_header8
192 # =>This Loop Header: Depth=1
193 # Child Loop BB2_2 Depth 2
194 # Child Loop BB2_3 Depth 3
195 # Child Loop BB2_4 Depth 4
196 # Child Loop BB2_5 Depth 5
# r9 = address of symbol B plus 192 bytes.
197 leaq B+
192(%rip
), %r9
201 .LBB2_2: # %polly.loop_header14
202 # Parent Loop BB2_1 Depth=1
203 # => This Loop Header: Depth=2
204 # Child Loop BB2_3 Depth 3
205 # Child Loop BB2_4 Depth 4
206 # Child Loop BB2_5 Depth 5
# Save depth-2 loop state (%rax, %rdi, %r9); the same slots are reloaded in
# the %polly.loop_exit22 block.
207 movq
%rax
, -168(%rbp
) # 8-byte Spill
208 movq
%rdi
, -176(%rbp
) # 8-byte Spill
213 movq
-72(%rbp
), %r12 # 8-byte Reload
214 movq
%r9, -184(%rbp
) # 8-byte Spill
217 .LBB2_3: # %polly.loop_header20
218 # Parent Loop BB2_1 Depth=1
219 # Parent Loop BB2_2 Depth=2
220 # => This Loop Header: Depth=3
221 # Child Loop BB2_4 Depth 4
222 # Child Loop BB2_5 Depth 5
# Save depth-3 loop state (%rax, %r12); %r14 restarts from the -48(%rbp) slot.
223 movq
%rax
, -192(%rbp
) # 8-byte Spill
224 movq
%r12, -200(%rbp
) # 8-byte Spill
225 movq
-48(%rbp
), %r14 # 8-byte Reload
227 .LBB2_4: # %polly.loop_header26
228 # Parent Loop BB2_1 Depth=1
229 # Parent Loop BB2_2 Depth=2
230 # Parent Loop BB2_3 Depth=3
231 # => This Loop Header: Depth=4
232 # Child Loop BB2_5 Depth 5
# rbx = 3 * %r14. NOTE(review): any further scaling of %rbx is not visible.
233 leaq
(%r14,%r14,2), %rbx
# Four addresses %rbx + 4*{%rdi, %rdx, %rsi, %rcx} are kept in %r8, %r15,
# %r10 and %r11; the loop-exit block stores back through them.
237 leaq
(%rbx
,%rdi
,4), %r8
238 leaq
(%rbx
,%rdx
,4), %r15
239 leaq
(%rbx
,%rsi
,4), %r10
240 leaq
(%rbx
,%rcx
,4), %r11
# Unaligned 16-byte loads at offsets 0/16/32/48 from each of the four
# addresses. Values that do not stay in a register are spilled to 16-byte
# %rbp-relative slots.
241 movups
(%rbx
,%rdi
,4), %xmm8
242 movups
16(%rbx
,%rdi
,4), %xmm0
243 movaps
%xmm0
, -144(%rbp
) # 16-byte Spill
244 movups
32(%rbx
,%rdi
,4), %xmm6
245 movups
48(%rbx
,%rdi
,4), %xmm1
246 movups
(%rbx
,%rdx
,4), %xmm15
247 movups
16(%rbx
,%rdx
,4), %xmm0
248 movaps
%xmm0
, -64(%rbp
) # 16-byte Spill
249 movups
32(%rbx
,%rdx
,4), %xmm0
250 movaps
%xmm0
, -96(%rbp
) # 16-byte Spill
251 movups
48(%rbx
,%rdx
,4), %xmm0
252 movaps
%xmm0
, -112(%rbp
) # 16-byte Spill
253 movups
(%rbx
,%rsi
,4), %xmm11
254 movups
16(%rbx
,%rsi
,4), %xmm0
255 movaps
%xmm0
, -160(%rbp
) # 16-byte Spill
256 movups
32(%rbx
,%rsi
,4), %xmm12
257 movups
48(%rbx
,%rsi
,4), %xmm0
258 movaps
%xmm0
, -128(%rbp
) # 16-byte Spill
259 movups
(%rbx
,%rcx
,4), %xmm9
260 movups
16(%rbx
,%rcx
,4), %xmm13
261 movups
32(%rbx
,%rcx
,4), %xmm2
262 movups
48(%rbx
,%rcx
,4), %xmm3
# .LBB2_5 -- innermost (depth 5) vector loop of main.
# NOTE(review): garbled extract; many instructions are missing (only one addps
# is visible, and no multiplies or branches), so this block cannot be read as
# a complete computation. What is visible:
#   * %xmm12, %xmm2 and %xmm3 are spilled on entry to -240/-256/-272(%rbp).
#   * Long unpcklps/unpckhps/shufps sequences rearrange 4-float lanes between
#     registers; the trailing comments give the exact lane selection.
#   * Aligned 16-byte loads (movaps) read from %rbx at offsets -192..-16 and
#     16..48.
#   * One scalar float is loaded from %r12 + 4*%r13 and broadcast to all lanes.
#   * Results are written back to the spill slots -304, -288, -144, -64, -96,
#     -112, -128 and -160(%rbp), which the loop-exit block reads.
#   * %rbx advances by 6144 bytes at the end of the visible body.
266 .LBB2_5: # %vector.ph
267 # Parent Loop BB2_1 Depth=1
268 # Parent Loop BB2_2 Depth=2
269 # Parent Loop BB2_3 Depth=3
270 # Parent Loop BB2_4 Depth=4
271 # => This Inner Loop Header: Depth=5
272 movaps
%xmm12
, -240(%rbp
) # 16-byte Spill
273 movaps
%xmm2
, -256(%rbp
) # 16-byte Spill
274 movaps
%xmm3
, -272(%rbp
) # 16-byte Spill
276 movaps
-144(%rbp
), %xmm7
# 16-byte Reload
277 unpcklps
%xmm7
, %xmm10
# xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1]
279 shufps $
0, %xmm6
, %xmm4
# xmm4 = xmm4[0,0],xmm6[0,0]
280 shufps $
36, %xmm4
, %xmm10
# xmm10 = xmm10[0,1],xmm4[2,0]
282 shufps $
17, %xmm8
, %xmm5
# xmm5 = xmm5[1,0],xmm8[1,0]
284 unpcklps
%xmm1
, %xmm4
# xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
285 shufps $
226, %xmm4
, %xmm5
# xmm5 = xmm5[2,0],xmm4[2,3]
287 unpckhps
%xmm7
, %xmm12
# xmm12 = xmm12[2],xmm7[2],xmm12[3],xmm7[3]
289 shufps $
34, %xmm6
, %xmm4
# xmm4 = xmm4[2,0],xmm6[2,0]
290 shufps $
36, %xmm4
, %xmm12
# xmm12 = xmm12[0,1],xmm4[2,0]
291 shufps $
51, %xmm8
, %xmm7
# xmm7 = xmm7[3,0],xmm8[3,0]
292 unpckhps
%xmm1
, %xmm6
# xmm6 = xmm6[2],xmm1[2],xmm6[3],xmm1[3]
293 shufps $
226, %xmm6
, %xmm7
# xmm7 = xmm7[2,0],xmm6[2,3]
# Aligned loads of four 16-byte vectors at -192..-144(%rbx).
294 movaps
-160(%rbx
), %xmm0
295 movaps
-144(%rbx
), %xmm1
297 shufps $
0, %xmm0
, %xmm6
# xmm6 = xmm6[0,0],xmm0[0,0]
298 movaps
-192(%rbx
), %xmm3
299 movaps
-176(%rbx
), %xmm4
301 unpcklps
%xmm4
, %xmm8
# xmm8 = xmm8[0],xmm4[0],xmm8[1],xmm4[1]
302 shufps $
36, %xmm6
, %xmm8
# xmm8 = xmm8[0,1],xmm6[2,0]
304 unpcklps
%xmm1
, %xmm2
# xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
306 shufps $
17, %xmm3
, %xmm6
# xmm6 = xmm6[1,0],xmm3[1,0]
307 shufps $
226, %xmm2
, %xmm6
# xmm6 = xmm6[2,0],xmm2[2,3]
309 shufps $
34, %xmm0
, %xmm2
# xmm2 = xmm2[2,0],xmm0[2,0]
311 unpckhps
%xmm4
, %xmm14
# xmm14 = xmm14[2],xmm4[2],xmm14[3],xmm4[3]
312 shufps $
36, %xmm2
, %xmm14
# xmm14 = xmm14[0,1],xmm2[2,0]
313 unpckhps
%xmm1
, %xmm0
# xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
314 shufps $
51, %xmm3
, %xmm4
# xmm4 = xmm4[3,0],xmm3[3,0]
315 shufps $
226, %xmm0
, %xmm4
# xmm4 = xmm4[2,0],xmm0[2,3]
# Load one float from %r12 + 4*%r13 and broadcast lane 0 to all four lanes.
316 movss
(%r12,%r13,4), %xmm0
# xmm0 = mem[0],zero,zero,zero
317 shufps $
0, %xmm0
, %xmm0
# xmm0 = xmm0[0,0,0,0]
328 unpckhps
%xmm4
, %xmm0
# xmm0 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
330 shufps $
51, %xmm8
, %xmm1
# xmm1 = xmm1[3,0],xmm8[3,0]
331 shufps $
226, %xmm0
, %xmm1
# xmm1 = xmm1[2,0],xmm0[2,3]
332 movaps
%xmm1
, -304(%rbp
) # 16-byte Spill
334 shufps $
34, %xmm14
, %xmm0
# xmm0 = xmm0[2,0],xmm14[2,0]
336 unpckhps
%xmm6
, %xmm1
# xmm1 = xmm1[2],xmm6[2],xmm1[3],xmm6[3]
337 shufps $
36, %xmm0
, %xmm1
# xmm1 = xmm1[0,1],xmm0[2,0]
338 movaps
%xmm1
, -288(%rbp
) # 16-byte Spill
340 unpcklps
%xmm4
, %xmm0
# xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
342 shufps $
17, %xmm8
, %xmm1
# xmm1 = xmm1[1,0],xmm8[1,0]
343 shufps $
226, %xmm0
, %xmm1
# xmm1 = xmm1[2,0],xmm0[2,3]
344 movaps
%xmm1
, -144(%rbp
) # 16-byte Spill
345 shufps $
0, %xmm14
, %xmm4
# xmm4 = xmm4[0,0],xmm14[0,0]
346 unpcklps
%xmm6
, %xmm8
# xmm8 = xmm8[0],xmm6[0],xmm8[1],xmm6[1]
347 shufps $
36, %xmm4
, %xmm8
# xmm8 = xmm8[0,1],xmm4[2,0]
# Second group: starts from %xmm15 and the -64/-96/-112(%rbp) slots.
348 movaps
%xmm15
, %xmm14
349 movaps
-64(%rbp
), %xmm4
# 16-byte Reload
350 unpcklps
%xmm4
, %xmm14
# xmm14 = xmm14[0],xmm4[0],xmm14[1],xmm4[1]
351 movaps
-112(%rbp
), %xmm1
# 16-byte Reload
353 movaps
-96(%rbp
), %xmm3
# 16-byte Reload
354 shufps $
0, %xmm3
, %xmm0
# xmm0 = xmm0[0,0],xmm3[0,0]
355 shufps $
36, %xmm0
, %xmm14
# xmm14 = xmm14[0,1],xmm0[2,0]
357 shufps $
17, %xmm15
, %xmm12
# xmm12 = xmm12[1,0],xmm15[1,0]
359 unpcklps
%xmm1
, %xmm2
# xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
360 shufps $
226, %xmm2
, %xmm12
# xmm12 = xmm12[2,0],xmm2[2,3]
362 unpckhps
%xmm4
, %xmm7
# xmm7 = xmm7[2],xmm4[2],xmm7[3],xmm4[3]
364 shufps $
34, %xmm3
, %xmm2
# xmm2 = xmm2[2,0],xmm3[2,0]
365 shufps $
36, %xmm2
, %xmm7
# xmm7 = xmm7[0,1],xmm2[2,0]
366 shufps $
51, %xmm15
, %xmm4
# xmm4 = xmm4[3,0],xmm15[3,0]
367 unpckhps
%xmm1
, %xmm3
# xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
368 shufps $
226, %xmm3
, %xmm4
# xmm4 = xmm4[2,0],xmm3[2,3]
369 movaps
%xmm4
, -64(%rbp
) # 16-byte Spill
# Aligned loads of four 16-byte vectors at -128..-80(%rbx).
370 movaps
-96(%rbx
), %xmm2
371 movaps
-80(%rbx
), %xmm1
373 shufps $
0, %xmm2
, %xmm4
# xmm4 = xmm4[0,0],xmm2[0,0]
374 movaps
-112(%rbx
), %xmm10
375 movaps
-128(%rbx
), %xmm0
377 unpcklps
%xmm10
, %xmm15
# xmm15 = xmm15[0],xmm10[0],xmm15[1],xmm10[1]
378 shufps $
36, %xmm4
, %xmm15
# xmm15 = xmm15[0,1],xmm4[2,0]
380 unpcklps
%xmm1
, %xmm4
# xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
382 shufps $
17, %xmm0
, %xmm6
# xmm6 = xmm6[1,0],xmm0[1,0]
383 shufps $
226, %xmm4
, %xmm6
# xmm6 = xmm6[2,0],xmm4[2,3]
385 shufps $
34, %xmm2
, %xmm3
# xmm3 = xmm3[2,0],xmm2[2,0]
387 unpckhps
%xmm10
, %xmm4
# xmm4 = xmm4[2],xmm10[2],xmm4[3],xmm10[3]
388 shufps $
36, %xmm3
, %xmm4
# xmm4 = xmm4[0,1],xmm3[2,0]
389 unpckhps
%xmm1
, %xmm2
# xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
390 shufps $
51, %xmm0
, %xmm10
# xmm10 = xmm10[3,0],xmm0[3,0]
391 shufps $
226, %xmm2
, %xmm10
# xmm10 = xmm10[2,0],xmm2[2,3]
392 movaps
%xmm5
, -224(%rbp
) # 16-byte Spill
# The only packed add visible in this listing: xmm10 += value at -64(%rbp).
400 addps
-64(%rbp
), %xmm10
# 16-byte Folded Reload
402 unpckhps
%xmm10
, %xmm0
# xmm0 = xmm0[2],xmm10[2],xmm0[3],xmm10[3]
404 shufps $
51, %xmm15
, %xmm1
# xmm1 = xmm1[3,0],xmm15[3,0]
405 shufps $
226, %xmm0
, %xmm1
# xmm1 = xmm1[2,0],xmm0[2,3]
406 movaps
%xmm1
, -112(%rbp
) # 16-byte Spill
408 shufps $
34, %xmm4
, %xmm0
# xmm0 = xmm0[2,0],xmm4[2,0]
410 unpckhps
%xmm6
, %xmm1
# xmm1 = xmm1[2],xmm6[2],xmm1[3],xmm6[3]
411 shufps $
36, %xmm0
, %xmm1
# xmm1 = xmm1[0,1],xmm0[2,0]
412 movaps
%xmm1
, -96(%rbp
) # 16-byte Spill
414 unpcklps
%xmm10
, %xmm0
# xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1]
416 shufps $
17, %xmm15
, %xmm1
# xmm1 = xmm1[1,0],xmm15[1,0]
417 shufps $
226, %xmm0
, %xmm1
# xmm1 = xmm1[2,0],xmm0[2,3]
418 movaps
%xmm1
, -64(%rbp
) # 16-byte Spill
419 shufps $
0, %xmm4
, %xmm10
# xmm10 = xmm10[0,0],xmm4[0,0]
420 unpcklps
%xmm6
, %xmm15
# xmm15 = xmm15[0],xmm6[0],xmm15[1],xmm6[1]
421 shufps $
36, %xmm10
, %xmm15
# xmm15 = xmm15[0,1],xmm10[2,0]
# Third group: starts from %xmm11 and the -160/-128/-240(%rbp) slots.
422 movaps
%xmm11
, %xmm10
423 movaps
-160(%rbp
), %xmm14
# 16-byte Reload
424 unpcklps
%xmm14
, %xmm10
# xmm10 = xmm10[0],xmm14[0],xmm10[1],xmm14[1]
425 movaps
-128(%rbp
), %xmm2
# 16-byte Reload
427 movaps
-240(%rbp
), %xmm3
# 16-byte Reload
428 shufps $
0, %xmm3
, %xmm0
# xmm0 = xmm0[0,0],xmm3[0,0]
429 shufps $
36, %xmm0
, %xmm10
# xmm10 = xmm10[0,1],xmm0[2,0]
430 movaps
%xmm14
, %xmm12
431 shufps $
17, %xmm11
, %xmm12
# xmm12 = xmm12[1,0],xmm11[1,0]
433 unpcklps
%xmm2
, %xmm0
# xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
434 shufps $
226, %xmm0
, %xmm12
# xmm12 = xmm12[2,0],xmm0[2,3]
436 unpckhps
%xmm14
, %xmm0
# xmm0 = xmm0[2],xmm14[2],xmm0[3],xmm14[3]
438 shufps $
34, %xmm3
, %xmm1
# xmm1 = xmm1[2,0],xmm3[2,0]
439 shufps $
36, %xmm1
, %xmm0
# xmm0 = xmm0[0,1],xmm1[2,0]
440 shufps $
51, %xmm11
, %xmm14
# xmm14 = xmm14[3,0],xmm11[3,0]
441 unpckhps
%xmm2
, %xmm3
# xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
442 shufps $
226, %xmm3
, %xmm14
# xmm14 = xmm14[2,0],xmm3[2,3]
# Aligned loads of four 16-byte vectors at -64..-16(%rbx).
443 movaps
-32(%rbx
), %xmm1
444 movaps
-16(%rbx
), %xmm2
446 shufps $
0, %xmm1
, %xmm3
# xmm3 = xmm3[0,0],xmm1[0,0]
447 movaps
-48(%rbx
), %xmm4
448 movaps
-64(%rbx
), %xmm5
450 unpcklps
%xmm4
, %xmm11
# xmm11 = xmm11[0],xmm4[0],xmm11[1],xmm4[1]
451 shufps $
36, %xmm3
, %xmm11
# xmm11 = xmm11[0,1],xmm3[2,0]
453 unpcklps
%xmm2
, %xmm3
# xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
455 shufps $
17, %xmm5
, %xmm7
# xmm7 = xmm7[1,0],xmm5[1,0]
456 shufps $
226, %xmm3
, %xmm7
# xmm7 = xmm7[2,0],xmm3[2,3]
458 shufps $
34, %xmm1
, %xmm3
# xmm3 = xmm3[2,0],xmm1[2,0]
460 unpckhps
%xmm4
, %xmm6
# xmm6 = xmm6[2],xmm4[2],xmm6[3],xmm4[3]
461 shufps $
36, %xmm3
, %xmm6
# xmm6 = xmm6[0,1],xmm3[2,0]
462 unpckhps
%xmm2
, %xmm1
# xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
463 shufps $
51, %xmm5
, %xmm4
# xmm4 = xmm4[3,0],xmm5[3,0]
464 shufps $
226, %xmm1
, %xmm4
# xmm4 = xmm4[2,0],xmm1[2,3]
465 movaps
-224(%rbp
), %xmm1
# 16-byte Reload
475 unpckhps
%xmm4
, %xmm0
# xmm0 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
477 shufps $
51, %xmm11
, %xmm1
# xmm1 = xmm1[3,0],xmm11[3,0]
478 shufps $
226, %xmm0
, %xmm1
# xmm1 = xmm1[2,0],xmm0[2,3]
479 movaps
%xmm1
, -128(%rbp
) # 16-byte Spill
481 shufps $
34, %xmm6
, %xmm0
# xmm0 = xmm0[2,0],xmm6[2,0]
482 movaps
%xmm11
, %xmm12
483 unpckhps
%xmm7
, %xmm12
# xmm12 = xmm12[2],xmm7[2],xmm12[3],xmm7[3]
484 shufps $
36, %xmm0
, %xmm12
# xmm12 = xmm12[0,1],xmm0[2,0]
486 unpcklps
%xmm4
, %xmm0
# xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
488 shufps $
17, %xmm11
, %xmm1
# xmm1 = xmm1[1,0],xmm11[1,0]
489 shufps $
226, %xmm0
, %xmm1
# xmm1 = xmm1[2,0],xmm0[2,3]
490 movaps
%xmm1
, -160(%rbp
) # 16-byte Spill
491 shufps $
0, %xmm6
, %xmm4
# xmm4 = xmm4[0,0],xmm6[0,0]
492 unpcklps
%xmm7
, %xmm11
# xmm11 = xmm11[0],xmm7[0],xmm11[1],xmm7[1]
493 shufps $
36, %xmm4
, %xmm11
# xmm11 = xmm11[0,1],xmm4[2,0]
# Fourth group: uses %xmm9/%xmm13 and the -272/-256(%rbp) slots.
495 unpcklps
%xmm13
, %xmm10
# xmm10 = xmm10[0],xmm13[0],xmm10[1],xmm13[1]
496 movaps
-272(%rbp
), %xmm2
# 16-byte Reload
498 movaps
-256(%rbp
), %xmm3
# 16-byte Reload
499 shufps $
0, %xmm3
, %xmm0
# xmm0 = xmm0[0,0],xmm3[0,0]
500 shufps $
36, %xmm0
, %xmm10
# xmm10 = xmm10[0,1],xmm0[2,0]
501 movaps
%xmm13
, %xmm14
502 shufps $
17, %xmm9
, %xmm14
# xmm14 = xmm14[1,0],xmm9[1,0]
504 unpcklps
%xmm2
, %xmm0
# xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
505 shufps $
226, %xmm0
, %xmm14
# xmm14 = xmm14[2,0],xmm0[2,3]
507 unpckhps
%xmm13
, %xmm0
# xmm0 = xmm0[2],xmm13[2],xmm0[3],xmm13[3]
509 shufps $
34, %xmm3
, %xmm1
# xmm1 = xmm1[2,0],xmm3[2,0]
510 shufps $
36, %xmm1
, %xmm0
# xmm0 = xmm0[0,1],xmm1[2,0]
511 shufps $
51, %xmm9
, %xmm13
# xmm13 = xmm13[3,0],xmm9[3,0]
512 unpckhps
%xmm2
, %xmm3
# xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
513 shufps $
226, %xmm3
, %xmm13
# xmm13 = xmm13[2,0],xmm3[2,3]
# Aligned loads at 16/32/48(%rbx); a load at 0(%rbx) is not visible here.
514 movaps
32(%rbx
), %xmm1
515 movaps
48(%rbx
), %xmm2
517 shufps $
0, %xmm1
, %xmm3
# xmm3 = xmm3[0,0],xmm1[0,0]
518 movaps
16(%rbx
), %xmm4
521 unpcklps
%xmm4
, %xmm9
# xmm9 = xmm9[0],xmm4[0],xmm9[1],xmm4[1]
522 shufps $
36, %xmm3
, %xmm9
# xmm9 = xmm9[0,1],xmm3[2,0]
524 unpcklps
%xmm2
, %xmm3
# xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
526 shufps $
17, %xmm5
, %xmm7
# xmm7 = xmm7[1,0],xmm5[1,0]
527 shufps $
226, %xmm3
, %xmm7
# xmm7 = xmm7[2,0],xmm3[2,3]
529 shufps $
34, %xmm1
, %xmm3
# xmm3 = xmm3[2,0],xmm1[2,0]
531 unpckhps
%xmm4
, %xmm6
# xmm6 = xmm6[2],xmm4[2],xmm6[3],xmm4[3]
532 shufps $
36, %xmm3
, %xmm6
# xmm6 = xmm6[0,1],xmm3[2,0]
533 unpckhps
%xmm2
, %xmm1
# xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
534 shufps $
51, %xmm5
, %xmm4
# xmm4 = xmm4[3,0],xmm5[3,0]
535 shufps $
226, %xmm1
, %xmm4
# xmm4 = xmm4[2,0],xmm1[2,3]
536 movaps
-224(%rbp
), %xmm1
# 16-byte Reload
546 unpckhps
%xmm4
, %xmm0
# xmm0 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
548 shufps $
51, %xmm9
, %xmm3
# xmm3 = xmm3[3,0],xmm9[3,0]
549 shufps $
226, %xmm0
, %xmm3
# xmm3 = xmm3[2,0],xmm0[2,3]
551 shufps $
34, %xmm6
, %xmm0
# xmm0 = xmm0[2,0],xmm6[2,0]
553 unpckhps
%xmm7
, %xmm2
# xmm2 = xmm2[2],xmm7[2],xmm2[3],xmm7[3]
554 shufps $
36, %xmm0
, %xmm2
# xmm2 = xmm2[0,1],xmm0[2,0]
556 unpcklps
%xmm4
, %xmm0
# xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
558 shufps $
17, %xmm9
, %xmm13
# xmm13 = xmm13[1,0],xmm9[1,0]
559 shufps $
226, %xmm0
, %xmm13
# xmm13 = xmm13[2,0],xmm0[2,3]
560 shufps $
0, %xmm6
, %xmm4
# xmm4 = xmm4[0,0],xmm6[0,0]
# Bring back the values saved at -288/-304(%rbp) earlier in this iteration.
561 movaps
-288(%rbp
), %xmm6
# 16-byte Reload
562 movaps
-304(%rbp
), %xmm1
# 16-byte Reload
563 unpcklps
%xmm7
, %xmm9
# xmm9 = xmm9[0],xmm7[0],xmm9[1],xmm7[1]
564 shufps $
36, %xmm4
, %xmm9
# xmm9 = xmm9[0,1],xmm4[2,0]
# Advance %rbx by 6144 bytes (= 1536 * 4) for the next iteration; the loop
# compare and branch are not visible here.
566 addq $
6144, %rbx
# imm = 0x1800
# Loop-exit blocks of main (%bb.6 .. %bb.10) -- NOTE(review): garbled extract;
# branches, register pops and ret are not present in this listing.
# %bb.6 stores 16-byte values (from registers and reloaded spill slots) through
# the addresses held in %r8, %r15, %r10 and %r11. The later blocks restore
# spilled loop state, bump counters and pointers, and compare against 1536.
# Stores to 0(%r8) and 0(%r11) are not visible here.
569 # %bb.6: # %polly.loop_exit34
570 # in Loop: Header=BB2_4 Depth=4
572 movaps
-144(%rbp
), %xmm0
# 16-byte Reload
573 movups
%xmm0
, 16(%r8)
574 movups
%xmm6
, 32(%r8)
575 movups
%xmm1
, 48(%r8)
576 movaps
-112(%rbp
), %xmm0
# 16-byte Reload
577 movups
%xmm0
, 48(%r15)
578 movaps
-96(%rbp
), %xmm0
# 16-byte Reload
579 movups
%xmm0
, 32(%r15)
580 movaps
-64(%rbp
), %xmm0
# 16-byte Reload
581 movups
%xmm0
, 16(%r15)
582 movups
%xmm15
, (%r15)
583 movaps
-128(%rbp
), %xmm0
# 16-byte Reload
584 movups
%xmm0
, 48(%r10)
585 movaps
-160(%rbp
), %xmm0
# 16-byte Reload
586 movups
%xmm0
, 16(%r10)
587 movups
%xmm11
, (%r10)
588 movups
%xmm12
, 32(%r10)
589 movups
%xmm3
, 48(%r11)
590 movups
%xmm13
, 16(%r11)
592 movups
%xmm2
, 32(%r11)
# Depth-4 step: %r12 += 6144 bytes, then compare %r14 with the bound kept in
# the -80(%rbp) slot.
594 addq $
6144, %r12 # imm = 0x1800
595 cmpq
-80(%rbp
), %r14 # 8-byte Folded Reload
597 # %bb.7: # %polly.loop_exit28
598 # in Loop: Header=BB2_3 Depth=3
# Depth-3 step: restore %rax and %r12 saved at .LBB2_3, advance %r9 by
# 393216 bytes (= 64 * 6144) and %r12 by 256 bytes, check %rax against 1536.
599 movq
-192(%rbp
), %rax
# 8-byte Reload
601 addq $
393216, %r9 # imm = 0x60000
602 movq
-200(%rbp
), %r12 # 8-byte Reload
603 addq $
256, %r12 # imm = 0x100
604 cmpq $
1536, %rax
# imm = 0x600
606 # %bb.8: # %polly.loop_exit22
607 # in Loop: Header=BB2_2 Depth=2
# Depth-2 step: restore %rax, %rdi and %r9 saved at .LBB2_2, advance %r9 by
# 256 bytes, check %rax against 1536.
608 movq
-168(%rbp
), %rax
# 8-byte Reload
610 movq
-176(%rbp
), %rdi
# 8-byte Reload
612 movq
-184(%rbp
), %r9 # 8-byte Reload
613 addq $
256, %r9 # imm = 0x100
614 cmpq $
1536, %rax
# imm = 0x600
616 # %bb.9: # %polly.loop_exit16
617 # in Loop: Header=BB2_1 Depth=1
# Depth-1 step: the -80(%rbp) slot grows by 64 and the -72(%rbp) slot by
# 393216, both updated in memory. %rcx is written back to the -48(%rbp) slot
# and checked against 1536; how %rcx is derived is not visible here.
618 movq
-48(%rbp
), %rax
# 8-byte Reload
621 addq $
64, -80(%rbp
) # 8-byte Folded Spill
622 addq $
393216, -72(%rbp
) # 8-byte Folded Spill
625 movq
%rcx
, -48(%rbp
) # 8-byte Spill
626 cmpq $
1536, %rcx
# imm = 0x600
628 # %bb.10: # %polly.exiting
# Release the 264-byte spill area reserved in the prologue.
630 addq $
264, %rsp
# imm = 0x108
640 .size main, .Lfunc_end2-main
647 .type .L.str,@object # @.str
648 .section .rodata.str1.1,"aMS",@progbits,1
656 .ident "clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"
657 .section ".note.GNU-stack","",@progbits