# Repository web-view header (appears to be a commit title): "Bump version to 19.1.0 (final)"
# Source: llvm-project.git / polly/docs/experiments/matmul/matmul.polly.interchanged.s
# Blob:   21770b0a917c1e96443b9e5d53a974e5079697ac
# x86-64 assembly, AT&T operand order (source, destination), GNU as directives.
# Compiler output for matmul.c; the .ident directive near the end of the file
# names the producing compiler.  Call sites pass arguments in %rdi/%rsi/%edx
# and set %al before a variadic call, i.e. the System V AMD64 convention.
        .text
        .file   "matmul.c"
# -----------------------------------------------------------------------
# init_array
#
# Fills the global float matrices A and B with identical values.  With
# i = row (%r9) and j = column, every element is set to
#
#     (float)( (((i * j) & 1023) + 1) * 0.5 )
#
# Reads no argument register and sets up no return value.
#
# Register roles:
#   %rax  pointer to the current row of B (advances 6144 bytes per row)
#   %rcx  pointer to the current row of A (advances 6144 bytes per row)
#   %r9   row index i, 0 .. 1535
#   %r8d  2 * i  (starts at 0, +2 per row): the per-iteration step of %edx
#   %rdi  j + 1 for the even column j handled first in this iteration
#         (1, 3, ..., 1535; the loop exits when it reaches 1537)
#   %edx  i * j for that even column (starts at 0, += %r8d per iteration)
#   %esi  scratch integer
#   %xmm0 constant 0.5, %xmm1 scratch value
#
# Each inner iteration stores two adjacent 4-byte elements (columns j and
# j + 1) into both matrices, so 768 iterations cover one 1536-element row.
#
# Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9, %xmm0, %xmm1, flags.
# Leaf function: makes no calls.
# -----------------------------------------------------------------------
        .section        .rodata.cst8,"aM",@progbits,8
        .p2align        3               # -- Begin function init_array
.LCPI0_0:
        .quad   4602678819172646912     # double 0.5
        .text
        .globl  init_array
        .p2align        4, 0x90
        .type   init_array,@function
init_array:                             # @init_array
        .cfi_startproc
# %bb.0:                                # %entry
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register %rbp
        leaq    B(%rip), %rax           # rax = &B[0][0]
        leaq    A(%rip), %rcx           # rcx = &A[0][0]
        xorl    %r8d, %r8d              # r8d = 2*i = 0
        movsd   .LCPI0_0(%rip), %xmm0   # xmm0 = mem[0],zero  (0.5, kept for the whole function)
        xorl    %r9d, %r9d              # i = 0
        .p2align        4, 0x90
.LBB0_1:                                # %polly.loop_header
                                        # =>This Loop Header: Depth=1
                                        #     Child Loop BB0_2 Depth 2
        movl    $1, %edi                # rdi = j + 1 with j = 0
        xorl    %edx, %edx              # edx = i * j = 0
        .p2align        4, 0x90
.LBB0_2:                                # %polly.loop_header1
                                        #   Parent Loop BB0_1 Depth=1
                                        # =>  This Inner Loop Header: Depth=2
        # --- even column j: value from i*j ---
        movl    %edx, %esi
        andl    $1022, %esi             # imm = 0x3FE; edx is a multiple of r8d = 2*i, so bit 0 is already 0
        orl     $1, %esi                # +1 (bit 0 is clear, so OR equals ADD here)
        xorps   %xmm1, %xmm1            # zero xmm1 so cvtsi2sd does not depend on its old contents
        cvtsi2sdl       %esi, %xmm1
        mulsd   %xmm0, %xmm1            # * 0.5
        cvtsd2ss        %xmm1, %xmm1    # narrow to single precision
        movss   %xmm1, -4(%rcx,%rdi,4)  # A[i][j]
        movss   %xmm1, -4(%rax,%rdi,4)  # B[i][j]
        # --- odd column j+1: value from i*j + i = i*(j+1) ---
        leal    (%r9,%rdx), %esi
        andl    $1023, %esi             # imm = 0x3FF
        addl    $1, %esi
        xorps   %xmm1, %xmm1
        cvtsi2sdl       %esi, %xmm1
        mulsd   %xmm0, %xmm1
        cvtsd2ss        %xmm1, %xmm1
        movss   %xmm1, (%rcx,%rdi,4)    # A[i][j+1]
        movss   %xmm1, (%rax,%rdi,4)    # B[i][j+1]
        addq    $2, %rdi                # two columns done
        addl    %r8d, %edx              # i*j -> i*(j+2)
        cmpq    $1537, %rdi             # imm = 0x601
        jne     .LBB0_2
# %bb.3:                                # %polly.loop_exit3
                                        #   in Loop: Header=BB0_1 Depth=1
        addq    $1, %r9                 # next row
        addq    $6144, %rax             # imm = 0x1800  (1536 elements * 4 bytes)
        addq    $6144, %rcx             # imm = 0x1800
        addl    $2, %r8d                # keep r8d == 2*i
        cmpq    $1536, %r9              # imm = 0x600
        jne     .LBB0_1
# %bb.4:                                # %polly.exiting
        popq    %rbp
        .cfi_def_cfa %rsp, 8
        retq
.Lfunc_end0:
        .size   init_array, .Lfunc_end0-init_array
        .cfi_endproc
                                        # -- End function
# -----------------------------------------------------------------------
# print_array
#
# Writes all 1536 x 1536 elements of the global float matrix C to stdout.
# Each element is widened to double and printed with the format string at
# .L.str via fprintf.  A newline (character 10) is written with fputc
# after every column whose index j satisfies j % 80 == 79, and once more
# at the end of every row.
#
# Reads no argument register and sets up no return value.
#
# Register roles (all callee-saved, so they survive the libc calls):
#   %r13  pointer to the current row of C (advances 6144 bytes per row)
#   %r12  0xCCCCCCCD, multiplier for the divide-by-80 sequence
#   %r14  address of the format string .L.str
#   %rbx  column index j, 0 .. 1535
#   %r15d 80 * (j / 80) + 79, compared against j after the fprintf call
#   -48(%rbp)  spill slot for the row counter; this is the 8 bytes
#              reserved by "pushq %rax"
#
# %rsi carries the current value of stdout into each fprintf/fputc call; it
# is reloaded from memory after every call because it is caller-saved.
#
# Stack: seven 8-byte pushes after the return address leave %rsp 16-byte
# aligned at every call site.
# -----------------------------------------------------------------------
        .globl  print_array             # -- Begin function print_array
        .p2align        4, 0x90
        .type   print_array,@function
print_array:                            # @print_array
        .cfi_startproc
# %bb.0:                                # %entry
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register %rbp
        pushq   %r15
        pushq   %r14
        pushq   %r13
        pushq   %r12
        pushq   %rbx
        pushq   %rax                    # reserves the spill slot at -48(%rbp)
        .cfi_offset %rbx, -56
        .cfi_offset %r12, -48
        .cfi_offset %r13, -40
        .cfi_offset %r14, -32
        .cfi_offset %r15, -24
        leaq    C(%rip), %r13           # r13 = &C[0][0]
        xorl    %eax, %eax              # row counter = 0
        movl    $3435973837, %r12d      # imm = 0xCCCCCCCD
        leaq    .L.str(%rip), %r14
        .p2align        4, 0x90
.LBB1_1:                                # %for.cond1.preheader
                                        # =>This Loop Header: Depth=1
                                        #     Child Loop BB1_2 Depth 2
        movq    %rax, -48(%rbp)         # 8-byte Spill  (row counter; rax is clobbered by the calls)
        movq    stdout(%rip), %rsi
        xorl    %ebx, %ebx              # j = 0
        .p2align        4, 0x90
.LBB1_2:                                # %for.body3
                                        #   Parent Loop BB1_1 Depth=1
                                        # =>  This Inner Loop Header: Depth=2
        movl    %ebx, %eax              # zero-extends j into rax
        imulq   %r12, %rax
        shrq    $38, %rax               # rax = j / 80   (j * 0xCCCCCCCD >> 38)
        leal    (%rax,%rax,4), %r15d    # * 5
        shll    $4, %r15d               # * 16  -> 80 * (j / 80)
        addl    $79, %r15d              # equals j exactly when j % 80 == 79
        movss   (%r13,%rbx,4), %xmm0    # xmm0 = mem[0],zero,zero,zero  (C[row][j])
        cvtss2sd        %xmm0, %xmm0    # widen to double for the variadic call
        movb    $1, %al                 # one vector register carries a variadic argument
        movq    %rsi, %rdi              # arg0 = stdout
        movq    %r14, %rsi              # arg1 = format string
        callq   fprintf@PLT             # through the PLT, like the fputc calls below
        cmpl    %ebx, %r15d
        jne     .LBB1_4
# %bb.3:                                # %if.then
                                        #   in Loop: Header=BB1_2 Depth=2
        movq    stdout(%rip), %rsi
        movl    $10, %edi               # newline
        callq   fputc@PLT
.LBB1_4:                                # %for.inc
                                        #   in Loop: Header=BB1_2 Depth=2
        addq    $1, %rbx
        movq    stdout(%rip), %rsi      # reload for the next fprintf or the end-of-row fputc
        cmpq    $1536, %rbx             # imm = 0x600
        jne     .LBB1_2
# %bb.5:                                # %for.end
                                        #   in Loop: Header=BB1_1 Depth=1
        movl    $10, %edi               # newline after each row; rsi already holds stdout
        callq   fputc@PLT
        movq    -48(%rbp), %rax         # 8-byte Reload
        addq    $1, %rax
        addq    $6144, %r13             # imm = 0x1800  (next row of C)
        cmpq    $1536, %rax             # imm = 0x600
        jne     .LBB1_1
# %bb.6:                                # %for.end12
        addq    $8, %rsp                # drop the spill slot
        popq    %rbx
        popq    %r12
        popq    %r13
        popq    %r14
        popq    %r15
        popq    %rbp
        .cfi_def_cfa %rsp, 8
        retq
.Lfunc_end1:
        .size   print_array, .Lfunc_end1-print_array
        .cfi_endproc
                                        # -- End function
# -----------------------------------------------------------------------
# main
#
# 1. Calls init_array.
# 2. Clears the global matrix C with memset(C, 0, 9437184).
# 3. Accumulates C[i][j] += A[i][k] * B[k][j] over a three-level loop nest
#    ordered i (outer), k (middle), j (inner, vectorised).
# 4. Returns 0 in %eax.
#
# No call to print_array appears in this function.
#
# Register roles in the loop nest:
#   %r14  i, 0 .. 1535
#   %rbx  pointer to row i of C (callee-saved; advances 6144 bytes per i)
#   %rax  &B[0][0], %rcx  &A[0][0]   (loaded after the last call)
#   %rsi  k, 0 .. 1535
#   %rdx  pointer to row k of B (reset to %rax for each i)
#   %xmm0 A[i][k] replicated into all four lanes
#   %rdi  first scratch for &A[i][0], then the inner index: element
#         offset biased by 12, stepping by 16 from 12 to 1548
#
# The inner body handles 16 floats per iteration as four 16-byte vectors at
# byte offsets -48, -32, -16 and 0 from 4*%rdi, so 96 iterations cover one
# 1536-element row.  movaps requires 16-byte aligned addresses: A, B and C
# are declared with 16-byte alignment, and both the row stride (6144) and
# the per-iteration step (64 bytes) are multiples of 16.
#
# Stack: three 8-byte pushes after the return address leave %rsp 16-byte
# aligned at both call sites.
# -----------------------------------------------------------------------
        .globl  main                    # -- Begin function main
        .p2align        4, 0x90
        .type   main,@function
main:                                   # @main
        .cfi_startproc
# %bb.0:                                # %entry
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register %rbp
        pushq   %r14
        pushq   %rbx
        .cfi_offset %rbx, -32
        .cfi_offset %r14, -24
        callq   init_array
        leaq    C(%rip), %rbx           # rbx = &C[0][0]
        xorl    %r14d, %r14d            # i = 0
        xorl    %esi, %esi              # memset fill value 0
        movl    $9437184, %edx          # imm = 0x900000  (1536 * 1536 * 4 bytes)
        movq    %rbx, %rdi
        callq   memset@PLT
        leaq    B(%rip), %rax
        leaq    A(%rip), %rcx
        .p2align        4, 0x90
.LBB2_1:                                # %polly.loop_header8
                                        # =>This Loop Header: Depth=1
                                        #     Child Loop BB2_2 Depth 2
                                        #       Child Loop BB2_3 Depth 3
        movq    %rax, %rdx              # rdx = row 0 of B
        xorl    %esi, %esi              # k = 0
        .p2align        4, 0x90
.LBB2_2:                                # %polly.loop_header14
                                        #   Parent Loop BB2_1 Depth=1
                                        # =>  This Loop Header: Depth=2
                                        #       Child Loop BB2_3 Depth 3
        leaq    (%r14,%r14,2), %rdi     # i * 3
        shlq    $11, %rdi               # * 2048 -> i * 6144, byte offset of row i
        addq    %rcx, %rdi              # &A[i][0]
        movss   (%rdi,%rsi,4), %xmm0    # xmm0 = mem[0],zero,zero,zero  (A[i][k])
        shufps  $0, %xmm0, %xmm0        # xmm0 = xmm0[0,0,0,0]
        movl    $12, %edi               # inner index, biased by 12 elements (48 bytes)
        .p2align        4, 0x90
.LBB2_3:                                # %vector.body
                                        #   Parent Loop BB2_1 Depth=1
                                        #     Parent Loop BB2_2 Depth=2
                                        # =>    This Inner Loop Header: Depth=3
        # floats j .. j+7
        movaps  -48(%rdx,%rdi,4), %xmm1 # B[k][j .. j+3]
        mulps   %xmm0, %xmm1
        movaps  -32(%rdx,%rdi,4), %xmm2 # B[k][j+4 .. j+7]
        mulps   %xmm0, %xmm2
        addps   -48(%rbx,%rdi,4), %xmm1 # + C[i][j .. j+3]
        addps   -32(%rbx,%rdi,4), %xmm2
        movaps  %xmm1, -48(%rbx,%rdi,4)
        movaps  %xmm2, -32(%rbx,%rdi,4)
        # floats j+8 .. j+15
        movaps  -16(%rdx,%rdi,4), %xmm1
        mulps   %xmm0, %xmm1
        movaps  (%rdx,%rdi,4), %xmm2
        mulps   %xmm0, %xmm2
        addps   -16(%rbx,%rdi,4), %xmm1
        addps   (%rbx,%rdi,4), %xmm2
        movaps  %xmm1, -16(%rbx,%rdi,4)
        movaps  %xmm2, (%rbx,%rdi,4)
        addq    $16, %rdi               # 16 floats per iteration
        cmpq    $1548, %rdi             # imm = 0x60C  (1536 + the bias of 12)
        jne     .LBB2_3
# %bb.4:                                # %polly.loop_exit22
                                        #   in Loop: Header=BB2_2 Depth=2
        addq    $1, %rsi                # next k
        addq    $6144, %rdx             # imm = 0x1800  (next row of B)
        cmpq    $1536, %rsi             # imm = 0x600
        jne     .LBB2_2
# %bb.5:                                # %polly.loop_exit16
                                        #   in Loop: Header=BB2_1 Depth=1
        addq    $1, %r14                # next i
        addq    $6144, %rbx             # imm = 0x1800  (next row of C)
        cmpq    $1536, %r14             # imm = 0x600
        jne     .LBB2_1
# %bb.6:                                # %polly.exiting
        xorl    %eax, %eax              # return 0
        popq    %rbx
        popq    %r14
        popq    %rbp
        .cfi_def_cfa %rsp, 8
        retq
.Lfunc_end2:
        .size   main, .Lfunc_end2-main
        .cfi_endproc
                                        # -- End function
# Global data used by the functions above.
#
# A, B and C are common symbols of 9437184 bytes each (1536 * 1536 four-byte
# elements) with 16-byte alignment; the aligned vector loads and stores in
# main rely on that alignment.
# .L.str is the 5-byte NUL-terminated format string passed to fprintf.
        .type   A,@object               # @A
        .comm   A,9437184,16
        .type   B,@object               # @B
        .comm   B,9437184,16
        .type   .L.str,@object          # @.str
        .section        .rodata.str1.1,"aMS",@progbits,1
.L.str:
        .asciz  "%lf "
        .size   .L.str, 5

        .type   C,@object               # @C
        .comm   C,9437184,16

        .ident  "clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"
        # Empty .note.GNU-stack section: by GNU toolchain convention this marks
        # the object as not needing an executable stack.
        .section        ".note.GNU-stack","",@progbits