1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | FileCheck %s
3 ; Increment in loop bb.i28.i adjusted to 2, to prevent loop reversal from
; External fastcc routine; only its declaration appears here. It is called
; once from the %mp_sqrt_init.exit block of @mp_sqrt.
6 declare fastcc void @rdft(i32, i32, ptr, ptr, ptr)
; @mp_sqrt is the function under test: the llc output for it is matched by the
; autogenerated CHECK lines that follow the signature (regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand).
; Of the parameters, %radix, %in, %out, %tmp1, %tmp2, %tmp1fft, %tmp2fft, %ip
; and %w are referenced in the body below; %n and %nfft are not referenced in
; the visible body.
8 define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, ptr %tmp2, i32 %nfft, ptr %tmp1fft, ptr %tmp2fft, ptr %ip, ptr %w) nounwind {
9 ; CHECK-LABEL: mp_sqrt:
10 ; CHECK: # %bb.0: # %entry
11 ; CHECK-NEXT: pushl %ebp
12 ; CHECK-NEXT: pushl %ebx
13 ; CHECK-NEXT: pushl %edi
14 ; CHECK-NEXT: pushl %esi
15 ; CHECK-NEXT: pushl %eax
16 ; CHECK-NEXT: movb $1, %cl
17 ; CHECK-NEXT: movl $1, %ebx
18 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
19 ; CHECK-NEXT: .p2align 4
20 ; CHECK-NEXT: .LBB0_1: # %bb.i5
21 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
22 ; CHECK-NEXT: movl %ecx, %eax
23 ; CHECK-NEXT: addl %ebx, %ebx
24 ; CHECK-NEXT: xorl %ecx, %ecx
25 ; CHECK-NEXT: testb $1, %al
26 ; CHECK-NEXT: jne .LBB0_1
27 ; CHECK-NEXT: # %bb.2: # %mp_unexp_mp2d.exit.i
28 ; CHECK-NEXT: je .LBB0_3
29 ; CHECK-NEXT: # %bb.5: # %cond_next.i
30 ; CHECK-NEXT: testb $1, %al
31 ; CHECK-NEXT: jne .LBB0_3
32 ; CHECK-NEXT: # %bb.6: # %cond_next36.i
33 ; CHECK-NEXT: movl $0, 0
34 ; CHECK-NEXT: movzbl %al, %ebp
35 ; CHECK-NEXT: andl $1, %ebp
36 ; CHECK-NEXT: xorpd %xmm0, %xmm0
37 ; CHECK-NEXT: xorl %eax, %eax
38 ; CHECK-NEXT: xorl %ecx, %ecx
39 ; CHECK-NEXT: xorpd %xmm1, %xmm1
40 ; CHECK-NEXT: .p2align 4
41 ; CHECK-NEXT: .LBB0_7: # %bb.i28.i
42 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
43 ; CHECK-NEXT: cvttsd2si %xmm1, %edi
44 ; CHECK-NEXT: cmpl %edx, %edi
45 ; CHECK-NEXT: cmovgel %eax, %edi
46 ; CHECK-NEXT: addl $2, %ecx
47 ; CHECK-NEXT: xorps %xmm2, %xmm2
48 ; CHECK-NEXT: cvtsi2sd %edi, %xmm2
49 ; CHECK-NEXT: xorpd %xmm1, %xmm1
50 ; CHECK-NEXT: subsd %xmm2, %xmm1
51 ; CHECK-NEXT: mulsd %xmm0, %xmm1
52 ; CHECK-NEXT: addl $-2, %ebp
53 ; CHECK-NEXT: jne .LBB0_7
54 ; CHECK-NEXT: # %bb.8: # %mp_unexp_d2mp.exit29.i
55 ; CHECK-NEXT: movl $0, 0
56 ; CHECK-NEXT: je .LBB0_9
57 ; CHECK-NEXT: # %bb.10: # %mp_sqrt_init.exit
58 ; CHECK-NEXT: xorl %ecx, %ecx
59 ; CHECK-NEXT: movl %edx, %edi
60 ; CHECK-NEXT: movl %esi, %edx
61 ; CHECK-NEXT: calll mp_mul_csqu@PLT
62 ; CHECK-NEXT: xorl %ecx, %ecx
63 ; CHECK-NEXT: movl $-1, %edx
64 ; CHECK-NEXT: pushl {{[0-9]+}}(%esp)
65 ; CHECK-NEXT: pushl {{[0-9]+}}(%esp)
66 ; CHECK-NEXT: pushl $0
67 ; CHECK-NEXT: calll rdft@PLT
68 ; CHECK-NEXT: addl $12, %esp
69 ; CHECK-NEXT: xorl %ecx, %ecx
70 ; CHECK-NEXT: movl %edi, (%esp) # 4-byte Spill
71 ; CHECK-NEXT: movl %edi, %edx
72 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
73 ; CHECK-NEXT: pushl %edi
74 ; CHECK-NEXT: pushl %esi
75 ; CHECK-NEXT: pushl $0
76 ; CHECK-NEXT: calll mp_mul_d2i@PLT
77 ; CHECK-NEXT: addl $12, %esp
78 ; CHECK-NEXT: testl %ebp, %ebp
79 ; CHECK-NEXT: je .LBB0_11
80 ; CHECK-NEXT: .LBB0_3: # %cond_true.i
81 ; CHECK-NEXT: addl $4, %esp
82 ; CHECK-NEXT: .LBB0_4: # %cond_true.i
83 ; CHECK-NEXT: popl %esi
84 ; CHECK-NEXT: popl %edi
85 ; CHECK-NEXT: popl %ebx
86 ; CHECK-NEXT: popl %ebp
88 ; CHECK-NEXT: .p2align 4
89 ; CHECK-NEXT: .LBB0_9: # %bb.i.i
90 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
91 ; CHECK-NEXT: jmp .LBB0_9
92 ; CHECK-NEXT: .LBB0_11: # %cond_false.i
93 ; CHECK-NEXT: xorl %ecx, %ecx
94 ; CHECK-NEXT: movl (%esp), %esi # 4-byte Reload
95 ; CHECK-NEXT: movl %esi, %edx
96 ; CHECK-NEXT: pushl {{[0-9]+}}(%esp)
97 ; CHECK-NEXT: pushl $0
98 ; CHECK-NEXT: calll mp_round@PLT
99 ; CHECK-NEXT: addl $8, %esp
100 ; CHECK-NEXT: xorl %ecx, %ecx
101 ; CHECK-NEXT: movl %esi, %edx
102 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
103 ; CHECK-NEXT: pushl %ebp
104 ; CHECK-NEXT: pushl %edi
105 ; CHECK-NEXT: pushl %ebp
106 ; CHECK-NEXT: calll mp_add@PLT
107 ; CHECK-NEXT: addl $12, %esp
108 ; CHECK-NEXT: xorl %ecx, %ecx
109 ; CHECK-NEXT: movl %esi, %edx
110 ; CHECK-NEXT: pushl %edi
111 ; CHECK-NEXT: pushl %edi
112 ; CHECK-NEXT: pushl {{[0-9]+}}(%esp)
113 ; CHECK-NEXT: calll mp_sub@PLT
114 ; CHECK-NEXT: addl $12, %esp
115 ; CHECK-NEXT: xorl %ecx, %ecx
116 ; CHECK-NEXT: movl %esi, %edx
117 ; CHECK-NEXT: pushl %ebp
118 ; CHECK-NEXT: pushl $0
119 ; CHECK-NEXT: calll mp_round@PLT
120 ; CHECK-NEXT: addl $8, %esp
121 ; CHECK-NEXT: xorl %ecx, %ecx
122 ; CHECK-NEXT: movl %esi, %edx
123 ; CHECK-NEXT: pushl %edi
124 ; CHECK-NEXT: pushl {{[0-9]+}}(%esp)
125 ; CHECK-NEXT: pushl %ebx
126 ; CHECK-NEXT: calll mp_mul_d2i@PLT
127 ; CHECK-NEXT: addl $16, %esp
128 ; CHECK-NEXT: jmp .LBB0_4
; %foo is true when this block is reached from %entry and false when reached
; through its own back edge, so the branch at the end of the block loops back
; once and then leaves for %mp_unexp_mp2d.exit.i. %nfft_init.0.i starts at 1
; and is shifted left by one on every visit.
132 bb.i5: ; preds = %bb.i5, %entry
133 %nfft_init.0.i = phi i32 [ 1, %entry ], [ %tmp7.i3, %bb.i5 ] ; <i32> [#uses=1]
134 %foo = phi i1 [1, %entry], [0, %bb.i5]
135 %tmp7.i3 = shl i32 %nfft_init.0.i, 1 ; <i32> [#uses=2]
136 br i1 %foo, label %bb.i5, label %mp_unexp_mp2d.exit.i
138 mp_unexp_mp2d.exit.i: ; preds = %bb.i5
139 br i1 %foo, label %cond_next.i, label %cond_true.i
141 cond_true.i: ; preds = %mp_unexp_mp2d.exit.i
144 cond_next.i: ; preds = %mp_unexp_mp2d.exit.i
145 %tmp22.i = sdiv i32 0, 2 ; <i32> [#uses=2]
146 br i1 %foo, label %cond_true29.i, label %cond_next36.i
148 cond_true29.i: ; preds = %cond_next.i
151 cond_next36.i: ; preds = %cond_next.i
152 store i32 %tmp22.i, ptr null, align 4
153 %tmp8.i14.i = select i1 %foo, i32 1, i32 0 ; <i32> [#uses=1]
; Loop whose induction variable %j.0.reg2mem.0.i16.i starts at 0 and is
; advanced by 2 per iteration (this is the increment the note at the top of
; the file refers to); it exits when the advanced value equals %tmp8.i14.i.
156 bb.i28.i: ; preds = %bb.i28.i, %cond_next36.i
157 %j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ] ; <i32> [#uses=2]
158 %din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ] ; <double> [#uses=1]
159 %tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32 ; <i32> [#uses=2]
160 %tmp4.i19.i = icmp slt i32 %tmp1.i18.i, %radix ; <i1> [#uses=1]
161 %x.0.i21.i = select i1 %tmp4.i19.i, i32 %tmp1.i18.i, i32 0 ; <i32> [#uses=1]
162 %tmp41.sum.i = add i32 %j.0.reg2mem.0.i16.i, 2 ; <i32> [#uses=0]
163 %tmp1213.i23.i = sitofp i32 %x.0.i21.i to double ; <double> [#uses=1]
164 %tmp15.i24.i = fsub double 0.000000e+00, %tmp1213.i23.i ; <double> [#uses=1]
165 %tmp16.i25.i = fmul double 0.000000e+00, %tmp15.i24.i ; <double> [#uses=1]
166 %indvar.next39.i = add i32 %j.0.reg2mem.0.i16.i, 2 ; <i32> [#uses=2]
167 %exitcond40.i = icmp eq i32 %indvar.next39.i, %tmp8.i14.i ; <i1> [#uses=3] (loop exit here, plus the branches in %mp_unexp_d2mp.exit29.i and %mp_sqrt_init.exit)
168 br i1 %exitcond40.i, label %mp_unexp_d2mp.exit29.i, label %bb.i28.i
170 mp_unexp_d2mp.exit29.i: ; preds = %bb.i28.i
171 %tmp46.i = sub i32 0, %tmp22.i ; <i32> [#uses=1]
172 store i32 %tmp46.i, ptr null, align 4
173 br i1 %exitcond40.i, label %bb.i.i, label %mp_sqrt_init.exit
175 bb.i.i: ; preds = %bb.i.i, %mp_unexp_d2mp.exit29.i
178 mp_sqrt_init.exit: ; preds = %mp_unexp_d2mp.exit29.i
179 tail call fastcc void @mp_mul_csqu( i32 0, ptr %tmp1fft )
180 tail call fastcc void @rdft( i32 0, i32 -1, ptr null, ptr %ip, ptr %w )
181 tail call fastcc void @mp_mul_d2i( i32 0, i32 %radix, i32 0, ptr %tmp1fft, ptr %tmp2 )
182 br i1 %exitcond40.i, label %cond_false.i, label %cond_true36.i
184 cond_true36.i: ; preds = %mp_sqrt_init.exit
187 cond_false.i: ; preds = %mp_sqrt_init.exit
188 tail call fastcc void @mp_round( i32 0, i32 %radix, i32 0, ptr %out )
189 tail call fastcc void @mp_add( i32 0, i32 %radix, ptr %tmp1, ptr %tmp2, ptr %tmp1 )
190 tail call fastcc void @mp_sub( i32 0, i32 %radix, ptr %in, ptr %tmp2, ptr %tmp2 )
191 tail call fastcc void @mp_round( i32 0, i32 %radix, i32 0, ptr %tmp1 )
192 tail call fastcc void @mp_mul_d2i( i32 0, i32 %radix, i32 %tmp7.i3, ptr %tmp2fft, ptr %tmp2 )
; Declarations of the remaining external fastcc routines that @mp_sqrt calls.
; No bodies are given for them in the visible part of this file; the CHECK
; lines show each one being reached through an @PLT call.
196 declare fastcc void @mp_add(i32, i32, ptr, ptr, ptr)
198 declare fastcc void @mp_sub(i32, i32, ptr, ptr, ptr)
200 declare fastcc void @mp_round(i32, i32, i32, ptr)
202 declare fastcc void @mp_mul_csqu(i32, ptr)
204 declare fastcc void @mp_mul_d2i(i32, i32, i32, ptr, ptr)