1 // simd functions for panning
2 // Copyright (C) 2009, 2010 Tim Blechmann
4 // This program is free software; you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation; either version 2 of the License, or
7 // (at your option) any later version.
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
14 // You should have received a copy of the GNU General Public License
15 // along with this program; see the file COPYING. If not, write to
16 // the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 // Boston, MA 02111-1307, USA.
// always_inline: when __GNUC__ and NDEBUG are both defined, the first
// definition expands to `inline __attribute__((always_inline))`; the second
// definition expands to plain `inline`.
// NOTE(review): no #else / #endif is visible between or after the two
// #defines in this view -- presumably dropped; confirm against upstream.
25 #if defined(__GNUC__) && defined(NDEBUG)
26 #define always_inline inline __attribute__((always_inline))
28 #define always_inline inline
// pan2_vec (constant gains).
// Parameters: out0/out1 are the two output pointers, in is the read-only
// input pointer, factor0/factor1 are the gains applied for out0/out1, and n
// is an unsigned count.
// The two visible statements write sig * factor0 through out0 and
// sig * factor1 through out1, post-incrementing each output pointer.
// NOTE(review): the template header, the braces, any loop over n and the
// declaration of `sig` are not visible in this view -- presumably `sig` is
// read from `in`; confirm against upstream before relying on this.
35 inline void pan2_vec(F
* out0
, F
* out1
, const F
* in
, F factor0
, F factor1
, unsigned int n
)
40 *out0
++ = sig
* factor0
;
41 *out1
++ = sig
* factor1
;
// pan2_vec (gains with slopes).
// Same parameter layout as the constant-gain overload, with an additional
// slope value following each factor (factor0, slope0, factor1, slope1).
// The two visible statements write sig * factor0 through out0 and
// sig * factor1 through out1, post-incrementing each output pointer.
// NOTE(review): no statement using slope0/slope1 is visible in this view,
// nor the template header, braces, loop or the declaration of `sig` --
// presumably the factors are advanced by their slopes per sample; confirm.
46 inline void pan2_vec(F
* out0
, F
* out1
, const F
* in
, F factor0
, F slope0
, F factor1
, F slope1
, unsigned int n
)
51 *out0
++ = sig
* factor0
;
52 *out1
++ = sig
* factor1
;
// Template parameters: element type F and a compile-time unsigned count n.
// NOTE(review): the declaration these parameters introduce is not visible
// in this view; the mp_iteration calls in this file name it pan2<F, n>.
61 template <typename F
, unsigned int n
>
// offset is vec<F>::size. The recursive mp_iteration calls in this file add
// it to each pointer and subtract it from n.
64 static const int offset
= vec
<F
>::size
;
// mp_iteration (constant factors).
// Multiplies vin by factor0 and by factor1, stores the two products to out0
// and out1 with store_aligned, then calls pan2<F, n-offset>::mp_iteration
// with out0, out1 and in each advanced by offset and the same factors.
// NOTE(review): no statement that fills `vin` from `in` is visible in this
// view -- presumably a load precedes the multiplies; confirm.
// NOTE(review): store_aligned presumably requires suitably aligned output
// pointers -- confirm against vec<F>.
66 static always_inline
void mp_iteration(F
* out0
, F
* out1
, const F
* in
, vec
<F
> const & factor0
, vec
<F
> const & factor1
)
68 vec
<F
> vin
, vout0
, vout1
;
71 vout0
= vin
* factor0
;
72 vout1
= vin
* factor1
;
74 vout0
.store_aligned(out0
);
75 vout1
.store_aligned(out1
);
// Recurse with the count reduced by offset and all pointers advanced by it.
77 pan2
<F
, n
-offset
>::mp_iteration(out0
+offset
, out1
+offset
, in
+offset
, factor0
, factor1
);
// mp_iteration (factors with slopes).
// factor0/factor1 are taken by non-const reference; slope0/slope1 by const
// reference. The visible body multiplies vin by each factor, stores the
// products to out0/out1 with store_aligned, then calls
// pan2<F, n-offset>::mp_iteration with the pointers advanced by offset and
// the same factor/slope arguments.
// NOTE(review): no statement that fills `vin` from `in`, and no statement
// that updates factor0/factor1 by their slopes, is visible in this view --
// presumably both exist upstream; confirm.
80 static always_inline
void mp_iteration(F
* out0
, F
* out1
, const F
* in
, vec
<F
> & factor0
, vec
<F
> const & slope0
,
81 vec
<F
> & factor1
, vec
<F
> const & slope1
)
83 vec
<F
> vin
, vout0
, vout1
;
86 vout0
= vin
* factor0
;
87 vout1
= vin
* factor1
;
89 vout0
.store_aligned(out0
);
90 vout1
.store_aligned(out1
);
// Recurse with the count reduced by offset and all pointers advanced by it.
94 pan2
<F
, n
-offset
>::mp_iteration(out0
+offset
, out1
+offset
, in
+offset
, factor0
, slope0
, factor1
, slope1
);
// Second mp_iteration declaration with the constant-factor signature.
// Only the signature is visible in this view.
// NOTE(review): presumably the terminating case of the pan2 recursion, with
// an empty body -- confirm against upstream.
102 static always_inline
void mp_iteration(F
* out0
, F
* out1
, const F
* in
, vec
<F
> const & factor0
, vec
<F
> const & factor1
)
// Second mp_iteration declaration with the factor/slope signature.
// Only the signature is visible in this view.
// NOTE(review): presumably the terminating case of the pan2 recursion, with
// an empty body -- confirm against upstream.
105 static always_inline
void mp_iteration(F
* out0
, F
* out1
, const F
* in
, vec
<F
> & factor0
, vec
<F
> const & slope0
,
106 vec
<F
> & factor1
, vec
<F
> const & slope1
)
110 } /* namespace detail */
// pan2_vec_simd (constant factors, runtime count n).
// Constructs vf0 and vf1 from the scalar factor0 and factor1, takes per_loop
// from vec<F>::objects_per_cacheline, calls
// detail::pan2<F, per_loop>::mp_iteration on the current pointers, and then
// advances out0, out1 and in by per_loop.
// NOTE(review): the loop construct and every use of n are not visible in
// this view -- presumably the call/advance pair repeats over n elements in
// per_loop-sized steps; confirm against upstream.
112 template <typename F
>
113 inline void pan2_vec_simd(F
* out0
, F
* out1
, const F
* in
, F factor0
, F factor1
, unsigned int n
)
115 vec
<F
> vf0(factor0
), vf1(factor1
);
116 const int per_loop
= vec
<F
>::objects_per_cacheline
;
120 detail::pan2
<F
, per_loop
>::mp_iteration(out0
, out1
, in
, vf0
, vf1
);
121 out0
+= per_loop
; out1
+= per_loop
; in
+= per_loop
;
// pan2_vec_simd (constant factors, compile-time count n).
// n is the first template parameter, so callers must supply it explicitly.
// Constructs vf0 and vf1 from the scalar factor0 and factor1 and makes a
// single call to detail::pan2<F, n>::mp_iteration.
// NOTE(review): the function's braces are not visible in this view.
125 template <unsigned int n
, typename F
>
126 inline void pan2_vec_simd(F
* out0
, F
* out1
, const F
* in
, F factor0
, F factor1
)
128 vec
<F
> vf0(factor0
), vf1(factor1
);
130 detail::pan2
<F
, n
>::mp_iteration(out0
, out1
, in
, vf0
, vf1
);
// pan2_vec_simd (factors with slopes, runtime count n).
// Takes per_loop from vec<F>::objects_per_cacheline and declares vf0, vf1,
// vslope0 and vslope1. For each channel it calls set_slope(factor, slope) on
// the factor vector and passes the returned value to set_vec on the slope
// vector. It then calls detail::pan2<F, per_loop>::mp_iteration and advances
// out0, out1 and in by per_loop.
// NOTE(review): set_slope presumably fills the factor vector with a ramp and
// returns the per-vector increment -- confirm against vec<F>.
// NOTE(review): the loop construct and every use of n are not visible in
// this view; confirm against upstream.
133 template <typename F
>
134 inline void pan2_vec_simd(F
* out0
, F
* out1
, const F
* in
, F factor0
, F slope0
, F factor1
, F slope1
, unsigned int n
)
136 const int per_loop
= vec
<F
>::objects_per_cacheline
;
138 vec
<F
> vf0
, vf1
, vslope0
, vslope1
;
139 vslope0
.set_vec(vf0
.set_slope(factor0
, slope0
));
140 vslope1
.set_vec(vf1
.set_slope(factor1
, slope1
));
144 detail::pan2
<F
, per_loop
>::mp_iteration(out0
, out1
, in
, vf0
, vslope0
, vf1
, vslope1
);
145 out0
+= per_loop
; out1
+= per_loop
; in
+= per_loop
;
// pan2_vec_simd (factors with slopes, compile-time count n).
// n is the first template parameter, so callers must supply it explicitly.
// Declares vf0, vf1, vslope0 and vslope1. For each channel it calls
// set_slope(factor, slope) on the factor vector and passes the returned
// value to set_vec on the slope vector. It then makes a single call to
// detail::pan2<F, n>::mp_iteration.
// NOTE(review): set_slope presumably fills the factor vector with a ramp and
// returns the per-vector increment -- confirm against vec<F>.
// NOTE(review): the function's braces are not visible in this view.
149 template <unsigned int n
, typename F
>
150 inline void pan2_vec_simd(F
* out0
, F
* out1
, const F
* in
, F factor0
, F slope0
, F factor1
, F slope1
)
152 vec
<F
> vf0
, vf1
, vslope0
, vslope1
;
153 vslope0
.set_vec(vf0
.set_slope(factor0
, slope0
));
154 vslope1
.set_vec(vf1
.set_slope(factor1
, slope1
));
156 detail::pan2
<F
, n
>::mp_iteration(out0
, out1
, in
, vf0
, vslope0
, vf1
, vslope1
);
159 } /* namespace nova */
163 #endif /* SIMD_PAN_HPP */