3 An interface to abstract over different FFT libraries, for SuperCollider 3.
4 (c) 2007-2008 Dan Stowell, incorporating code from
5 SuperCollider 3 Copyright (c) 2002 James McCartney.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 vDSP uses a "SplitBuf" as an intermediate representation of the data.
24 For speed we keep this global, although this makes the code non-thread-safe.
25 (This is not new to this refactoring. Just worth noting.)
35 #include "SC_fftlib.h"
38 #include "simd_binary_arithmetic.hpp"
42 // We include vDSP even if not using for FFT, since we want to use some vectorised add/mul tricks
43 #if defined(__APPLE__) && !defined(SC_IPHONE)
44 #include "vecLib/vDSP.h"
45 #elif defined(SC_IPHONE)
46 #include <Accelerate/Accelerate.h>
49 ////////////////////////////////////////////////////////////////////////////////////////////////////
50 // Constants and structs
52 // Decisions here about which FFT library to use - vDSP only exists on Mac BTW.
53 // We include the relevant libs but also ensure that one, and only one, of them is active...
55 // "SC_FFT_NONE" allows compilation without FFT support; only expected to be used on very limited platforms
58 #define SC_FFT_GREEN 0
62 #define SC_FFT_GREEN 1
63 #elif !SC_FFT_FFTW && defined(__APPLE__)
66 #define SC_FFT_GREEN 0
71 #define SC_FFT_GREEN 0
75 // Note that things like *fftWindow actually allow for other sizes, to be created on user request.
76 #define SC_FFT_ABSOLUTE_MAXSIZE 2147483648
77 #define SC_FFT_LOG2_ABSOLUTE_MAXSIZE 31
78 #define SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1 32
80 // This struct is a bit like FFTW's idea of a "plan": it represents an FFT operation that may be applied once or repeatedly.
81 // It should be possible for indata and outdata to be the same, for quasi-in-place operation.
82 typedef struct scfft
{
83 unsigned int nfull
, nwin
, log2nfull
, log2nwin
; // Lengths of full FFT frame, and the (possibly shorter) windowed data frame
85 float *indata
, *outdata
, *trbuf
;
86 float scalefac
; // Used to rescale the data to unity gain
90 static float *fftWindow
[2][SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1
];
93 static FFTSetup fftSetup
[SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1
]; // vDSP setups, one per FFT size
94 static COMPLEX_SPLIT splitBuf
; // Temp buf for holding rearranged data
100 static float *cosTable
[SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1
];
105 static fftwf_plan precompiledForwardPlans
[SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1
];
106 static fftwf_plan precompiledBackwardPlans
[SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1
];
107 static fftwf_plan precompiledForwardPlansInPlace
[SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1
];
108 static fftwf_plan precompiledBackwardPlansInPlace
[SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1
];
111 #define pi 3.1415926535898f
112 #define twopi 6.28318530717952646f
114 ////////////////////////////////////////////////////////////////////////////////////////////////////
118 static float* create_cosTable(int log2n
);
119 static float* create_cosTable(int log2n
)
121 int size
= 1 << log2n
;
122 int size2
= size
/ 4 + 1;
123 float *win
= (float*)malloc(size2
* sizeof(float));
124 double winc
= twopi
/ size
;
125 for (int i
=0; i
<size2
; ++i
) {
133 static float* scfft_create_fftwindow(int wintype
, int log2n
)
135 int size
= 1 << log2n
;
137 #if _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600
139 int error
= posix_memalign((void**)&win
, 64, size
* sizeof(float)); // align by cacheline
143 float *win
= (float*)malloc(size
* sizeof(float));
149 for (i
=0; i
<size
; ++i
) {
156 for (i
=0; i
<size
; ++i
) {
158 win
[i
] = 0.5 - 0.5 * cos(w
);
167 static void scfft_ensurewindow(unsigned short log2_fullsize
, unsigned short log2_winsize
, short wintype
);
169 static bool scfft_global_initialization (void)
171 for (int wintype
=0; wintype
<2; ++wintype
) {
172 for (int i
=0; i
< SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1
; ++i
) {
173 fftWindow
[wintype
][i
] = 0;
175 for (int i
= SC_FFT_LOG2_MINSIZE
; i
< SC_FFT_LOG2_MAXSIZE
+1; ++i
) {
176 fftWindow
[wintype
][i
] = scfft_create_fftwindow(wintype
, i
);
180 for (int i
=0; i
< SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1
; ++i
) {
183 for (int i
= SC_FFT_LOG2_MINSIZE
; i
< SC_FFT_LOG2_MAXSIZE
+1; ++i
) {
184 cosTable
[i
] = create_cosTable(i
);
186 printf("SC FFT global init: cosTable initialised.\n");
188 // vDSP inits its twiddle factors
189 for (int i
= SC_FFT_LOG2_MINSIZE
; i
< SC_FFT_LOG2_MAXSIZE
+1; ++i
) {
190 fftSetup
[i
] = vDSP_create_fftsetup (i
, FFT_RADIX2
);
191 if(fftSetup
[i
] == NULL
){
192 printf("FFT ERROR: Mac vDSP library could not allocate FFT setup for size %i\n", 1<<i
);
195 // vDSP prepares its memory-aligned buffer for rearranging input data.
196 // Note max size here - meaning max input buffer size is these two sizes added together.
197 // vec_malloc used in API docs, but apparently that's deprecated and malloc is sufficient for aligned memory on OSX.
198 splitBuf
.realp
= (float*) malloc ( SC_FFT_MAXSIZE
* sizeof(float) / 2);
199 splitBuf
.imagp
= (float*) malloc ( SC_FFT_MAXSIZE
* sizeof(float) / 2);
200 //printf("SC FFT global init: vDSP initialised.\n");
202 size_t maxSize
= 1<<SC_FFT_LOG2_MAXSIZE
;
203 float * buffer1
= (float*)fftwf_malloc((maxSize
+ 1) * sizeof(float));
204 float * buffer2
= (float*)fftwf_malloc((maxSize
+ 1) * sizeof(float));
205 for (int i
= SC_FFT_LOG2_MINSIZE
; i
< SC_FFT_LOG2_MAXSIZE
+1; ++i
) {
206 size_t currentSize
= 1<<i
;
208 precompiledForwardPlans
[i
] = fftwf_plan_dft_r2c_1d(currentSize
, buffer1
, (fftwf_complex
*) buffer2
, FFTW_ESTIMATE
);
209 precompiledBackwardPlans
[i
] = fftwf_plan_dft_c2r_1d(currentSize
, (fftwf_complex
*) buffer2
, buffer1
, FFTW_ESTIMATE
);
211 precompiledForwardPlansInPlace
[i
] = fftwf_plan_dft_r2c_1d(currentSize
, buffer1
, (fftwf_complex
*) buffer1
, FFTW_ESTIMATE
);
212 precompiledBackwardPlansInPlace
[i
] = fftwf_plan_dft_c2r_1d(currentSize
, (fftwf_complex
*) buffer1
, buffer1
, FFTW_ESTIMATE
);
220 static bool dummy
= scfft_global_initialization();
222 // You need to provide an intermediate "transform buffer". Size will depend on which underlying lib is being used.
223 // "fullsize" is the number of samples in the input buffer (inc any padding), aka the number of "points" in the FFT.
224 // Often in an SC plugin you can get this number from buf->samples if you're grabbing an external buffer.
225 // The input value is given in samples.
226 // The return value is given in bytes.
227 static size_t scfft_trbufsize(unsigned int fullsize
)
230 // Transform buf is two floats "too big" because of FFTWF's output ordering
231 return (fullsize
+2) * sizeof(float);
233 // vDSP packs the nyquist in with the DC, so size is same as input buffer (plus zeropadding)
234 // Green does this too
235 return (fullsize
) * sizeof(float);
240 scfft
* scfft_create(size_t fullsize
, size_t winsize
, SCFFT_WindowFunction wintype
,
241 float *indata
, float *outdata
, SCFFT_Direction forward
, SCFFT_Allocator
& alloc
)
243 char * chunk
= (char*) alloc
.alloc(sizeof(scfft
) + scfft_trbufsize(fullsize
));
247 scfft
* f
= (scfft
*)chunk
;
248 float *trbuf
= (float*)(chunk
+ sizeof(scfft
));
252 f
->log2nfull
= LOG2CEIL(fullsize
);
253 f
->log2nwin
= LOG2CEIL( winsize
);
254 f
->wintype
= wintype
;
256 f
->outdata
= outdata
;
259 // Buffer is larger than the range of sizes we provide for at startup; we can get ready just-in-time though
260 if (fullsize
> SC_FFT_MAXSIZE
)
261 scfft_ensurewindow(f
->log2nfull
, f
->log2nwin
, wintype
);
263 // The scale factors rescale the data to unity gain. The old Green lib did this itself, meaning scalefacs would here be 1...
267 #else // forward FFTW and Green factor
270 } else { // backward FFTW and VDSP factor
274 f
->scalefac
= 1.f
/ fullsize
;
278 memset(trbuf
, 0, scfft_trbufsize(fullsize
));
283 static int largest_log2n
= SC_FFT_LOG2_MAXSIZE
;
285 // check the global list of windows incs ours; create if not.
286 // Note that expanding the table, if triggered, will cause a CPU hit as things are malloc'ed, realloc'ed, etc.
287 void scfft_ensurewindow(unsigned short log2_fullsize
, unsigned short log2_winsize
, short wintype
)
289 // Ensure we have enough space to do our calcs
290 if(log2_fullsize
> largest_log2n
){
291 largest_log2n
= log2_fullsize
;
293 size_t newsize
= (1 << largest_log2n
) * sizeof(float) / 2;
294 splitBuf
.realp
= (float*) realloc (splitBuf
.realp
, newsize
);
295 splitBuf
.imagp
= (float*) realloc (splitBuf
.imagp
, newsize
);
299 // Ensure our window has been created
300 if((wintype
!= -1) && (fftWindow
[wintype
][log2_winsize
] == 0)){
301 fftWindow
[wintype
][log2_winsize
] = scfft_create_fftwindow(wintype
, log2_winsize
);
304 // Ensure our FFT twiddle factors (or whatever) have been created
306 if(fftSetup
[log2_fullsize
] == 0)
307 fftSetup
[log2_fullsize
] = vDSP_create_fftsetup (log2_fullsize
, FFT_RADIX2
);
309 if(cosTable
[log2_fullsize
] == 0)
310 cosTable
[log2_fullsize
] = create_cosTable(log2_fullsize
);
314 // these do the main jobs.
315 // Note: you DON"T need to call the windowing function yourself, it'll be applied by the _dofft and _doifft funcs.
316 static void scfft_dowindowing(float *data
, unsigned int winsize
, unsigned int fullsize
, unsigned short log2_winsize
,
317 short wintype
, float scalefac
)
319 if (wintype
!= kRectWindow
) {
320 float *win
= fftWindow
[wintype
][log2_winsize
];
323 vDSP_vmul(data
, 1, win
, 1, data
, 1, winsize
);
324 #elif defined (NOVA_SIMD)
325 using namespace nova
;
326 if ((vec
<float>::objects_per_cacheline
& winsize
== 0) && vec
<float>::is_aligned(data
))
327 times_vec_simd(data
, data
, win
, winsize
);
329 times_vec(data
, data
, win
, winsize
);
332 float *in
= data
- 1;
333 for (int i
=0; i
< winsize
; ++i
) {
339 // scale factor is different for different libs. But the compiler switch here is about using vDSP's fast multiplication method.
341 vDSP_vsmul(data
, 1, &scalefac
, data
, 1, winsize
);
343 for(int i
=0; i
<winsize
; ++i
){
349 memset(data
+ winsize
, 0, (fullsize
- winsize
) * sizeof(float));
352 void scfft_dofft(scfft
* f
)
354 // Data goes to transform buf
355 memcpy(f
->trbuf
, f
->indata
, f
->nwin
* sizeof(float));
356 scfft_dowindowing(f
->trbuf
, f
->nwin
, f
->nfull
, f
->log2nwin
, f
->wintype
, f
->scalefac
);
358 // forward transformation is in-place
359 fftwf_execute_dft_r2c(precompiledForwardPlansInPlace
[f
->log2nfull
], f
->trbuf
, (fftwf_complex
*)f
->trbuf
);
361 // Rearrange output data onto public buffer
362 memcpy(f
->outdata
, f
->trbuf
, f
->nfull
* sizeof(float));
363 f
->outdata
[1] = f
->trbuf
[f
->nfull
]; // Pack nyquist val in
365 // Perform even-odd split
366 vDSP_ctoz((COMPLEX
*) f
->trbuf
, 2, &splitBuf
, 1, f
->nfull
>> 1);
367 // Now the actual FFT
368 vDSP_fft_zrip(fftSetup
[f
->log2nfull
], &splitBuf
, 1, f
->log2nfull
, FFT_FORWARD
);
369 // Copy the data to the public output buf, transforming it back out of "split" representation
370 vDSP_ztoc(&splitBuf
, 1, (DSPComplex
*) f
->outdata
, 2, f
->nfull
>> 1);
372 // Green FFT is in-place
373 rffts(f
->trbuf
, f
->log2nfull
, 1, cosTable
[f
->log2nfull
]);
374 // Copy to public buffer
375 memcpy(f
->outdata
, f
->trbuf
, f
->nfull
* sizeof(float));
379 void scfft_doifft(scfft
* f
)
382 float *trbuf
= f
->trbuf
;
383 size_t bytesize
= f
->nfull
* sizeof(float);
384 memcpy(trbuf
, f
->indata
, bytesize
);
386 trbuf
[f
->nfull
] = f
->indata
[1]; // Nyquist goes all the way down to the end of the line...
387 trbuf
[f
->nfull
+1] = 0.f
;
389 fftwf_execute_dft_c2r(precompiledBackwardPlans
[f
->log2nfull
], (fftwf_complex
*)trbuf
, f
->outdata
);
392 vDSP_ctoz((COMPLEX
*) f
->indata
, 2, &splitBuf
, 1, f
->nfull
>> 1);
393 vDSP_fft_zrip(fftSetup
[f
->log2nfull
], &splitBuf
, 1, f
->log2nfull
, FFT_INVERSE
);
394 vDSP_ztoc(&splitBuf
, 1, (DSPComplex
*) f
->outdata
, 2, f
->nfull
>> 1);
396 float *trbuf
= f
->trbuf
;
397 size_t bytesize
= f
->nfull
* sizeof(float);
398 memcpy(trbuf
, f
->indata
, bytesize
);
399 // Green FFT is in-place
400 riffts(trbuf
, f
->log2nfull
, 1, cosTable
[f
->log2nfull
]);
401 // Copy to public buffer
402 memcpy(f
->outdata
, trbuf
, f
->nwin
* sizeof(float));
404 scfft_dowindowing(f
->outdata
, f
->nwin
, f
->nfull
, f
->log2nwin
, f
->wintype
, f
->scalefac
);
407 void scfft_destroy(scfft
*f
, SCFFT_Allocator
& alloc
)