3 An interface to abstract over different FFT libraries, for SuperCollider 3.
4 (c) 2007-2008 Dan Stowell, incorporating code from
5 SuperCollider 3 Copyright (c) 2002 James McCartney.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 vDSP uses a "SplitBuf" as an intermediate representation of the data.
24 For speed we keep this global, although this makes the code non-thread-safe.
25 (This is not new to this refactoring. Just worth noting.)
35 #include "SC_fftlib.h"
38 // We include vDSP even if not using for FFT, since we want to use some vectorised add/mul tricks
39 #if defined(__APPLE__) && !defined(SC_IPHONE)
40 #include "vecLib/vDSP.h"
41 #elif defined(SC_IPHONE)
42 #include <Accelerate/Accelerate.h>
45 ////////////////////////////////////////////////////////////////////////////////////////////////////
46 // Constants and structs
48 // Decisions here about which FFT library to use - vDSP only exists on Mac BTW.
49 // We include the relevant libs but also ensure that one, and only one, of them is active...
51 // "SC_FFT_NONE" allows compilation without FFT support; only expected to be used on very limited platforms
54 #define SC_FFT_GREEN 0
58 #define SC_FFT_GREEN 1
59 #elif !SC_FFT_FFTW && defined(__APPLE__)
62 #define SC_FFT_GREEN 0
67 #define SC_FFT_GREEN 0
71 // Note that things like *fftWindow actually allow for other sizes, to be created on user request.
72 #define SC_FFT_ABSOLUTE_MAXSIZE 2147483648
73 #define SC_FFT_LOG2_ABSOLUTE_MAXSIZE 31
74 #define SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1 32
76 // This struct is a bit like FFTW's idea of a "plan": it represents an FFT operation that may be applied once or repeatedly.
77 // It should be possible for indata and outdata to be the same, for quasi-in-place operation.
78 typedef struct scfft
{
79 unsigned int nfull
, nwin
, log2nfull
, log2nwin
; // Lengths of full FFT frame, and the (possibly shorter) windowed data frame
81 float *indata
, *outdata
, *trbuf
;
82 float scalefac
; // Used to rescale the data to unity gain
89 static float *fftWindow
[2][SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1
];
92 static FFTSetup fftSetup
[SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1
]; // vDSP setups, one per FFT size
93 static COMPLEX_SPLIT splitBuf
; // Temp buf for holding rearranged data
99 static float *cosTable
[SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1
];
103 #define pi 3.1415926535898f
104 #define twopi 6.28318530717952646f
106 ////////////////////////////////////////////////////////////////////////////////////////////////////
110 static float* create_cosTable(int log2n
);
111 static float* create_cosTable(int log2n
)
113 int size
= 1 << log2n
;
114 int size2
= size
/ 4 + 1;
115 float *win
= (float*)malloc(size2
* sizeof(float));
116 double winc
= twopi
/ size
;
117 for (int i
=0; i
<size2
; ++i
) {
125 static float* scfft_create_fftwindow(int wintype
, int log2n
)
127 int size
= 1 << log2n
;
129 float *win
= (float*)malloc(size
* sizeof(float));
135 for (i
=0; i
<size
; ++i
) {
142 for (i
=0; i
<size
; ++i
) {
144 win
[i
] = 0.5 - 0.5 * cos(w
);
153 static void scfft_ensurewindow(unsigned short log2_fullsize
, unsigned short log2_winsize
, short wintype
);
155 static bool scfft_global_initialization (void)
157 for (int wintype
=0; wintype
<2; ++wintype
) {
158 for (int i
=0; i
< SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1
; ++i
) {
159 fftWindow
[wintype
][i
] = 0;
161 for (int i
= SC_FFT_LOG2_MINSIZE
; i
< SC_FFT_LOG2_MAXSIZE
+1; ++i
) {
162 fftWindow
[wintype
][i
] = scfft_create_fftwindow(wintype
, i
);
166 for (int i
=0; i
< SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1
; ++i
) {
169 for (int i
= SC_FFT_LOG2_MINSIZE
; i
< SC_FFT_LOG2_MAXSIZE
+1; ++i
) {
170 cosTable
[i
] = create_cosTable(i
);
172 printf("SC FFT global init: cosTable initialised.\n");
174 // vDSP inits its twiddle factors
175 for (int i
= SC_FFT_LOG2_MINSIZE
; i
< SC_FFT_LOG2_MAXSIZE
+1; ++i
) {
176 fftSetup
[i
] = vDSP_create_fftsetup (i
, FFT_RADIX2
);
177 if(fftSetup
[i
] == NULL
){
178 printf("FFT ERROR: Mac vDSP library could not allocate FFT setup for size %i\n", 1<<i
);
181 // vDSP prepares its memory-aligned buffer for rearranging input data.
182 // Note max size here - meaning max input buffer size is these two sizes added together.
183 // vec_malloc used in API docs, but apparently that's deprecated and malloc is sufficient for aligned memory on OSX.
184 splitBuf
.realp
= (float*) malloc ( SC_FFT_MAXSIZE
* sizeof(float) / 2);
185 splitBuf
.imagp
= (float*) malloc ( SC_FFT_MAXSIZE
* sizeof(float) / 2);
186 //printf("SC FFT global init: vDSP initialised.\n");
188 //printf("SC FFT global init: FFTW, no init needed.\n");
193 static bool dummy
= scfft_global_initialization();
195 // You need to provide an intermediate "transform buffer". Size will depend on which underlying lib is being used.
196 // "fullsize" is the number of samples in the input buffer (inc any padding), aka the number of "points" in the FFT.
197 // Often in an SC plugin you can get this number from buf->samples if you're grabbing an external buffer.
198 // The input value is given in samples.
199 // The return value is given in bytes.
200 static size_t scfft_trbufsize(unsigned int fullsize
)
203 // Transform buf is two floats "too big" because of FFTWF's output ordering
204 return (fullsize
+2) * sizeof(float);
206 // vDSP packs the nyquist in with the DC, so size is same as input buffer (plus zeropadding)
207 // Green does this too
208 return (fullsize
) * sizeof(float);
213 scfft
* scfft_create(size_t fullsize
, size_t winsize
, SCFFT_WindowFunction wintype
,
214 float *indata
, float *outdata
, SCFFT_Direction forward
, SCFFT_Allocator
& alloc
)
216 char * chunk
= (char*) alloc
.alloc(sizeof(scfft
) + scfft_trbufsize(fullsize
));
220 scfft
* f
= (scfft
*)chunk
;
221 float *trbuf
= (float*)(chunk
+ sizeof(scfft
));
225 f
->log2nfull
= LOG2CEIL(fullsize
);
226 f
->log2nwin
= LOG2CEIL( winsize
);
227 f
->wintype
= wintype
;
229 f
->outdata
= outdata
;
232 // Buffer is larger than the range of sizes we provide for at startup; we can get ready just-in-time though
233 if (fullsize
> SC_FFT_MAXSIZE
)
234 scfft_ensurewindow(f
->log2nfull
, f
->log2nwin
, wintype
);
238 f
->plan
= fftwf_plan_dft_r2c_1d(fullsize
, trbuf
, (fftwf_complex
*) trbuf
, FFTW_ESTIMATE
);
240 f
->plan
= fftwf_plan_dft_c2r_1d(fullsize
, (fftwf_complex
*) trbuf
, outdata
, FFTW_ESTIMATE
);
243 // The scale factors rescale the data to unity gain. The old Green lib did this itself, meaning scalefacs would here be 1...
247 #else // forward FFTW and Green factor
250 } else { // backward FFTW and VDSP factor
254 f
->scalefac
= 1.f
/ fullsize
;
258 memset(trbuf
, 0, scfft_trbufsize(fullsize
));
263 static int largest_log2n
= SC_FFT_LOG2_MAXSIZE
;
265 // check the global list of windows incs ours; create if not.
266 // Note that expanding the table, if triggered, will cause a CPU hit as things are malloc'ed, realloc'ed, etc.
267 void scfft_ensurewindow(unsigned short log2_fullsize
, unsigned short log2_winsize
, short wintype
)
269 // Ensure we have enough space to do our calcs
270 if(log2_fullsize
> largest_log2n
){
271 largest_log2n
= log2_fullsize
;
273 size_t newsize
= (1 << largest_log2n
) * sizeof(float) / 2;
274 splitBuf
.realp
= (float*) realloc (splitBuf
.realp
, newsize
);
275 splitBuf
.imagp
= (float*) realloc (splitBuf
.imagp
, newsize
);
279 // Ensure our window has been created
280 if((wintype
!= -1) && (fftWindow
[wintype
][log2_winsize
] == 0)){
281 fftWindow
[wintype
][log2_winsize
] = scfft_create_fftwindow(wintype
, log2_winsize
);
284 // Ensure our FFT twiddle factors (or whatever) have been created
286 if(fftSetup
[log2_fullsize
] == 0)
287 fftSetup
[log2_fullsize
] = vDSP_create_fftsetup (log2_fullsize
, FFT_RADIX2
);
289 if(cosTable
[log2_fullsize
] == 0)
290 cosTable
[log2_fullsize
] = create_cosTable(log2_fullsize
);
294 // these do the main jobs.
295 // Note: you DON"T need to call the windowing function yourself, it'll be applied by the _dofft and _doifft funcs.
296 static void scfft_dowindowing(float *data
, unsigned int winsize
, unsigned int fullsize
, unsigned short log2_winsize
,
297 short wintype
, float scalefac
)
300 if (wintype
!= kRectWindow
) {
301 float *win
= fftWindow
[wintype
][log2_winsize
];
304 vDSP_vmul(data
, 1, win
, 1, data
, 1, winsize
);
307 float *in
= data
- 1;
308 for (i
=0; i
< winsize
; ++i
) {
315 // scale factor is different for different libs. But the compiler switch here is about using vDSP's fast multiplication method.
317 vDSP_vsmul(data
, 1, &scalefac
, data
, 1, winsize
);
319 for(int i
=0; i
<winsize
; ++i
){
325 memset(data
+ winsize
, 0, (fullsize
- winsize
) * sizeof(float));
328 void scfft_dofft(scfft
* f
)
330 // Data goes to transform buf
331 memcpy(f
->trbuf
, f
->indata
, f
->nwin
* sizeof(float));
332 scfft_dowindowing(f
->trbuf
, f
->nwin
, f
->nfull
, f
->log2nfull
, f
->wintype
, f
->scalefac
);
334 fftwf_execute(f
->plan
);
335 // Rearrange output data onto public buffer
336 memcpy(f
->outdata
, f
->trbuf
, f
->nfull
* sizeof(float));
337 f
->outdata
[1] = f
->trbuf
[f
->nfull
]; // Pack nyquist val in
339 // Perform even-odd split
340 vDSP_ctoz((COMPLEX
*) f
->trbuf
, 2, &splitBuf
, 1, f
->nfull
>> 1);
341 // Now the actual FFT
342 vDSP_fft_zrip(fftSetup
[f
->log2nfull
], &splitBuf
, 1, f
->log2nfull
, FFT_FORWARD
);
343 // Copy the data to the public output buf, transforming it back out of "split" representation
344 vDSP_ztoc(&splitBuf
, 1, (DSPComplex
*) f
->outdata
, 2, f
->nfull
>> 1);
346 // Green FFT is in-place
347 rffts(f
->trbuf
, f
->log2nfull
, 1, cosTable
[f
->log2nfull
]);
348 // Copy to public buffer
349 memcpy(f
->outdata
, f
->trbuf
, f
->nfull
* sizeof(float));
353 void scfft_doifft(scfft
* f
)
356 float *trbuf
= f
->trbuf
;
357 size_t bytesize
= f
->nfull
* sizeof(float);
358 memcpy(trbuf
, f
->indata
, bytesize
);
360 trbuf
[f
->nfull
] = f
->indata
[1]; // Nyquist goes all the way down to the end of the line...
361 trbuf
[f
->nfull
+1] = 0.f
;
363 fftwf_execute(f
->plan
);
364 // NB the plan already includes copying data to f->outbuf
367 vDSP_ctoz((COMPLEX
*) f
->indata
, 2, &splitBuf
, 1, f
->nfull
>> 1);
368 vDSP_fft_zrip(fftSetup
[f
->log2nfull
], &splitBuf
, 1, f
->log2nfull
, FFT_INVERSE
);
369 vDSP_ztoc(&splitBuf
, 1, (DSPComplex
*) f
->outdata
, 2, f
->nfull
>> 1);
371 float *trbuf
= f
->trbuf
;
372 size_t bytesize
= f
->nfull
* sizeof(float);
373 memcpy(trbuf
, f
->indata
, bytesize
);
374 // Green FFT is in-place
375 riffts(trbuf
, f
->log2nfull
, 1, cosTable
[f
->log2nfull
]);
376 // Copy to public buffer
377 memcpy(f
->outdata
, trbuf
, f
->nwin
* sizeof(float));
379 scfft_dowindowing(f
->outdata
, f
->nwin
, f
->nfull
, f
->log2nfull
, f
->wintype
, f
->scalefac
);
382 void scfft_destroy(scfft
*f
, SCFFT_Allocator
& alloc
)
387 fftwf_destroy_plan(f
->plan
);