SCDoc: Use proper static string constants instead of comparing string literals.
[supercollider.git] / common / SC_fftlib.cpp
blob6f381a492e6c223a5b18c230bf722ecbaa4ae090
1 /*
2 SC_fftlib.cpp
3 An interface to abstract over different FFT libraries, for SuperCollider 3.
4 (c) 2007-2008 Dan Stowell, incorporating code from
5 SuperCollider 3 Copyright (c) 2002 James McCartney.
6 All rights reserved.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 NOTE:
23 vDSP uses a "SplitBuf" as an intermediate representation of the data.
24 For speed we keep this global, although this makes the code non-thread-safe.
25 (This is not new to this refactoring. Just worth noting.)
28 #include "clz.h"
29 #include <stdlib.h>
30 #include <cmath>
31 #include <stdio.h>
32 #include <cstring>
33 #include <cassert>
35 #include "SC_fftlib.h"
37 #ifdef NOVA_SIMD
38 #include "simd_binary_arithmetic.hpp"
39 #endif
42 // We include vDSP even if not using for FFT, since we want to use some vectorised add/mul tricks
43 #if defined(__APPLE__) && !defined(SC_IPHONE)
44 #include "vecLib/vDSP.h"
45 #elif defined(SC_IPHONE)
46 #include <Accelerate/Accelerate.h>
47 #endif
49 ////////////////////////////////////////////////////////////////////////////////////////////////////
50 // Constants and structs
52 // Decisions here about which FFT library to use - vDSP only exists on Mac BTW.
53 // We include the relevant libs but also ensure that one, and only one, of them is active...
54 #if SC_FFT_NONE
55 // "SC_FFT_NONE" allows compilation without FFT support; only expected to be used on very limited platforms
56 #define SC_FFT_FFTW 0
57 #define SC_FFT_VDSP 0
58 #define SC_FFT_GREEN 0
59 #elif SC_FFT_GREEN
60 #define SC_FFT_FFTW 0
61 #define SC_FFT_VDSP 0
62 #define SC_FFT_GREEN 1
63 #elif !SC_FFT_FFTW && defined(__APPLE__)
64 #define SC_FFT_FFTW 0
65 #define SC_FFT_VDSP 1
66 #define SC_FFT_GREEN 0
67 #else
68 //#elif SC_FFT_FFTW
69 #define SC_FFT_FFTW 1
70 #define SC_FFT_VDSP 0
71 #define SC_FFT_GREEN 0
72 #include <fftw3.h>
73 #endif
75 // Note that things like *fftWindow actually allow for other sizes, to be created on user request.
76 #define SC_FFT_ABSOLUTE_MAXSIZE 2147483648
77 #define SC_FFT_LOG2_ABSOLUTE_MAXSIZE 31
78 #define SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1 32
80 // This struct is a bit like FFTW's idea of a "plan": it represents an FFT operation that may be applied once or repeatedly.
81 // It should be possible for indata and outdata to be the same, for quasi-in-place operation.
82 typedef struct scfft {
83 unsigned int nfull, nwin, log2nfull, log2nwin; // Lengths of full FFT frame, and the (possibly shorter) windowed data frame
84 short wintype;
85 float *indata, *outdata, *trbuf;
86 float scalefac; // Used to rescale the data to unity gain
87 } scfft;
90 static float *fftWindow[2][SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1];
92 #if SC_FFT_VDSP
93 static FFTSetup fftSetup[SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1]; // vDSP setups, one per FFT size
94 static COMPLEX_SPLIT splitBuf; // Temp buf for holding rearranged data
95 #endif
97 #if SC_FFT_GREEN
98 extern "C" {
99 #include "fftlib.h"
100 static float *cosTable[SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1];
102 #endif
104 #if SC_FFT_FFTW
105 static fftwf_plan precompiledForwardPlans[SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1];
106 static fftwf_plan precompiledBackwardPlans[SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1];
107 static fftwf_plan precompiledForwardPlansInPlace[SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1];
108 static fftwf_plan precompiledBackwardPlansInPlace[SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1];
109 #endif
111 #define pi 3.1415926535898f
112 #define twopi 6.28318530717952646f
114 ////////////////////////////////////////////////////////////////////////////////////////////////////
115 // Functions
117 #if SC_FFT_GREEN
118 static float* create_cosTable(int log2n);
119 static float* create_cosTable(int log2n)
121 int size = 1 << log2n;
122 int size2 = size / 4 + 1;
123 float *win = (float*)malloc(size2 * sizeof(float));
124 double winc = twopi / size;
125 for (int i=0; i<size2; ++i) {
126 double w = i * winc;
127 win[i] = cos(w);
129 return win;
131 #endif
133 static float* scfft_create_fftwindow(int wintype, int log2n)
135 int size = 1 << log2n;
136 unsigned short i;
137 #if _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600
138 float *win;
139 int error = posix_memalign((void**)&win, 64, size * sizeof(float)); // align by cacheline
140 if (error)
141 win = NULL;
142 #else
143 float *win = (float*)malloc(size * sizeof(float));
144 #endif
145 double winc;
146 switch(wintype) {
147 case kSineWindow:
148 winc = pi / size;
149 for (i=0; i<size; ++i) {
150 double w = i * winc;
151 win[i] = sin(w);
153 break;
154 case kHannWindow:
155 winc = twopi / size;
156 for (i=0; i<size; ++i) {
157 double w = i * winc;
158 win[i] = 0.5 - 0.5 * cos(w);
160 break;
163 return win;
167 static void scfft_ensurewindow(unsigned short log2_fullsize, unsigned short log2_winsize, short wintype);
169 static bool scfft_global_initialization (void)
171 for (int wintype=0; wintype<2; ++wintype) {
172 for (int i=0; i< SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1; ++i) {
173 fftWindow[wintype][i] = 0;
175 for (int i= SC_FFT_LOG2_MINSIZE; i < SC_FFT_LOG2_MAXSIZE+1; ++i) {
176 fftWindow[wintype][i] = scfft_create_fftwindow(wintype, i);
179 #if SC_FFT_GREEN
180 for (int i=0; i< SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1; ++i) {
181 cosTable[i] = 0;
183 for (int i= SC_FFT_LOG2_MINSIZE; i < SC_FFT_LOG2_MAXSIZE+1; ++i) {
184 cosTable[i] = create_cosTable(i);
186 printf("SC FFT global init: cosTable initialised.\n");
187 #elif SC_FFT_VDSP
188 // vDSP inits its twiddle factors
189 for (int i= SC_FFT_LOG2_MINSIZE; i < SC_FFT_LOG2_MAXSIZE+1; ++i) {
190 fftSetup[i] = vDSP_create_fftsetup (i, FFT_RADIX2);
191 if(fftSetup[i] == NULL){
192 printf("FFT ERROR: Mac vDSP library could not allocate FFT setup for size %i\n", 1<<i);
195 // vDSP prepares its memory-aligned buffer for rearranging input data.
196 // Note max size here - meaning max input buffer size is these two sizes added together.
197 // vec_malloc used in API docs, but apparently that's deprecated and malloc is sufficient for aligned memory on OSX.
198 splitBuf.realp = (float*) malloc ( SC_FFT_MAXSIZE * sizeof(float) / 2);
199 splitBuf.imagp = (float*) malloc ( SC_FFT_MAXSIZE * sizeof(float) / 2);
200 //printf("SC FFT global init: vDSP initialised.\n");
201 #elif SC_FFT_FFTW
202 size_t maxSize = 1<<SC_FFT_LOG2_MAXSIZE;
203 float * buffer1 = (float*)fftwf_malloc((maxSize + 1) * sizeof(float));
204 float * buffer2 = (float*)fftwf_malloc((maxSize + 1) * sizeof(float));
205 for (int i= SC_FFT_LOG2_MINSIZE; i < SC_FFT_LOG2_MAXSIZE+1; ++i) {
206 size_t currentSize = 1<<i;
208 precompiledForwardPlans[i] = fftwf_plan_dft_r2c_1d(currentSize, buffer1, (fftwf_complex*) buffer2, FFTW_ESTIMATE);
209 precompiledBackwardPlans[i] = fftwf_plan_dft_c2r_1d(currentSize, (fftwf_complex*) buffer2, buffer1, FFTW_ESTIMATE);
211 precompiledForwardPlansInPlace[i] = fftwf_plan_dft_r2c_1d(currentSize, buffer1, (fftwf_complex*) buffer1, FFTW_ESTIMATE);
212 precompiledBackwardPlansInPlace[i] = fftwf_plan_dft_c2r_1d(currentSize, (fftwf_complex*) buffer1, buffer1, FFTW_ESTIMATE);
214 fftwf_free(buffer1);
215 fftwf_free(buffer2);
216 #endif
217 return false;
220 static bool dummy = scfft_global_initialization();
222 // You need to provide an intermediate "transform buffer". Size will depend on which underlying lib is being used.
223 // "fullsize" is the number of samples in the input buffer (inc any padding), aka the number of "points" in the FFT.
224 // Often in an SC plugin you can get this number from buf->samples if you're grabbing an external buffer.
225 // The input value is given in samples.
226 // The return value is given in bytes.
227 static size_t scfft_trbufsize(unsigned int fullsize)
229 #if SC_FFT_FFTW
230 // Transform buf is two floats "too big" because of FFTWF's output ordering
231 return (fullsize+2) * sizeof(float);
232 #else
233 // vDSP packs the nyquist in with the DC, so size is same as input buffer (plus zeropadding)
234 // Green does this too
235 return (fullsize ) * sizeof(float);
236 #endif
240 scfft * scfft_create(size_t fullsize, size_t winsize, SCFFT_WindowFunction wintype,
241 float *indata, float *outdata, SCFFT_Direction forward, SCFFT_Allocator & alloc)
243 char * chunk = (char*) alloc.alloc(sizeof(scfft) + scfft_trbufsize(fullsize));
244 if (!chunk)
245 return NULL;
247 scfft * f = (scfft*)chunk;
248 float *trbuf = (float*)(chunk + sizeof(scfft));
250 f->nfull = fullsize;
251 f->nwin = winsize;
252 f->log2nfull = LOG2CEIL(fullsize);
253 f->log2nwin = LOG2CEIL( winsize);
254 f->wintype = wintype;
255 f->indata = indata;
256 f->outdata = outdata;
257 f->trbuf = trbuf;
259 // Buffer is larger than the range of sizes we provide for at startup; we can get ready just-in-time though
260 if (fullsize > SC_FFT_MAXSIZE)
261 scfft_ensurewindow(f->log2nfull, f->log2nwin, wintype);
263 // The scale factors rescale the data to unity gain. The old Green lib did this itself, meaning scalefacs would here be 1...
264 if(forward){
265 #if SC_FFT_VDSP
266 f->scalefac = 0.5f;
267 #else // forward FFTW and Green factor
268 f->scalefac = 1.f;
269 #endif
270 } else { // backward FFTW and VDSP factor
271 #if SC_FFT_GREEN
272 f->scalefac = 1.f;
273 #else // fftw, vdsp
274 f->scalefac = 1.f / fullsize;
275 #endif
278 memset(trbuf, 0, scfft_trbufsize(fullsize));
280 return f;
283 static int largest_log2n = SC_FFT_LOG2_MAXSIZE;
285 // check the global list of windows incs ours; create if not.
286 // Note that expanding the table, if triggered, will cause a CPU hit as things are malloc'ed, realloc'ed, etc.
287 void scfft_ensurewindow(unsigned short log2_fullsize, unsigned short log2_winsize, short wintype)
289 // Ensure we have enough space to do our calcs
290 if(log2_fullsize > largest_log2n){
291 largest_log2n = log2_fullsize;
292 #if SC_FFT_VDSP
293 size_t newsize = (1 << largest_log2n) * sizeof(float) / 2;
294 splitBuf.realp = (float*) realloc (splitBuf.realp, newsize);
295 splitBuf.imagp = (float*) realloc (splitBuf.imagp, newsize);
296 #endif
299 // Ensure our window has been created
300 if((wintype != -1) && (fftWindow[wintype][log2_winsize] == 0)){
301 fftWindow[wintype][log2_winsize] = scfft_create_fftwindow(wintype, log2_winsize);
304 // Ensure our FFT twiddle factors (or whatever) have been created
305 #if SC_FFT_VDSP
306 if(fftSetup[log2_fullsize] == 0)
307 fftSetup[log2_fullsize] = vDSP_create_fftsetup (log2_fullsize, FFT_RADIX2);
308 #elif SC_FFT_GREEN
309 if(cosTable[log2_fullsize] == 0)
310 cosTable[log2_fullsize] = create_cosTable(log2_fullsize);
311 #endif
314 // these do the main jobs.
315 // Note: you DON"T need to call the windowing function yourself, it'll be applied by the _dofft and _doifft funcs.
316 static void scfft_dowindowing(float *data, unsigned int winsize, unsigned int fullsize, unsigned short log2_winsize,
317 short wintype, float scalefac)
319 if (wintype != kRectWindow) {
320 float *win = fftWindow[wintype][log2_winsize];
321 if (!win) return;
322 #if SC_FFT_VDSP
323 vDSP_vmul(data, 1, win, 1, data, 1, winsize);
324 #elif defined (NOVA_SIMD)
325 using namespace nova;
326 if ((vec<float>::objects_per_cacheline & winsize == 0) && vec<float>::is_aligned(data))
327 times_vec_simd(data, data, win, winsize);
328 else
329 times_vec(data, data, win, winsize);
330 #else
331 --win;
332 float *in = data - 1;
333 for (int i=0; i< winsize ; ++i) {
334 *++in *= *++win;
336 #endif
339 // scale factor is different for different libs. But the compiler switch here is about using vDSP's fast multiplication method.
340 #if SC_FFT_VDSP
341 vDSP_vsmul(data, 1, &scalefac, data, 1, winsize);
342 #else
343 for(int i=0; i<winsize; ++i){
344 data[i] *= scalefac;
346 #endif
348 // Zero-padding:
349 memset(data + winsize, 0, (fullsize - winsize) * sizeof(float));
352 void scfft_dofft(scfft * f)
354 // Data goes to transform buf
355 memcpy(f->trbuf, f->indata, f->nwin * sizeof(float));
356 scfft_dowindowing(f->trbuf, f->nwin, f->nfull, f->log2nwin, f->wintype, f->scalefac);
357 #if SC_FFT_FFTW
358 // forward transformation is in-place
359 fftwf_execute_dft_r2c(precompiledForwardPlansInPlace[f->log2nfull], f->trbuf, (fftwf_complex*)f->trbuf);
361 // Rearrange output data onto public buffer
362 memcpy(f->outdata, f->trbuf, f->nfull * sizeof(float));
363 f->outdata[1] = f->trbuf[f->nfull]; // Pack nyquist val in
364 #elif SC_FFT_VDSP
365 // Perform even-odd split
366 vDSP_ctoz((COMPLEX*) f->trbuf, 2, &splitBuf, 1, f->nfull >> 1);
367 // Now the actual FFT
368 vDSP_fft_zrip(fftSetup[f->log2nfull], &splitBuf, 1, f->log2nfull, FFT_FORWARD);
369 // Copy the data to the public output buf, transforming it back out of "split" representation
370 vDSP_ztoc(&splitBuf, 1, (DSPComplex*) f->outdata, 2, f->nfull >> 1);
371 #elif SC_FFT_GREEN
372 // Green FFT is in-place
373 rffts(f->trbuf, f->log2nfull, 1, cosTable[f->log2nfull]);
374 // Copy to public buffer
375 memcpy(f->outdata, f->trbuf, f->nfull * sizeof(float));
376 #endif
379 void scfft_doifft(scfft * f)
381 #if SC_FFT_FFTW
382 float *trbuf = f->trbuf;
383 size_t bytesize = f->nfull * sizeof(float);
384 memcpy(trbuf, f->indata, bytesize);
385 trbuf[1] = 0.f;
386 trbuf[f->nfull] = f->indata[1]; // Nyquist goes all the way down to the end of the line...
387 trbuf[f->nfull+1] = 0.f;
389 fftwf_execute_dft_c2r(precompiledBackwardPlans[f->log2nfull], (fftwf_complex*)trbuf, f->outdata);
391 #elif SC_FFT_VDSP
392 vDSP_ctoz((COMPLEX*) f->indata, 2, &splitBuf, 1, f->nfull >> 1);
393 vDSP_fft_zrip(fftSetup[f->log2nfull], &splitBuf, 1, f->log2nfull, FFT_INVERSE);
394 vDSP_ztoc(&splitBuf, 1, (DSPComplex*) f->outdata, 2, f->nfull >> 1);
395 #elif SC_FFT_GREEN
396 float *trbuf = f->trbuf;
397 size_t bytesize = f->nfull * sizeof(float);
398 memcpy(trbuf, f->indata, bytesize);
399 // Green FFT is in-place
400 riffts(trbuf, f->log2nfull, 1, cosTable[f->log2nfull]);
401 // Copy to public buffer
402 memcpy(f->outdata, trbuf, f->nwin * sizeof(float));
403 #endif
404 scfft_dowindowing(f->outdata, f->nwin, f->nfull, f->log2nwin, f->wintype, f->scalefac);
407 void scfft_destroy(scfft *f, SCFFT_Allocator & alloc)
409 alloc.free(f);