inline unary ops: fix sc_floor for non-sse4.1
[supercollider.git] / common / SC_fftlib.cpp
blob75ac65daf1b2b4f7f416e48891e20a2759139195
1 /*
2 SC_fftlib.cpp
3 An interface to abstract over different FFT libraries, for SuperCollider 3.
4 (c) 2007-2008 Dan Stowell, incorporating code from
5 SuperCollider 3 Copyright (c) 2002 James McCartney.
6 All rights reserved.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 NOTE:
23 vDSP uses a "SplitBuf" as an intermediate representation of the data.
24 For speed we keep this global, although this makes the code non-thread-safe.
25 (This is not new to this refactoring. Just worth noting.)
28 #include "clz.h"
29 #include <stdlib.h>
30 #include <cmath>
31 #include <stdio.h>
32 #include <cstring>
33 #include <cassert>
35 #include "SC_fftlib.h"
38 // We include vDSP even if not using for FFT, since we want to use some vectorised add/mul tricks
39 #if defined(__APPLE__) && !defined(SC_IPHONE)
40 #include "vecLib/vDSP.h"
41 #elif defined(SC_IPHONE)
42 #include <Accelerate/Accelerate.h>
43 #endif
45 ////////////////////////////////////////////////////////////////////////////////////////////////////
46 // Constants and structs
48 // Decisions here about which FFT library to use - vDSP only exists on Mac BTW.
49 // We include the relevant libs but also ensure that one, and only one, of them is active...
50 #if SC_FFT_NONE
51 // "SC_FFT_NONE" allows compilation without FFT support; only expected to be used on very limited platforms
52 #define SC_FFT_FFTW 0
53 #define SC_FFT_VDSP 0
54 #define SC_FFT_GREEN 0
55 #elif SC_FFT_GREEN
56 #define SC_FFT_FFTW 0
57 #define SC_FFT_VDSP 0
58 #define SC_FFT_GREEN 1
59 #elif !SC_FFT_FFTW && defined(__APPLE__) && !defined(SUPERNOVA)
60 #define SC_FFT_FFTW 0
61 #define SC_FFT_VDSP 1
62 #define SC_FFT_GREEN 0
63 #else
64 //#elif SC_FFT_FFTW
65 #define SC_FFT_FFTW 1
66 #define SC_FFT_VDSP 0
67 #define SC_FFT_GREEN 0
68 #include <fftw3.h>
69 #endif
71 // Note that things like *fftWindow actually allow for other sizes, to be created on user request.
72 #define SC_FFT_ABSOLUTE_MAXSIZE 2147483648
73 #define SC_FFT_LOG2_ABSOLUTE_MAXSIZE 31
74 #define SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1 32
76 // This struct is a bit like FFTW's idea of a "plan": it represents an FFT operation that may be applied once or repeatedly.
77 // It should be possible for indata and outdata to be the same, for quasi-in-place operation.
78 typedef struct scfft {
79 unsigned int nfull, nwin, log2nfull, log2nwin; // Lengths of full FFT frame, and the (possibly shorter) windowed data frame
80 short wintype;
81 float *indata, *outdata, *trbuf;
82 float scalefac; // Used to rescale the data to unity gain
83 #if SC_FFT_FFTW
84 fftwf_plan plan;
85 #endif
86 } scfft;
89 static float *fftWindow[2][SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1];
91 #if SC_FFT_VDSP
92 static FFTSetup fftSetup[SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1]; // vDSP setups, one per FFT size
93 static COMPLEX_SPLIT splitBuf; // Temp buf for holding rearranged data
94 #endif
96 #if SC_FFT_GREEN
97 extern "C" {
98 #include "fftlib.h"
99 static float *cosTable[SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1];
101 #endif
103 #define pi 3.1415926535898f
104 #define twopi 6.28318530717952646f
106 ////////////////////////////////////////////////////////////////////////////////////////////////////
107 // Functions
109 #if SC_FFT_GREEN
110 static float* create_cosTable(int log2n);
111 static float* create_cosTable(int log2n)
113 int size = 1 << log2n;
114 int size2 = size / 4 + 1;
115 float *win = (float*)malloc(size2 * sizeof(float));
116 double winc = twopi / size;
117 for (int i=0; i<size2; ++i) {
118 double w = i * winc;
119 win[i] = cos(w);
121 return win;
123 #endif
125 static float* scfft_create_fftwindow(int wintype, int log2n)
127 int size = 1 << log2n;
128 unsigned short i;
129 float *win = (float*)malloc(size * sizeof(float));
131 double winc;
132 switch(wintype) {
133 case kSineWindow:
134 winc = pi / size;
135 for (i=0; i<size; ++i) {
136 double w = i * winc;
137 win[i] = sin(w);
139 break;
140 case kHannWindow:
141 winc = twopi / size;
142 for (i=0; i<size; ++i) {
143 double w = i * winc;
144 win[i] = 0.5 - 0.5 * cos(w);
146 break;
149 return win;
153 static void scfft_ensurewindow(unsigned short log2_fullsize, unsigned short log2_winsize, short wintype);
155 static bool scfft_global_initialization (void)
157 for (int wintype=0; wintype<2; ++wintype) {
158 for (int i=0; i< SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1; ++i) {
159 fftWindow[wintype][i] = 0;
161 for (int i= SC_FFT_LOG2_MINSIZE; i < SC_FFT_LOG2_MAXSIZE+1; ++i) {
162 fftWindow[wintype][i] = scfft_create_fftwindow(wintype, i);
165 #if SC_FFT_GREEN
166 for (int i=0; i< SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1; ++i) {
167 cosTable[i] = 0;
169 for (int i= SC_FFT_LOG2_MINSIZE; i < SC_FFT_LOG2_MAXSIZE+1; ++i) {
170 cosTable[i] = create_cosTable(i);
172 printf("SC FFT global init: cosTable initialised.\n");
173 #elif SC_FFT_VDSP
174 // vDSP inits its twiddle factors
175 for (int i= SC_FFT_LOG2_MINSIZE; i < SC_FFT_LOG2_MAXSIZE+1; ++i) {
176 fftSetup[i] = vDSP_create_fftsetup (i, FFT_RADIX2);
177 if(fftSetup[i] == NULL){
178 printf("FFT ERROR: Mac vDSP library could not allocate FFT setup for size %i\n", 1<<i);
181 // vDSP prepares its memory-aligned buffer for rearranging input data.
182 // Note max size here - meaning max input buffer size is these two sizes added together.
183 // vec_malloc used in API docs, but apparently that's deprecated and malloc is sufficient for aligned memory on OSX.
184 splitBuf.realp = (float*) malloc ( SC_FFT_MAXSIZE * sizeof(float) / 2);
185 splitBuf.imagp = (float*) malloc ( SC_FFT_MAXSIZE * sizeof(float) / 2);
186 //printf("SC FFT global init: vDSP initialised.\n");
187 #elif SC_FFT_FFTW
188 //printf("SC FFT global init: FFTW, no init needed.\n");
189 #endif
190 return false;
193 static bool dummy = scfft_global_initialization();
195 // You need to provide an intermediate "transform buffer". Size will depend on which underlying lib is being used.
196 // "fullsize" is the number of samples in the input buffer (inc any padding), aka the number of "points" in the FFT.
197 // Often in an SC plugin you can get this number from buf->samples if you're grabbing an external buffer.
198 // The input value is given in samples.
199 // The return value is given in bytes.
200 static size_t scfft_trbufsize(unsigned int fullsize)
202 #if SC_FFT_FFTW
203 // Transform buf is two floats "too big" because of FFTWF's output ordering
204 return (fullsize+2) * sizeof(float);
205 #else
206 // vDSP packs the nyquist in with the DC, so size is same as input buffer (plus zeropadding)
207 // Green does this too
208 return (fullsize ) * sizeof(float);
209 #endif
213 scfft * scfft_create(size_t fullsize, size_t winsize, SCFFT_WindowFunction wintype,
214 float *indata, float *outdata, SCFFT_Direction forward, SCFFT_Allocator & alloc)
216 char * chunk = (char*) alloc.alloc(sizeof(scfft) + scfft_trbufsize(fullsize));
217 if (!chunk)
218 return NULL;
220 scfft * f = (scfft*)chunk;
221 float *trbuf = (float*)(chunk + sizeof(scfft));
223 f->nfull = fullsize;
224 f->nwin = winsize;
225 f->log2nfull = LOG2CEIL(fullsize);
226 f->log2nwin = LOG2CEIL( winsize);
227 f->wintype = wintype;
228 f->indata = indata;
229 f->outdata = outdata;
230 f->trbuf = trbuf;
232 // Buffer is larger than the range of sizes we provide for at startup; we can get ready just-in-time though
233 if (fullsize > SC_FFT_MAXSIZE)
234 scfft_ensurewindow(f->log2nfull, f->log2nwin, wintype);
236 #if SC_FFT_FFTW
237 if(forward)
238 f->plan = fftwf_plan_dft_r2c_1d(fullsize, trbuf, (fftwf_complex*) trbuf, FFTW_ESTIMATE);
239 else
240 f->plan = fftwf_plan_dft_c2r_1d(fullsize, (fftwf_complex*) trbuf, outdata, FFTW_ESTIMATE);
241 #endif
243 // The scale factors rescale the data to unity gain. The old Green lib did this itself, meaning scalefacs would here be 1...
244 if(forward){
245 #if SC_FFT_VDSP
246 f->scalefac = 0.5f;
247 #else // forward FFTW and Green factor
248 f->scalefac = 1.f;
249 #endif
250 } else { // backward FFTW and VDSP factor
251 #if SC_FFT_GREEN
252 f->scalefac = 1.f;
253 #else // fftw, vdsp
254 f->scalefac = 1.f / fullsize;
255 #endif
258 memset(trbuf, 0, scfft_trbufsize(fullsize));
260 return f;
263 static int largest_log2n = SC_FFT_LOG2_MAXSIZE;
265 // check the global list of windows incs ours; create if not.
266 // Note that expanding the table, if triggered, will cause a CPU hit as things are malloc'ed, realloc'ed, etc.
267 void scfft_ensurewindow(unsigned short log2_fullsize, unsigned short log2_winsize, short wintype)
269 // Ensure we have enough space to do our calcs
270 if(log2_fullsize > largest_log2n){
271 largest_log2n = log2_fullsize;
272 #if SC_FFT_VDSP
273 size_t newsize = (1 << largest_log2n) * sizeof(float) / 2;
274 splitBuf.realp = (float*) realloc (splitBuf.realp, newsize);
275 splitBuf.imagp = (float*) realloc (splitBuf.imagp, newsize);
276 #endif
279 // Ensure our window has been created
280 if((wintype != -1) && (fftWindow[wintype][log2_winsize] == 0)){
281 fftWindow[wintype][log2_winsize] = scfft_create_fftwindow(wintype, log2_winsize);
284 // Ensure our FFT twiddle factors (or whatever) have been created
285 #if SC_FFT_VDSP
286 if(fftSetup[log2_fullsize] == 0)
287 fftSetup[log2_fullsize] = vDSP_create_fftsetup (log2_fullsize, FFT_RADIX2);
288 #elif SC_FFT_GREEN
289 if(cosTable[log2_fullsize] == 0)
290 cosTable[log2_fullsize] = create_cosTable(log2_fullsize);
291 #endif
294 // these do the main jobs.
295 // Note: you DON"T need to call the windowing function yourself, it'll be applied by the _dofft and _doifft funcs.
296 static void scfft_dowindowing(float *data, unsigned int winsize, unsigned int fullsize, unsigned short log2_winsize,
297 short wintype, float scalefac)
299 int i;
300 if (wintype != kRectWindow) {
301 float *win = fftWindow[wintype][log2_winsize];
302 if (!win) return;
303 #if SC_FFT_VDSP
304 vDSP_vmul(data, 1, win, 1, data, 1, winsize);
305 #else
306 --win;
307 float *in = data - 1;
308 for (i=0; i< winsize ; ++i) {
309 *++in *= *++win;
311 #endif
315 // scale factor is different for different libs. But the compiler switch here is about using vDSP's fast multiplication method.
316 #if SC_FFT_VDSP
317 vDSP_vsmul(data, 1, &scalefac, data, 1, winsize);
318 #else
319 for(int i=0; i<winsize; ++i){
320 data[i] *= scalefac;
322 #endif
324 // Zero-padding:
325 memset(data + winsize, 0, (fullsize - winsize) * sizeof(float));
328 void scfft_dofft(scfft * f)
330 // Data goes to transform buf
331 memcpy(f->trbuf, f->indata, f->nwin * sizeof(float));
332 scfft_dowindowing(f->trbuf, f->nwin, f->nfull, f->log2nfull, f->wintype, f->scalefac);
333 #if SC_FFT_FFTW
334 fftwf_execute(f->plan);
335 // Rearrange output data onto public buffer
336 memcpy(f->outdata, f->trbuf, f->nfull * sizeof(float));
337 f->outdata[1] = f->trbuf[f->nfull]; // Pack nyquist val in
338 #elif SC_FFT_VDSP
339 // Perform even-odd split
340 vDSP_ctoz((COMPLEX*) f->trbuf, 2, &splitBuf, 1, f->nfull >> 1);
341 // Now the actual FFT
342 vDSP_fft_zrip(fftSetup[f->log2nfull], &splitBuf, 1, f->log2nfull, FFT_FORWARD);
343 // Copy the data to the public output buf, transforming it back out of "split" representation
344 vDSP_ztoc(&splitBuf, 1, (DSPComplex*) f->outdata, 2, f->nfull >> 1);
345 #elif SC_FFT_GREEN
346 // Green FFT is in-place
347 rffts(f->trbuf, f->log2nfull, 1, cosTable[f->log2nfull]);
348 // Copy to public buffer
349 memcpy(f->outdata, f->trbuf, f->nfull * sizeof(float));
350 #endif
353 void scfft_doifft(scfft * f)
355 #if SC_FFT_FFTW
356 float *trbuf = f->trbuf;
357 size_t bytesize = f->nfull * sizeof(float);
358 memcpy(trbuf, f->indata, bytesize);
359 trbuf[1] = 0.f;
360 trbuf[f->nfull] = f->indata[1]; // Nyquist goes all the way down to the end of the line...
361 trbuf[f->nfull+1] = 0.f;
363 fftwf_execute(f->plan);
364 // NB the plan already includes copying data to f->outbuf
366 #elif SC_FFT_VDSP
367 vDSP_ctoz((COMPLEX*) f->indata, 2, &splitBuf, 1, f->nfull >> 1);
368 vDSP_fft_zrip(fftSetup[f->log2nfull], &splitBuf, 1, f->log2nfull, FFT_INVERSE);
369 vDSP_ztoc(&splitBuf, 1, (DSPComplex*) f->outdata, 2, f->nfull >> 1);
370 #elif SC_FFT_GREEN
371 float *trbuf = f->trbuf;
372 size_t bytesize = f->nfull * sizeof(float);
373 memcpy(trbuf, f->indata, bytesize);
374 // Green FFT is in-place
375 riffts(trbuf, f->log2nfull, 1, cosTable[f->log2nfull]);
376 // Copy to public buffer
377 memcpy(f->outdata, trbuf, f->nwin * sizeof(float));
378 #endif
379 scfft_dowindowing(f->outdata, f->nwin, f->nfull, f->log2nfull, f->wintype, f->scalefac);
382 void scfft_destroy(scfft *f, SCFFT_Allocator & alloc)
384 #if SC_FFT_FFTW
385 fftwf_destroy_plan(f->plan);
386 #endif
387 alloc.free(f);