common/SC_fftlib.cpp

   1 /*
   2 SC_fftlib.cpp
   3 An interface to abstract over different FFT libraries, for SuperCollider 3.
   4 (c) 2007-2008 Dan Stowell, incorporating code from
   5         SuperCollider 3 Copyright (c) 2002 James McCartney.
   6         All rights reserved.
   7
   8     This program is free software; you can redistribute it and/or modify
   9     it under the terms of the GNU General Public License as published by
  10     the Free Software Foundation; either version 2 of the License, or
  11     (at your option) any later version.
  12
  13     This program is distributed in the hope that it will be useful,
  14     but WITHOUT ANY WARRANTY; without even the implied warranty of
  15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16     GNU General Public License for more details.
  17
  18     You should have received a copy of the GNU General Public License
  19     along with this program; if not, write to the Free Software
  20     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
  21
  22 NOTE:
  23 vDSP uses a "SplitBuf" as an intermediate representation of the data.
  24 For speed we keep this global, although this makes the code non-thread-safe.
  25 (This is not new to this refactoring. Just worth noting.)
  26 */
  27
  28 #include "clz.h"
  29 #include <stdlib.h>
  30 #include <cmath>
  31 #include <stdio.h>
  32 #include <cstring>
  33 #include <cassert>
  34
  35 #include "SC_fftlib.h"
  36
  37
  38 // We include vDSP even if not using for FFT, since we want to use some vectorised add/mul tricks
  39 #if defined(__APPLE__) && !defined(SC_IPHONE)
  40         #include "vecLib/vDSP.h"
  41 #elif defined(SC_IPHONE)
  42         #include <Accelerate/Accelerate.h>
  43 #endif
  44
  45 ////////////////////////////////////////////////////////////////////////////////////////////////////
  46 // Constants and structs
  47
  48 // Decisions here about which FFT library to use - vDSP only exists on Mac BTW.
  49 // We include the relevant libs but also ensure that one, and only one, of them is active...
  50 #if SC_FFT_NONE
  51         // "SC_FFT_NONE" allows compilation without FFT support; only expected to be used on very limited platforms
  52         #define SC_FFT_FFTW 0
  53         #define SC_FFT_VDSP 0
  54         #define SC_FFT_GREEN 0
  55 #elif SC_FFT_GREEN
  56         #define SC_FFT_FFTW 0
  57         #define SC_FFT_VDSP 0
  58         #define SC_FFT_GREEN 1
  59 #elif !SC_FFT_FFTW && defined(__APPLE__) && !defined(SUPERNOVA)
  60         #define SC_FFT_FFTW 0
  61         #define SC_FFT_VDSP 1
  62         #define SC_FFT_GREEN 0
  63 #else
  64 //#elif SC_FFT_FFTW
  65         #define SC_FFT_FFTW 1
  66         #define SC_FFT_VDSP 0
  67         #define SC_FFT_GREEN 0
  68         #include <fftw3.h>
  69 #endif
  70
  71 // Note that things like *fftWindow actually allow for other sizes, to be created on user request.
  72 #define SC_FFT_ABSOLUTE_MAXSIZE 2147483648
  73 #define SC_FFT_LOG2_ABSOLUTE_MAXSIZE 31
  74 #define SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1 32
  75
  76 // This struct is a bit like FFTW's idea of a "plan": it represents an FFT operation that may be applied once or repeatedly.
  77 // It should be possible for indata and outdata to be the same, for quasi-in-place operation.
  78 typedef struct scfft {
  79         unsigned int nfull, nwin, log2nfull, log2nwin; // Lengths of full FFT frame, and the (possibly shorter) windowed data frame
  80         short wintype;
  81         float *indata, *outdata, *trbuf;
  82         float scalefac; // Used to rescale the data to unity gain
  83         #if SC_FFT_FFTW
  84                 fftwf_plan plan;
  85         #endif
  86 } scfft;
  87
  88
  89 static float *fftWindow[2][SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1];
  90
  91 #if SC_FFT_VDSP
  92         static FFTSetup fftSetup[SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1]; // vDSP setups, one per FFT size
  93         static COMPLEX_SPLIT splitBuf; // Temp buf for holding rearranged data
  94 #endif
  95
  96 #if SC_FFT_GREEN
  97         extern "C" {
  98                 #include "fftlib.h"
  99                 static float *cosTable[SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1];
 100         }
 101 #endif
 102
 103 #define pi 3.1415926535898f
 104 #define twopi 6.28318530717952646f
 105
 106 ////////////////////////////////////////////////////////////////////////////////////////////////////
 107 // Functions
 108
 109 #if SC_FFT_GREEN
 110 static float* create_cosTable(int log2n);
 111 static float* create_cosTable(int log2n)
 112 {
 113         int size = 1 << log2n;
 114         int size2 = size / 4 + 1;
 115         float *win = (float*)malloc(size2 * sizeof(float));
 116         double winc = twopi / size;
 117         for (int i=0; i<size2; ++i) {
 118                 double w = i * winc;
 119                 win[i] = cos(w);
 120         }
 121         return win;
 122 }
 123 #endif
 124
 125 static float* scfft_create_fftwindow(int wintype, int log2n)
 126 {
 127         int size = 1 << log2n;
 128         unsigned short i;
 129         float *win = (float*)malloc(size * sizeof(float));
 130
 131         double winc;
 132         switch(wintype) {
 133                 case kSineWindow:
 134                         winc = pi / size;
 135                         for (i=0; i<size; ++i) {
 136                                 double w = i * winc;
 137                                 win[i] = sin(w);
 138                         }
 139                         break;
 140                 case kHannWindow:
 141                         winc = twopi / size;
 142                         for (i=0; i<size; ++i) {
 143                                 double w = i * winc;
 144                                 win[i] = 0.5 - 0.5 * cos(w);
 145                         }
 146                         break;
 147         }
 148
 149         return win;
 150
 151 }
 152
 153 static void scfft_ensurewindow(unsigned short log2_fullsize, unsigned short log2_winsize, short wintype);
 154
 155 static bool scfft_global_initialization (void)
 156 {
 157         for (int wintype=0; wintype<2; ++wintype) {
 158                 for (int i=0; i< SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1; ++i) {
 159                         fftWindow[wintype][i] = 0;
 160                 }
 161                 for (int i= SC_FFT_LOG2_MINSIZE; i < SC_FFT_LOG2_MAXSIZE+1; ++i) {
 162                         fftWindow[wintype][i] = scfft_create_fftwindow(wintype, i);
 163                 }
 164         }
 165 #if SC_FFT_GREEN
 166         for (int i=0; i< SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1; ++i) {
 167                 cosTable[i] = 0;
 168         }
 169         for (int i= SC_FFT_LOG2_MINSIZE; i < SC_FFT_LOG2_MAXSIZE+1; ++i) {
 170                 cosTable[i] = create_cosTable(i);
 171         }
 172         printf("SC FFT global init: cosTable initialised.\n");
 173 #elif SC_FFT_VDSP
 174         // vDSP inits its twiddle factors
 175         for (int i= SC_FFT_LOG2_MINSIZE; i < SC_FFT_LOG2_MAXSIZE+1; ++i) {
 176                 fftSetup[i] = vDSP_create_fftsetup (i, FFT_RADIX2);
 177                 if(fftSetup[i] == NULL){
 178                         printf("FFT ERROR: Mac vDSP library could not allocate FFT setup for size %i\n", 1<<i);
 179                 }
 180         }
 181         // vDSP prepares its memory-aligned buffer for rearranging input data.
 182         // Note max size here - meaning max input buffer size is these two sizes added together.
 183         // vec_malloc used in API docs, but apparently that's deprecated and malloc is sufficient for aligned memory on OSX.
 184         splitBuf.realp = (float*) malloc ( SC_FFT_MAXSIZE * sizeof(float) / 2);
 185         splitBuf.imagp = (float*) malloc ( SC_FFT_MAXSIZE * sizeof(float) / 2);
 186         //printf("SC FFT global init: vDSP initialised.\n");
 187 #elif SC_FFT_FFTW
 188         //printf("SC FFT global init: FFTW, no init needed.\n");
 189 #endif
 190         return false;
 191 }
 192
 193 static bool dummy = scfft_global_initialization();
 194
 195 // You need to provide an intermediate "transform buffer". Size will depend on which underlying lib is being used.
 196 // "fullsize" is the number of samples in the input buffer (inc any padding), aka the number of "points" in the FFT.
 197 //   Often in an SC plugin you can get this number from buf->samples if you're grabbing an external buffer.
 198 // The input value is given in samples.
 199 // The return value is given in bytes.
 200 static size_t scfft_trbufsize(unsigned int fullsize)
 201 {
 202 #if SC_FFT_FFTW
 203         // Transform buf is two floats "too big" because of FFTWF's output ordering
 204         return (fullsize+2) * sizeof(float);
 205 #else
 206         // vDSP packs the nyquist in with the DC, so size is same as input buffer (plus zeropadding)
 207         // Green does this too
 208         return (fullsize  ) * sizeof(float);
 209 #endif
 210 }
 211
 212
 213 scfft * scfft_create(size_t fullsize, size_t winsize, SCFFT_WindowFunction wintype,
 214                                          float *indata, float *outdata, SCFFT_Direction forward, SCFFT_Allocator & alloc)
 215 {
 216         char * chunk = (char*) alloc.alloc(sizeof(scfft) + scfft_trbufsize(fullsize));
 217         if (!chunk)
 218                 return NULL;
 219
 220         scfft * f = (scfft*)chunk;
 221         float *trbuf = (float*)(chunk + sizeof(scfft));
 222
 223         f->nfull = fullsize;
 224         f->nwin  =  winsize;
 225         f->log2nfull = LOG2CEIL(fullsize);
 226         f->log2nwin  = LOG2CEIL( winsize);
 227         f->wintype = wintype;
 228         f->indata  = indata;
 229         f->outdata = outdata;
 230         f->trbuf   = trbuf;
 231
 232         // Buffer is larger than the range of sizes we provide for at startup; we can get ready just-in-time though
 233         if (fullsize > SC_FFT_MAXSIZE)
 234                 scfft_ensurewindow(f->log2nfull, f->log2nwin, wintype);
 235
 236 #if SC_FFT_FFTW
 237         if(forward)
 238                 f->plan = fftwf_plan_dft_r2c_1d(fullsize, trbuf, (fftwf_complex*) trbuf, FFTW_ESTIMATE);
 239         else
 240                 f->plan = fftwf_plan_dft_c2r_1d(fullsize, (fftwf_complex*) trbuf, outdata, FFTW_ESTIMATE);
 241 #endif
 242
 243         // The scale factors rescale the data to unity gain. The old Green lib did this itself, meaning scalefacs would here be 1...
 244         if(forward){
 245 #if SC_FFT_VDSP
 246                 f->scalefac = 0.5f;
 247 #else // forward FFTW and Green factor
 248                 f->scalefac = 1.f;
 249 #endif
 250         } else { // backward FFTW and VDSP factor
 251 #if SC_FFT_GREEN
 252                 f->scalefac = 1.f;
 253 #else  // fftw, vdsp
 254                 f->scalefac = 1.f / fullsize;
 255 #endif
 256         }
 257
 258         memset(trbuf, 0, scfft_trbufsize(fullsize));
 259
 260         return f;
 261 }
 262
 263 static int largest_log2n = SC_FFT_LOG2_MAXSIZE;
 264
 265 // check the global list of windows incs ours; create if not.
 266 // Note that expanding the table, if triggered, will cause a CPU hit as things are malloc'ed, realloc'ed, etc.
 267 void scfft_ensurewindow(unsigned short log2_fullsize, unsigned short log2_winsize, short wintype)
 268 {
 269         // Ensure we have enough space to do our calcs
 270         if(log2_fullsize > largest_log2n){
 271                 largest_log2n = log2_fullsize;
 272                 #if SC_FFT_VDSP
 273                         size_t newsize = (1 << largest_log2n) * sizeof(float) / 2;
 274                         splitBuf.realp = (float*) realloc (splitBuf.realp, newsize);
 275                         splitBuf.imagp = (float*) realloc (splitBuf.imagp, newsize);
 276                 #endif
 277         }
 278
 279         // Ensure our window has been created
 280         if((wintype != -1) && (fftWindow[wintype][log2_winsize] == 0)){
 281                 fftWindow[wintype][log2_winsize] = scfft_create_fftwindow(wintype, log2_winsize);
 282         }
 283
 284         // Ensure our FFT twiddle factors (or whatever) have been created
 285         #if SC_FFT_VDSP
 286                 if(fftSetup[log2_fullsize] == 0)
 287                         fftSetup[log2_fullsize] = vDSP_create_fftsetup (log2_fullsize, FFT_RADIX2);
 288         #elif SC_FFT_GREEN
 289                 if(cosTable[log2_fullsize] == 0)
 290                         cosTable[log2_fullsize] = create_cosTable(log2_fullsize);
 291         #endif
 292 }
 293
 294 // these do the main jobs.
 295 // Note: you DON"T need to call the windowing function yourself, it'll be applied by the _dofft and _doifft funcs.
 296 static void scfft_dowindowing(float *data, unsigned int winsize, unsigned int fullsize, unsigned short log2_winsize,
 297                                                           short wintype, float scalefac)
 298 {
 299         int i;
 300         if (wintype != kRectWindow) {
 301                 float *win = fftWindow[wintype][log2_winsize];
 302                 if (!win) return;
 303                 #if SC_FFT_VDSP
 304                         vDSP_vmul(data, 1, win, 1, data, 1, winsize);
 305                 #else
 306                         --win;
 307                         float *in = data - 1;
 308                         for (i=0; i< winsize ; ++i) {
 309                                 *++in *= *++win;
 310                         }
 311                 #endif
 312
 313         }
 314
 315                 // scale factor is different for different libs. But the compiler switch here is about using vDSP's fast multiplication method.
 316         #if SC_FFT_VDSP
 317                 vDSP_vsmul(data, 1, &scalefac, data, 1, winsize);
 318         #else
 319                 for(int i=0; i<winsize; ++i){
 320                         data[i] *= scalefac;
 321                 }
 322         #endif
 323
 324         // Zero-padding:
 325         memset(data + winsize, 0, (fullsize - winsize) * sizeof(float));
 326 }
 327
 328 void scfft_dofft(scfft * f)
 329 {
 330         // Data goes to transform buf
 331         memcpy(f->trbuf, f->indata, f->nwin * sizeof(float));
 332         scfft_dowindowing(f->trbuf, f->nwin, f->nfull, f->log2nfull, f->wintype, f->scalefac);
 333         #if SC_FFT_FFTW
 334                 fftwf_execute(f->plan);
 335                 // Rearrange output data onto public buffer
 336                 memcpy(f->outdata, f->trbuf, f->nfull * sizeof(float));
 337                 f->outdata[1] = f->trbuf[f->nfull]; // Pack nyquist val in
 338         #elif SC_FFT_VDSP
 339                 // Perform even-odd split
 340                 vDSP_ctoz((COMPLEX*) f->trbuf, 2, &splitBuf, 1, f->nfull >> 1);
 341                 // Now the actual FFT
 342                 vDSP_fft_zrip(fftSetup[f->log2nfull], &splitBuf, 1, f->log2nfull, FFT_FORWARD);
 343                 // Copy the data to the public output buf, transforming it back out of "split" representation
 344                 vDSP_ztoc(&splitBuf, 1, (DSPComplex*) f->outdata, 2, f->nfull >> 1);
 345         #elif SC_FFT_GREEN
 346                 // Green FFT is in-place
 347                 rffts(f->trbuf, f->log2nfull, 1, cosTable[f->log2nfull]);
 348                 // Copy to public buffer
 349                 memcpy(f->outdata, f->trbuf, f->nfull * sizeof(float));
 350         #endif
 351 }
 352
 353 void scfft_doifft(scfft * f)
 354 {
 355         #if SC_FFT_FFTW
 356                 float *trbuf = f->trbuf;
 357                 size_t bytesize = f->nfull * sizeof(float);
 358                 memcpy(trbuf, f->indata, bytesize);
 359                 trbuf[1] = 0.f;
 360                 trbuf[f->nfull] = f->indata[1];  // Nyquist goes all the way down to the end of the line...
 361                 trbuf[f->nfull+1] = 0.f;
 362
 363                 fftwf_execute(f->plan);
 364                 // NB the plan already includes copying data to f->outbuf
 365
 366         #elif SC_FFT_VDSP
 367                 vDSP_ctoz((COMPLEX*) f->indata, 2, &splitBuf, 1, f->nfull >> 1);
 368                 vDSP_fft_zrip(fftSetup[f->log2nfull], &splitBuf, 1, f->log2nfull, FFT_INVERSE);
 369                 vDSP_ztoc(&splitBuf, 1, (DSPComplex*) f->outdata, 2, f->nfull >> 1);
 370         #elif SC_FFT_GREEN
 371                 float *trbuf = f->trbuf;
 372                 size_t bytesize = f->nfull * sizeof(float);
 373                 memcpy(trbuf, f->indata, bytesize);
 374                 // Green FFT is in-place
 375                 riffts(trbuf, f->log2nfull, 1, cosTable[f->log2nfull]);
 376                 // Copy to public buffer
 377                 memcpy(f->outdata, trbuf, f->nwin * sizeof(float));
 378         #endif
 379         scfft_dowindowing(f->outdata, f->nwin, f->nfull, f->log2nfull, f->wintype, f->scalefac);
 380 }
 381
 382 void scfft_destroy(scfft *f, SCFFT_Allocator & alloc)
 383 {
 384 #if SC_FFT_FFTW
 385         fftwf_destroy_plan(f->plan);
 386 #endif
 387         alloc.free(f);
 388 }