/*
    OnsetsDS - real time musical onset detection library.
    Copyright (c) 2007 Dan Stowell. All rights reserved.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
////////////////////////////////////////////////////////////////////////////////
// Numeric constants and helper macros

/* Natural logarithm of 0.1. Parenthesised so the leading minus sign always
 * stays attached to the literal wherever the macro is expanded. */
#define ods_log1 (-2.30258509)

/* Single-precision circle constants. */
#define PI 3.1415926535898f
#define MINUSPI (-3.1415926535898f)
#define TWOPI 6.28318530717952646f
#define INV_TWOPI 0.1591549430919f

/*
 * Type-generic arithmetic helpers.
 * NOTE: each argument is expanded (and therefore evaluated) more than once,
 * so never pass an expression with side effects such as i++ or a function
 * call that must only run once.
 */
#define ods_abs(a) ((a) < 0 ? -(a) : (a))
#define ods_max(a,b) (((a) > (b)) ? (a) : (b))
#define ods_min(a,b) (((a) < (b)) ? (a) : (b))

/*
 * Constants for log-domain magnitude scaling:
 *   ODS_LOG_LOWER_LIMIT                 smallest magnitude allowed before taking the log
 *   ODS_LOGOF_LOG_LOWER_LIMIT           natural log of ODS_LOG_LOWER_LIMIT
 *   ODS_ABSINVOF_LOGOF_LOG_LOWER_LIMIT  1 / abs(ODS_LOGOF_LOG_LOWER_LIMIT)
 */
#define ODS_LOG_LOWER_LIMIT 2e-42
#define ODS_LOGOF_LOG_LOWER_LIMIT (-96.0154267)
#define ODS_ABSINVOF_LOGOF_LOG_LOWER_LIMIT 0.010414993

////////////////////////////////////////////////////////////////////////////////
/**
 * Types of incoming FFT data format. OnsetsDS needs to know where the FFT
 * data comes from in order to interpret it correctly.
 */
enum onsetsds_fft_types {
    ODS_FFT_SC3_COMPLEX, ///< SuperCollider, cartesian co-ords ("SCComplexBuf") - NB it's more efficient to provide polar data from SC
    ODS_FFT_SC3_POLAR,   ///< SuperCollider, polar co-ords ("SCPolarBuf")
    ODS_FFT_FFTW3_HC,    ///< FFTW <a href="http://www.fftw.org/fftw3_doc/The-Halfcomplex_002dformat-DFT.html">"halfcomplex"</a> format
    ODS_FFT_FFTW3_R2C    ///< FFTW regular format, typically produced using <a href="http://www.fftw.org/fftw3_doc/One_002dDimensional-DFTs-of-Real-Data.html#One_002dDimensional-DFTs-of-Real-Data">real-to-complex</a> transform
};

/**
 * Types of onset detection function
 */
enum onsetsds_odf_types {
    ODS_ODF_POWER,    ///< Power
    ODS_ODF_MAGSUM,   ///< Sum of magnitudes
    ODS_ODF_COMPLEX,  ///< Complex-domain deviation
    ODS_ODF_RCOMPLEX, ///< Complex-domain deviation, rectified (only increases counted)
    ODS_ODF_PHASE,    ///< Phase deviation
    ODS_ODF_WPHASE,   ///< Weighted phase deviation
    ODS_ODF_MKL       ///< Modified Kullback-Leibler deviation
};

/**
 * Types of whitening - may not all be implemented yet.
 */
enum onsetsds_wh_types {
    ODS_WH_NONE,       ///< No whitening - onsetsds_whiten() becomes a no-op
    ODS_WH_ADAPT_MAX1, ///< Adaptive whitening - tracks recent-peak-magnitude in each bin, normalises that to 1
    ODS_WH_NORMMAX,    ///< Simple normalisation - each frame is normalised (independent of others) so largest magnitude becomes 1. Not implemented.
    ODS_WH_NORMMEAN    ///< Simple normalisation - each frame is normalised (independent of others) so mean magnitude becomes 1. Not implemented.
};

////////////////////////////////////////////////////////////////////////////////
/// A single FFT bin held in polar form: a magnitude followed by a phase.
typedef struct OdsPolarBin {
    float mag;   ///< Magnitude of the bin
    float phase; ///< Phase of the bin
} OdsPolarBin;
96 typedef struct OdsPolarBuf
{
101 /// The main data structure for the onset detection routine
102 typedef struct OnsetsDS
{
103 /// "data" is a pointer to the memory that must be EXTERNALLY allocated.
104 /// Other pointers will point to locations within this memory.
106 *psp
, ///< Peak Spectral Profile - size is numbins+2, data is stored in order dc through to nyquist
107 *odfvals
, // odfvals[0] will be the current val, odfvals[1] prev, etc
108 *sortbuf
, // Used to calculate the median
109 *other
; // Typically stores data about the previous frame
110 OdsPolarBuf
* curr
; // Current FFT frame, as polar
113 srate
, ///< The sampling rate of the input audio. Set by onsetsds_init()
114 // Adaptive whitening params
115 relaxtime
, ///< Do NOT set this directly. Use onsetsds_setrelax() which will also update relaxcoef.
116 relaxcoef
, ///< Relaxation coefficient (memory coefficient). See also onsetsds_setrelax()
117 floor
, ///< floor - the lowest value that a PSP magnitude can take.
118 /// A parameter for the ODF. For most this is a magnitude threshold for a single bin to be considered;
119 /// but for #ODS_ODF_MKL it is the "epsilon" parameter.
121 /// Value used internally to scale ODF value according to the FFT frame size. Automatically set by onsetsds_init()
123 // ODF val after median processing
125 // Previous val is needed for threshold-crossing detection
127 /// Threshold (of ODF value, after median processing) for detection.
128 /// Values between 0 and 1 are expected, but outside this range may
129 /// sometimes be appropriate too.
132 int odftype
, ///< Choose from #onsetsds_odf_types
133 whtype
, ///< Choose from #onsetsds_wh_types
134 fftformat
; ///< Choose from #onsetsds_fft_types
135 bool whiten
, ///< Whether to apply whitening - onsetsds_init() decides this on your behalf
136 detected
,///< Output val - true if onset detected in curr frame
138 NOT YET USED: Whether to convert magnitudes to log domain before processing. This is done as follows:
139 Magnitudes below a log-lower-limit threshold (ODS_LOG_LOWER_LIMIT) are pushed up to that threshold (to avoid log(0) infinity problems),
140 then the log is taken. The values are re-scaled to a similar range as the linear-domain values (assumed to lie
141 between zero and approximately one) by subtracting log(ODS_LOG_LOWER_LIMIT) and then dividing by abs(log(ODS_LOG_LOWER_LIMIT)).
144 med_odd
; ///< Whether median span is odd or not (used internally)
147 /// Number of frames used in median calculation
149 /// Size of enforced gap between detections, measured in FFT frames.
151 size_t fftsize
, numbins
; // numbins is the count not including DC/nyq
////////////////////////////////////////////////////////////////////////////////
// Function prototypes

/**
 * \defgroup MainUserFuncs Main user functions
 */
/**
 * Determine how many bytes of memory must be allocated (e.g. using malloc) to
 * accompany the OnsetsDS struct, operating using the specified settings (used to
 * store part-processed FFT data etc). The user must
 * call this, and then allocate the memory, BEFORE calling onsetsds_init().
 * @param odftype Which onset detection function (ODF) you'll be using, chosen from #onsetsds_odf_types
 * @param fftsize Size of FFT: 512 or 1024 is recommended.
 * @param medspan The number of past frames that will be used for median calculation during triggering
 * @return The number of bytes that must be allocated for these settings.
 */
size_t onsetsds_memneeded(int odftype, size_t fftsize, unsigned int medspan);

176 * Initialise the OnsetsDS struct and its associated memory, ready to detect
177 * onsets using the specified settings. Must be called before any call to
178 * onsetsds_process().
180 * Note: you can change the onset detection function type in mid-operation
181 * by calling onsetsds_init() again, but because memory will be reset this
182 * will behave as if starting from scratch (rather than being aware of the past
183 * few frames of sound). Do not attempt to change the
184 * onset detection function in a more hacky way (e.g. fiddling with the struct)
185 * because memory is set up differently for each of the different ODFs.
186 * @param ods An instance of the OnsetsDS struct
187 * @param odsdata A pointer to the memory allocated, size given by onsetsds_memneeded().
188 * @param fftformat Which format of FFT data is to be expected, chosen from #onsetsds_fft_types
189 * @param odftype Which onset detection function (ODF) you'll be using, chosen from #onsetsds_odf_types
190 * @param fftsize Size of FFT: 512 or 1024 is recommended.
191 * @param medspan The number of past frames that will be used for median calculation during triggering
192 * @param srate The sampling rate of the input audio
194 void onsetsds_init(OnsetsDS
* ods
, float* odsdata
, int fftformat
,
195 int odftype
, size_t fftsize
, unsigned int medspan
, float srate
);
198 * Process a single FFT data frame in the audio signal. Note that processing
199 * assumes that each call to onsetsds_process() is on a subsequent frame in
200 * the same audio stream - to handle multiple streams you must use separate
201 * OnsetsDS structs and memory!
203 * This function's main purpose is to call some of the library's other functions,
204 * in the expected sequence.
206 bool onsetsds_process(OnsetsDS
* ods
, float* fftbuf
);
////////////////////////////////////////////////////////////////////////////////
// Function prototypes less commonly called by users

/**
 * \defgroup LessCommonFuncs Other useful functions
 */
219 * Set the "memory coefficient" indirectly via the time for the
220 * memory to decay by 60 dB.
221 * @param ods The OnsetsDS
222 * @param time The time in seconds
223 * @param hopsize The FFT frame hopsize (typically this will be half the FFT frame size)
225 void onsetsds_setrelax(OnsetsDS
* ods
, float time
, size_t hopsize
);
////////////////////////////////////////////////////////////////////////////////
// Function prototypes not typically called by users

/**
 * \defgroup OtherFuncs Other functions, not typically called by users
 */
237 * Load the current frame of FFT data into the OnsetsDS struct.
239 * Not typically called directly by users since onsetsds_process() calls this.
241 void onsetsds_loadframe(OnsetsDS
* ods
, float* fftbuf
);
244 * Apply adaptive whitening to the FFT data in the OnsetsDS struct.
246 * Not typically called directly by users since onsetsds_process() calls this.
248 void onsetsds_whiten(OnsetsDS
* ods
);
251 * Calculate the Onset Detection Function (includes scaling ODF outputs to
254 * Not typically called directly by users since onsetsds_process() calls this.
256 void onsetsds_odf(OnsetsDS
* ods
);
259 * Detects salient peaks in Onset Detection Function by removing the median,
260 * then thresholding. Afterwards, the member ods.detected will indicate whether
261 * or not an onset was detected.
263 * Not typically called directly by users since onsetsds_process() calls this.
265 void onsetsds_detect(OnsetsDS
* ods
);
269 ////////////////////////////////////////////////////////////////////////////////