1 /*****************************************************************************
2 * sofalizer.c : SOFAlizer filter for virtual binaural acoustics
3 *****************************************************************************
4 * Copyright (C) 2013-2015 Andreas Fuchs, Wolfgang Hrauda,
5 * Acoustics Research Institute (ARI), Vienna, Austria
7 * Authors: Andreas Fuchs <andi.fuchs.mail@gmail.com>
8 * Wolfgang Hrauda <wolfgang.hrauda@gmx.at>
10 * SOFAlizer project coordinator at ARI, main developer of SOFA:
11 * Piotr Majdak <piotr@majdak.at>
13 * This program is free software; you can redistribute it and/or modify it
14 * under the terms of the GNU Lesser General Public License as published by
15 * the Free Software Foundation; either version 2.1 of the License, or
16 * (at your option) any later version.
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License for more details.
23 * You should have received a copy of the GNU Lesser General Public License
24 * along with this program; if not, write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
26 *****************************************************************************/
31 #include "libavutil/mem.h"
32 #include "libavutil/tx.h"
33 #include "libavutil/avstring.h"
34 #include "libavutil/channel_layout.h"
35 #include "libavutil/float_dsp.h"
36 #include "libavutil/intmath.h"
37 #include "libavutil/opt.h"
44 #define FREQUENCY_DOMAIN 1
46 typedef struct MySofa
{ /* contains data of one SOFA file */
47 struct MYSOFA_HRTF
*hrtf
;
48 struct MYSOFA_LOOKUP
*lookup
;
49 struct MYSOFA_NEIGHBORHOOD
*neighborhood
;
50 int ir_samples
; /* length of one impulse response (IR) */
51 int n_samples
; /* ir_samples to next power of 2 */
52 float *lir
, *rir
; /* IRs (time-domain) */
57 typedef struct VirtualSpeaker
{
63 typedef struct SOFAlizerContext
{
66 char *filename
; /* name of SOFA file */
67 MySofa sofa
; /* contains data of the SOFA file */
69 int sample_rate
; /* sample rate from SOFA file */
70 float *speaker_azim
; /* azimuth of the virtual loudspeakers */
71 float *speaker_elev
; /* elevation of the virtual loudspeakers */
72 char *speakers_pos
; /* custom positions of the virtual loudspeakers */
73 float lfe_gain
; /* initial gain for the LFE channel */
74 float gain_lfe
; /* gain applied to LFE channel */
75 int lfe_channel
; /* LFE channel position in channel layout */
77 int n_conv
; /* number of channels to convolute */
79 /* buffer variables (for convolution) */
80 float *ringbuffer
[2]; /* buffers input samples, length of one buffer: */
81 /* no. input ch. (incl. LFE) x buffer_length */
82 int write
[2]; /* current write position to ringbuffer */
83 int buffer_length
; /* is: longest IR plus max. delay in all SOFA files */
84 /* then choose next power of 2 */
85 int n_fft
; /* number of samples in one FFT block */
88 /* netCDF variables */
89 int *delay
[2]; /* broadband delay for each channel/IR to be convolved */
91 float *data_ir
[2]; /* IRs for all channels to be convolved */
92 /* (this excludes the LFE) */
94 AVComplexFloat
*in_fft
[2]; /* Array to hold input FFT values */
95 AVComplexFloat
*out_fft
[2]; /* Array to hold output FFT values */
96 AVComplexFloat
*temp_afft
[2]; /* Array to accumulate FFT values prior to IFFT */
98 /* control variables */
99 float gain
; /* filter gain (in dB) */
100 float rotation
; /* rotation of virtual loudspeakers (in degrees) */
101 float elevation
; /* elevation of virtual loudspeakers (in deg.) */
102 float radius
; /* distance virtual loudspeakers to listener (in metres) */
103 int type
; /* processing type */
104 int framesize
; /* size of buffer */
105 int normalize
; /* should all IRs be normalized upon import ? */
106 int interpolate
; /* should wanted IRs be interpolated from neighbors ? */
107 int minphase
; /* should all IRs be minphased upon import ? */
108 float anglestep
; /* neighbor search angle step, in agles */
109 float radstep
; /* neighbor search radius step, in meters */
111 VirtualSpeaker vspkrpos
[64];
113 AVTXContext
*fft
[2], *ifft
[2];
114 av_tx_fn tx_fn
[2], itx_fn
[2];
115 AVComplexFloat
*data_hrtf
[2];
117 AVFloatDSPContext
*fdsp
;
120 static int close_sofa(struct MySofa
*sofa
)
122 if (sofa
->neighborhood
)
123 mysofa_neighborhood_free(sofa
->neighborhood
);
124 sofa
->neighborhood
= NULL
;
126 mysofa_lookup_free(sofa
->lookup
);
129 mysofa_free(sofa
->hrtf
);
131 av_freep(&sofa
->fir
);
136 static int preload_sofa(AVFilterContext
*ctx
, char *filename
, int *samplingrate
)
138 struct SOFAlizerContext
*s
= ctx
->priv
;
139 struct MYSOFA_HRTF
*mysofa
;
143 mysofa
= mysofa_load(filename
, &ret
);
144 s
->sofa
.hrtf
= mysofa
;
145 if (ret
|| !mysofa
) {
146 av_log(ctx
, AV_LOG_ERROR
, "Can't find SOFA-file '%s'\n", filename
);
147 return AVERROR(EINVAL
);
150 ret
= mysofa_check(mysofa
);
151 if (ret
!= MYSOFA_OK
) {
152 av_log(ctx
, AV_LOG_ERROR
, "Selected SOFA file is invalid. Please select valid SOFA file.\n");
157 mysofa_loudness(s
->sofa
.hrtf
);
160 mysofa_minphase(s
->sofa
.hrtf
, 0.01f
);
162 mysofa_tocartesian(s
->sofa
.hrtf
);
164 s
->sofa
.lookup
= mysofa_lookup_init(s
->sofa
.hrtf
);
165 if (s
->sofa
.lookup
== NULL
)
166 return AVERROR(EINVAL
);
169 s
->sofa
.neighborhood
= mysofa_neighborhood_init_withstepdefine(s
->sofa
.hrtf
,
174 s
->sofa
.fir
= av_calloc(s
->sofa
.hrtf
->N
* s
->sofa
.hrtf
->R
, sizeof(*s
->sofa
.fir
));
176 return AVERROR(ENOMEM
);
178 if (mysofa
->DataSamplingRate
.elements
!= 1)
179 return AVERROR(EINVAL
);
180 av_log(ctx
, AV_LOG_DEBUG
, "Original IR length: %d.\n", mysofa
->N
);
181 *samplingrate
= mysofa
->DataSamplingRate
.values
[0];
182 license
= mysofa_getAttribute(mysofa
->attributes
, (char *)"License");
184 av_log(ctx
, AV_LOG_INFO
, "SOFA license: %s\n", license
);
189 static int parse_channel_name(AVFilterContext
*ctx
, char **arg
, int *rchannel
)
192 enum AVChannel channel_id
= 0;
195 /* try to parse a channel name, e.g. "FL" */
196 if (av_sscanf(*arg
, "%7[A-Z]%n", buf
, &len
)) {
197 channel_id
= av_channel_from_string(buf
);
198 if (channel_id
< 0 || channel_id
>= 64) {
199 av_log(ctx
, AV_LOG_WARNING
, "Failed to parse \'%s\' as channel name.\n", buf
);
200 return AVERROR(EINVAL
);
203 *rchannel
= channel_id
;
206 } else if (av_sscanf(*arg
, "%d%n", &channel_id
, &len
) == 1) {
207 if (channel_id
< 0 || channel_id
>= 64) {
208 av_log(ctx
, AV_LOG_WARNING
, "Failed to parse \'%d\' as channel number.\n", channel_id
);
209 return AVERROR(EINVAL
);
211 *rchannel
= channel_id
;
215 return AVERROR(EINVAL
);
218 static void parse_speaker_pos(AVFilterContext
*ctx
)
220 SOFAlizerContext
*s
= ctx
->priv
;
221 char *arg
, *tokenizer
, *p
, *args
= av_strdup(s
->speakers_pos
);
227 while ((arg
= av_strtok(p
, "|", &tokenizer
))) {
232 if (parse_channel_name(ctx
, &arg
, &out_ch_id
)) {
235 if (av_sscanf(arg
, "%f %f", &azim
, &elev
) == 2) {
236 s
->vspkrpos
[out_ch_id
].set
= 1;
237 s
->vspkrpos
[out_ch_id
].azim
= azim
;
238 s
->vspkrpos
[out_ch_id
].elev
= elev
;
239 } else if (av_sscanf(arg
, "%f", &azim
) == 1) {
240 s
->vspkrpos
[out_ch_id
].set
= 1;
241 s
->vspkrpos
[out_ch_id
].azim
= azim
;
242 s
->vspkrpos
[out_ch_id
].elev
= 0;
249 static int get_speaker_pos(AVFilterContext
*ctx
,
250 float *speaker_azim
, float *speaker_elev
)
252 struct SOFAlizerContext
*s
= ctx
->priv
;
253 AVChannelLayout
*channel_layout
= &ctx
->inputs
[0]->ch_layout
;
254 float azim
[64] = { 0 };
255 float elev
[64] = { 0 };
256 int ch
, n_conv
= ctx
->inputs
[0]->ch_layout
.nb_channels
; /* get no. input channels */
258 if (n_conv
< 0 || n_conv
> 64)
259 return AVERROR(EINVAL
);
264 parse_speaker_pos(ctx
);
266 /* set speaker positions according to input channel configuration: */
267 for (ch
= 0; ch
< n_conv
; ch
++) {
268 int chan
= av_channel_layout_channel_from_index(channel_layout
, ch
);
271 case AV_CHAN_FRONT_LEFT
: azim
[ch
] = 30; break;
272 case AV_CHAN_FRONT_RIGHT
: azim
[ch
] = 330; break;
273 case AV_CHAN_FRONT_CENTER
: azim
[ch
] = 0; break;
274 case AV_CHAN_LOW_FREQUENCY
:
275 case AV_CHAN_LOW_FREQUENCY_2
: s
->lfe_channel
= ch
; break;
276 case AV_CHAN_BACK_LEFT
: azim
[ch
] = 150; break;
277 case AV_CHAN_BACK_RIGHT
: azim
[ch
] = 210; break;
278 case AV_CHAN_BACK_CENTER
: azim
[ch
] = 180; break;
279 case AV_CHAN_SIDE_LEFT
: azim
[ch
] = 90; break;
280 case AV_CHAN_SIDE_RIGHT
: azim
[ch
] = 270; break;
281 case AV_CHAN_FRONT_LEFT_OF_CENTER
: azim
[ch
] = 15; break;
282 case AV_CHAN_FRONT_RIGHT_OF_CENTER
: azim
[ch
] = 345; break;
283 case AV_CHAN_TOP_CENTER
: azim
[ch
] = 0;
284 elev
[ch
] = 90; break;
285 case AV_CHAN_TOP_FRONT_LEFT
: azim
[ch
] = 30;
286 elev
[ch
] = 45; break;
287 case AV_CHAN_TOP_FRONT_CENTER
: azim
[ch
] = 0;
288 elev
[ch
] = 45; break;
289 case AV_CHAN_TOP_FRONT_RIGHT
: azim
[ch
] = 330;
290 elev
[ch
] = 45; break;
291 case AV_CHAN_TOP_BACK_LEFT
: azim
[ch
] = 150;
292 elev
[ch
] = 45; break;
293 case AV_CHAN_TOP_BACK_RIGHT
: azim
[ch
] = 210;
294 elev
[ch
] = 45; break;
295 case AV_CHAN_TOP_BACK_CENTER
: azim
[ch
] = 180;
296 elev
[ch
] = 45; break;
297 case AV_CHAN_WIDE_LEFT
: azim
[ch
] = 90; break;
298 case AV_CHAN_WIDE_RIGHT
: azim
[ch
] = 270; break;
299 case AV_CHAN_SURROUND_DIRECT_LEFT
: azim
[ch
] = 90; break;
300 case AV_CHAN_SURROUND_DIRECT_RIGHT
: azim
[ch
] = 270; break;
301 case AV_CHAN_STEREO_LEFT
: azim
[ch
] = 90; break;
302 case AV_CHAN_STEREO_RIGHT
: azim
[ch
] = 270; break;
304 return AVERROR(EINVAL
);
307 if (s
->vspkrpos
[ch
].set
) {
308 azim
[ch
] = s
->vspkrpos
[ch
].azim
;
309 elev
[ch
] = s
->vspkrpos
[ch
].elev
;
313 memcpy(speaker_azim
, azim
, n_conv
* sizeof(float));
314 memcpy(speaker_elev
, elev
, n_conv
* sizeof(float));
320 typedef struct ThreadData
{
328 AVComplexFloat
**in_fft
;
329 AVComplexFloat
**out_fft
;
330 AVComplexFloat
**temp_afft
;
333 static int sofalizer_convolute(AVFilterContext
*ctx
, void *arg
, int jobnr
, int nb_jobs
)
335 SOFAlizerContext
*s
= ctx
->priv
;
336 ThreadData
*td
= arg
;
337 AVFrame
*in
= td
->in
, *out
= td
->out
;
339 int *write
= &td
->write
[jobnr
];
340 const int *const delay
= td
->delay
[jobnr
];
341 const float *const ir
= td
->ir
[jobnr
];
342 int *n_clippings
= &td
->n_clippings
[jobnr
];
343 float *ringbuffer
= td
->ringbuffer
[jobnr
];
344 float *temp_src
= td
->temp_src
[jobnr
];
345 const int ir_samples
= s
->sofa
.ir_samples
; /* length of one IR */
346 const int n_samples
= s
->sofa
.n_samples
;
347 const int planar
= in
->format
== AV_SAMPLE_FMT_FLTP
;
348 const int mult
= 1 + !planar
;
349 const float *src
= (const float *)in
->extended_data
[0]; /* get pointer to audio input buffer */
350 float *dst
= (float *)out
->extended_data
[jobnr
* planar
]; /* get pointer to audio output buffer */
351 const int in_channels
= s
->n_conv
; /* number of input channels */
352 /* ring buffer length is: longest IR plus max. delay -> next power of 2 */
353 const int buffer_length
= s
->buffer_length
;
354 /* -1 for AND instead of MODULO (applied to powers of 2): */
355 const uint32_t modulo
= (uint32_t)buffer_length
- 1;
356 float *buffer
[64]; /* holds ringbuffer for each input channel */
364 for (l
= 0; l
< in_channels
; l
++) {
365 /* get starting address of ringbuffer for each input channel */
366 buffer
[l
] = ringbuffer
+ l
* buffer_length
;
369 for (i
= 0; i
< in
->nb_samples
; i
++) {
370 const float *temp_ir
= ir
; /* using same set of IRs for each sample */
374 for (l
= 0; l
< in_channels
; l
++) {
375 const float *srcp
= (const float *)in
->extended_data
[l
];
377 /* write current input sample to ringbuffer (for each channel) */
378 buffer
[l
][wr
] = srcp
[i
];
381 for (l
= 0; l
< in_channels
; l
++) {
382 /* write current input sample to ringbuffer (for each channel) */
383 buffer
[l
][wr
] = src
[l
];
387 /* loop goes through all channels to be convolved */
388 for (l
= 0; l
< in_channels
; l
++) {
389 const float *const bptr
= buffer
[l
];
391 if (l
== s
->lfe_channel
) {
392 /* LFE is an input channel but requires no convolution */
393 /* apply gain to LFE signal and add to output buffer */
394 dst
[0] += *(buffer
[s
->lfe_channel
] + wr
) * s
->gain_lfe
;
395 temp_ir
+= n_samples
;
399 /* current read position in ringbuffer: input sample write position
400 * - delay for l-th ch. + diff. betw. IR length and buffer length
401 * (mod buffer length) */
402 read
= (wr
- delay
[l
] - (ir_samples
- 1) + buffer_length
) & modulo
;
404 if (read
+ ir_samples
< buffer_length
) {
405 memmove(temp_src
, bptr
+ read
, ir_samples
* sizeof(*temp_src
));
407 int len
= FFMIN(n_samples
- (read
% ir_samples
), buffer_length
- read
);
409 memmove(temp_src
, bptr
+ read
, len
* sizeof(*temp_src
));
410 memmove(temp_src
+ len
, bptr
, (n_samples
- len
) * sizeof(*temp_src
));
413 /* multiply signal and IR, and add up the results */
414 dst
[0] += s
->fdsp
->scalarproduct_float(temp_ir
, temp_src
, FFALIGN(ir_samples
, 32));
415 temp_ir
+= n_samples
;
418 /* clippings counter */
419 if (fabsf(dst
[0]) > 1)
422 /* move output buffer pointer by +2 to get to next sample of processed channel: */
425 wr
= (wr
+ 1) & modulo
; /* update ringbuffer write position */
428 *write
= wr
; /* remember write position in ringbuffer for next call */
433 static int sofalizer_fast_convolute(AVFilterContext
*ctx
, void *arg
, int jobnr
, int nb_jobs
)
435 SOFAlizerContext
*s
= ctx
->priv
;
436 ThreadData
*td
= arg
;
437 AVFrame
*in
= td
->in
, *out
= td
->out
;
439 int *write
= &td
->write
[jobnr
];
440 AVComplexFloat
*hrtf
= s
->data_hrtf
[jobnr
]; /* get pointers to current HRTF data */
441 int *n_clippings
= &td
->n_clippings
[jobnr
];
442 float *ringbuffer
= td
->ringbuffer
[jobnr
];
443 const int ir_samples
= s
->sofa
.ir_samples
; /* length of one IR */
444 const int planar
= in
->format
== AV_SAMPLE_FMT_FLTP
;
445 const int mult
= 1 + !planar
;
446 float *dst
= (float *)out
->extended_data
[jobnr
* planar
]; /* get pointer to audio output buffer */
447 const int in_channels
= s
->n_conv
; /* number of input channels */
448 /* ring buffer length is: longest IR plus max. delay -> next power of 2 */
449 const int buffer_length
= s
->buffer_length
;
450 /* -1 for AND instead of MODULO (applied to powers of 2): */
451 const uint32_t modulo
= (uint32_t)buffer_length
- 1;
452 AVComplexFloat
*fft_in
= s
->in_fft
[jobnr
]; /* temporary array for FFT input data */
453 AVComplexFloat
*fft_out
= s
->out_fft
[jobnr
]; /* temporary array for FFT output data */
454 AVComplexFloat
*fft_acc
= s
->temp_afft
[jobnr
];
455 AVTXContext
*ifft
= s
->ifft
[jobnr
];
456 av_tx_fn itx_fn
= s
->itx_fn
[jobnr
];
457 AVTXContext
*fft
= s
->fft
[jobnr
];
458 av_tx_fn tx_fn
= s
->tx_fn
[jobnr
];
459 const int n_conv
= s
->n_conv
;
460 const int n_fft
= s
->n_fft
;
461 const float fft_scale
= 1.0f
/ s
->n_fft
;
462 AVComplexFloat
*hrtf_offset
;
470 /* find minimum between number of samples and output buffer length:
471 * (important, if one IR is longer than the output buffer) */
472 n_read
= FFMIN(ir_samples
, in
->nb_samples
);
473 for (j
= 0; j
< n_read
; j
++) {
474 /* initialize output buf with saved signal from overflow buf */
475 dst
[mult
* j
] = ringbuffer
[wr
];
476 ringbuffer
[wr
] = 0.0f
; /* re-set read samples to zero */
477 /* update ringbuffer read/write position */
478 wr
= (wr
+ 1) & modulo
;
481 /* initialize rest of output buffer with 0 */
482 for (j
= n_read
; j
< in
->nb_samples
; j
++) {
486 /* fill FFT accumulation with 0 */
487 memset(fft_acc
, 0, sizeof(AVComplexFloat
) * n_fft
);
489 for (i
= 0; i
< n_conv
; i
++) {
490 const float *src
= (const float *)in
->extended_data
[i
* planar
]; /* get pointer to audio input buffer */
492 if (i
== s
->lfe_channel
) { /* LFE */
493 if (in
->format
== AV_SAMPLE_FMT_FLT
) {
494 for (j
= 0; j
< in
->nb_samples
; j
++) {
495 /* apply gain to LFE signal and add to output buffer */
496 dst
[2 * j
] += src
[i
+ j
* in_channels
] * s
->gain_lfe
;
499 for (j
= 0; j
< in
->nb_samples
; j
++) {
500 /* apply gain to LFE signal and add to output buffer */
501 dst
[j
] += src
[j
] * s
->gain_lfe
;
507 /* outer loop: go through all input channels to be convolved */
508 offset
= i
* n_fft
; /* no. samples already processed */
509 hrtf_offset
= hrtf
+ offset
;
511 /* fill FFT input with 0 (we want to zero-pad) */
512 memset(fft_in
, 0, sizeof(AVComplexFloat
) * n_fft
);
514 if (in
->format
== AV_SAMPLE_FMT_FLT
) {
515 for (j
= 0; j
< in
->nb_samples
; j
++) {
516 /* prepare input for FFT */
517 /* write all samples of current input channel to FFT input array */
518 fft_in
[j
].re
= src
[j
* in_channels
+ i
];
521 for (j
= 0; j
< in
->nb_samples
; j
++) {
522 /* prepare input for FFT */
523 /* write all samples of current input channel to FFT input array */
524 fft_in
[j
].re
= src
[j
];
528 /* transform input signal of current channel to frequency domain */
529 tx_fn(fft
, fft_out
, fft_in
, sizeof(*fft_in
));
531 for (j
= 0; j
< n_fft
; j
++) {
532 const AVComplexFloat
*hcomplex
= hrtf_offset
+ j
;
533 const float re
= fft_out
[j
].re
;
534 const float im
= fft_out
[j
].im
;
536 /* complex multiplication of input signal and HRTFs */
537 /* output channel (real): */
538 fft_acc
[j
].re
+= re
* hcomplex
->re
- im
* hcomplex
->im
;
539 /* output channel (imag): */
540 fft_acc
[j
].im
+= re
* hcomplex
->im
+ im
* hcomplex
->re
;
544 /* transform output signal of current channel back to time domain */
545 itx_fn(ifft
, fft_out
, fft_acc
, sizeof(*fft_acc
));
547 for (j
= 0; j
< in
->nb_samples
; j
++) {
548 /* write output signal of current channel to output buffer */
549 dst
[mult
* j
] += fft_out
[j
].re
* fft_scale
;
552 for (j
= 0; j
< ir_samples
- 1; j
++) { /* overflow length is IR length - 1 */
553 /* write the rest of output signal to overflow buffer */
554 int write_pos
= (wr
+ j
) & modulo
;
556 *(ringbuffer
+ write_pos
) += fft_out
[in
->nb_samples
+ j
].re
* fft_scale
;
559 /* go through all samples of current output buffer: count clippings */
560 for (i
= 0; i
< out
->nb_samples
; i
++) {
561 /* clippings counter */
562 if (fabsf(dst
[i
* mult
]) > 1) { /* if current output sample > 1 */
567 /* remember read/write position in ringbuffer for next call */
573 static int filter_frame(AVFilterLink
*inlink
, AVFrame
*in
)
575 AVFilterContext
*ctx
= inlink
->dst
;
576 SOFAlizerContext
*s
= ctx
->priv
;
577 AVFilterLink
*outlink
= ctx
->outputs
[0];
578 int n_clippings
[2] = { 0 };
582 out
= ff_get_audio_buffer(outlink
, in
->nb_samples
);
585 return AVERROR(ENOMEM
);
587 av_frame_copy_props(out
, in
);
589 td
.in
= in
; td
.out
= out
; td
.write
= s
->write
;
590 td
.delay
= s
->delay
; td
.ir
= s
->data_ir
; td
.n_clippings
= n_clippings
;
591 td
.ringbuffer
= s
->ringbuffer
; td
.temp_src
= s
->temp_src
;
592 td
.in_fft
= s
->in_fft
;
593 td
.out_fft
= s
->out_fft
;
594 td
.temp_afft
= s
->temp_afft
;
596 if (s
->type
== TIME_DOMAIN
) {
597 ff_filter_execute(ctx
, sofalizer_convolute
, &td
, NULL
, 2);
598 } else if (s
->type
== FREQUENCY_DOMAIN
) {
599 ff_filter_execute(ctx
, sofalizer_fast_convolute
, &td
, NULL
, 2);
602 /* display error message if clipping occurred */
603 if (n_clippings
[0] + n_clippings
[1] > 0) {
604 av_log(ctx
, AV_LOG_WARNING
, "%d of %d samples clipped. Please reduce gain.\n",
605 n_clippings
[0] + n_clippings
[1], out
->nb_samples
* 2);
609 return ff_filter_frame(outlink
, out
);
612 static int activate(AVFilterContext
*ctx
)
614 AVFilterLink
*inlink
= ctx
->inputs
[0];
615 AVFilterLink
*outlink
= ctx
->outputs
[0];
616 SOFAlizerContext
*s
= ctx
->priv
;
620 FF_FILTER_FORWARD_STATUS_BACK(outlink
, inlink
);
623 ret
= ff_inlink_consume_samples(inlink
, s
->nb_samples
, s
->nb_samples
, &in
);
625 ret
= ff_inlink_consume_frame(inlink
, &in
);
629 return filter_frame(inlink
, in
);
631 FF_FILTER_FORWARD_STATUS(inlink
, outlink
);
632 FF_FILTER_FORWARD_WANTED(outlink
, inlink
);
634 return FFERROR_NOT_READY
;
637 static int query_formats(const AVFilterContext
*ctx
,
638 AVFilterFormatsConfig
**cfg_in
,
639 AVFilterFormatsConfig
**cfg_out
)
641 const SOFAlizerContext
*s
= ctx
->priv
;
642 AVFilterChannelLayouts
*layouts
= NULL
;
643 int ret
, sample_rates
[] = { 48000, -1 };
644 static const enum AVSampleFormat sample_fmts
[] = {
645 AV_SAMPLE_FMT_FLT
, AV_SAMPLE_FMT_FLTP
,
649 ret
= ff_set_common_formats_from_list2(ctx
, cfg_in
, cfg_out
, sample_fmts
);
653 layouts
= ff_all_channel_layouts();
655 return AVERROR(ENOMEM
);
657 ret
= ff_channel_layouts_ref(layouts
, &cfg_in
[0]->channel_layouts
);
662 ret
= ff_add_channel_layout(&layouts
, &(AVChannelLayout
)AV_CHANNEL_LAYOUT_STEREO
);
666 ret
= ff_channel_layouts_ref(layouts
, &cfg_out
[0]->channel_layouts
);
670 sample_rates
[0] = s
->sample_rate
;
671 return ff_set_common_samplerates_from_list2(ctx
, cfg_in
, cfg_out
, sample_rates
);
674 static int getfilter_float(AVFilterContext
*ctx
, float x
, float y
, float z
,
675 float *left
, float *right
,
676 float *delay_left
, float *delay_right
)
678 struct SOFAlizerContext
*s
= ctx
->priv
;
679 float c
[3], delays
[2];
685 c
[0] = x
, c
[1] = y
, c
[2] = z
;
686 nearest
= mysofa_lookup(s
->sofa
.lookup
, c
);
688 return AVERROR(EINVAL
);
690 if (s
->interpolate
) {
691 neighbors
= mysofa_neighborhood(s
->sofa
.neighborhood
, nearest
);
692 res
= mysofa_interpolate(s
->sofa
.hrtf
, c
,
694 s
->sofa
.fir
, delays
);
696 if (s
->sofa
.hrtf
->DataDelay
.elements
> s
->sofa
.hrtf
->R
) {
697 delays
[0] = s
->sofa
.hrtf
->DataDelay
.values
[nearest
* s
->sofa
.hrtf
->R
];
698 delays
[1] = s
->sofa
.hrtf
->DataDelay
.values
[nearest
* s
->sofa
.hrtf
->R
+ 1];
700 delays
[0] = s
->sofa
.hrtf
->DataDelay
.values
[0];
701 delays
[1] = s
->sofa
.hrtf
->DataDelay
.values
[1];
703 res
= s
->sofa
.hrtf
->DataIR
.values
+ nearest
* s
->sofa
.hrtf
->N
* s
->sofa
.hrtf
->R
;
706 *delay_left
= delays
[0];
707 *delay_right
= delays
[1];
710 fr
= res
+ s
->sofa
.hrtf
->N
;
712 memcpy(left
, fl
, sizeof(float) * s
->sofa
.hrtf
->N
);
713 memcpy(right
, fr
, sizeof(float) * s
->sofa
.hrtf
->N
);
718 static int load_data(AVFilterContext
*ctx
, int azim
, int elev
, float radius
, int sample_rate
)
720 struct SOFAlizerContext
*s
= ctx
->priv
;
723 int n_conv
= s
->n_conv
; /* no. channels to convolve */
725 float delay_l
; /* broadband delay for each IR */
727 int nb_input_channels
= ctx
->inputs
[0]->ch_layout
.nb_channels
; /* no. input channels */
728 float gain_lin
= expf((s
->gain
- 3 * nb_input_channels
) / 20 * M_LN10
); /* gain - 3dB/channel */
729 AVComplexFloat
*data_hrtf_l
= NULL
;
730 AVComplexFloat
*data_hrtf_r
= NULL
;
731 AVComplexFloat
*fft_out_l
= NULL
;
732 AVComplexFloat
*fft_out_r
= NULL
;
733 AVComplexFloat
*fft_in_l
= NULL
;
734 AVComplexFloat
*fft_in_r
= NULL
;
735 float *data_ir_l
= NULL
;
736 float *data_ir_r
= NULL
;
737 int offset
= 0; /* used for faster pointer arithmetics in for-loop */
738 int i
, j
, azim_orig
= azim
, elev_orig
= elev
;
743 av_log(ctx
, AV_LOG_DEBUG
, "IR length: %d.\n", s
->sofa
.hrtf
->N
);
744 s
->sofa
.ir_samples
= s
->sofa
.hrtf
->N
;
745 s
->sofa
.n_samples
= 1 << (32 - ff_clz(s
->sofa
.ir_samples
));
747 n_samples
= s
->sofa
.n_samples
;
748 ir_samples
= s
->sofa
.ir_samples
;
750 if (s
->type
== TIME_DOMAIN
) {
751 s
->data_ir
[0] = av_calloc(n_samples
, sizeof(float) * s
->n_conv
);
752 s
->data_ir
[1] = av_calloc(n_samples
, sizeof(float) * s
->n_conv
);
754 if (!s
->data_ir
[0] || !s
->data_ir
[1]) {
755 ret
= AVERROR(ENOMEM
);
760 s
->delay
[0] = av_calloc(s
->n_conv
, sizeof(int));
761 s
->delay
[1] = av_calloc(s
->n_conv
, sizeof(int));
763 if (!s
->delay
[0] || !s
->delay
[1]) {
764 ret
= AVERROR(ENOMEM
);
768 /* get temporary IR for L and R channel */
769 data_ir_l
= av_calloc(n_conv
* n_samples
, sizeof(*data_ir_l
));
770 data_ir_r
= av_calloc(n_conv
* n_samples
, sizeof(*data_ir_r
));
771 if (!data_ir_r
|| !data_ir_l
) {
772 ret
= AVERROR(ENOMEM
);
776 if (s
->type
== TIME_DOMAIN
) {
777 s
->temp_src
[0] = av_calloc(n_samples
, sizeof(float));
778 s
->temp_src
[1] = av_calloc(n_samples
, sizeof(float));
779 if (!s
->temp_src
[0] || !s
->temp_src
[1]) {
780 ret
= AVERROR(ENOMEM
);
785 s
->speaker_azim
= av_calloc(s
->n_conv
, sizeof(*s
->speaker_azim
));
786 s
->speaker_elev
= av_calloc(s
->n_conv
, sizeof(*s
->speaker_elev
));
787 if (!s
->speaker_azim
|| !s
->speaker_elev
) {
788 ret
= AVERROR(ENOMEM
);
792 /* get speaker positions */
793 if ((ret
= get_speaker_pos(ctx
, s
->speaker_azim
, s
->speaker_elev
)) < 0) {
794 av_log(ctx
, AV_LOG_ERROR
, "Couldn't get speaker positions. Input channel configuration not supported.\n");
798 for (i
= 0; i
< s
->n_conv
; i
++) {
799 float coordinates
[3];
801 /* load and store IRs and corresponding delays */
802 azim
= (int)(s
->speaker_azim
[i
] + azim_orig
) % 360;
803 elev
= (int)(s
->speaker_elev
[i
] + elev_orig
) % 90;
805 coordinates
[0] = azim
;
806 coordinates
[1] = elev
;
807 coordinates
[2] = radius
;
809 mysofa_s2c(coordinates
);
811 /* get id of IR closest to desired position */
812 ret
= getfilter_float(ctx
, coordinates
[0], coordinates
[1], coordinates
[2],
813 data_ir_l
+ n_samples
* i
,
814 data_ir_r
+ n_samples
* i
,
819 s
->delay
[0][i
] = delay_l
* sample_rate
;
820 s
->delay
[1][i
] = delay_r
* sample_rate
;
822 s
->sofa
.max_delay
= FFMAX3(s
->sofa
.max_delay
, s
->delay
[0][i
], s
->delay
[1][i
]);
825 /* get size of ringbuffer (longest IR plus max. delay) */
826 /* then choose next power of 2 for performance optimization */
827 n_current
= n_samples
+ s
->sofa
.max_delay
;
828 /* length of longest IR plus max. delay */
829 n_max
= FFMAX(n_max
, n_current
);
831 /* buffer length is longest IR plus max. delay -> next power of 2
832 (32 - count leading zeros gives required exponent) */
833 s
->buffer_length
= 1 << (32 - ff_clz(n_max
));
834 s
->n_fft
= n_fft
= 1 << (32 - ff_clz(n_max
+ s
->framesize
));
836 if (s
->type
== FREQUENCY_DOMAIN
) {
839 av_tx_uninit(&s
->fft
[0]);
840 av_tx_uninit(&s
->fft
[1]);
841 ret
= av_tx_init(&s
->fft
[0], &s
->tx_fn
[0], AV_TX_FLOAT_FFT
, 0, s
->n_fft
, &scale
, 0);
844 ret
= av_tx_init(&s
->fft
[1], &s
->tx_fn
[1], AV_TX_FLOAT_FFT
, 0, s
->n_fft
, &scale
, 0);
847 av_tx_uninit(&s
->ifft
[0]);
848 av_tx_uninit(&s
->ifft
[1]);
849 ret
= av_tx_init(&s
->ifft
[0], &s
->itx_fn
[0], AV_TX_FLOAT_FFT
, 1, s
->n_fft
, &scale
, 0);
852 ret
= av_tx_init(&s
->ifft
[1], &s
->itx_fn
[1], AV_TX_FLOAT_FFT
, 1, s
->n_fft
, &scale
, 0);
857 if (s
->type
== TIME_DOMAIN
) {
858 s
->ringbuffer
[0] = av_calloc(s
->buffer_length
, sizeof(float) * nb_input_channels
);
859 s
->ringbuffer
[1] = av_calloc(s
->buffer_length
, sizeof(float) * nb_input_channels
);
860 } else if (s
->type
== FREQUENCY_DOMAIN
) {
861 /* get temporary HRTF memory for L and R channel */
862 data_hrtf_l
= av_malloc_array(n_fft
, sizeof(*data_hrtf_l
) * n_conv
);
863 data_hrtf_r
= av_malloc_array(n_fft
, sizeof(*data_hrtf_r
) * n_conv
);
864 if (!data_hrtf_r
|| !data_hrtf_l
) {
865 ret
= AVERROR(ENOMEM
);
869 s
->ringbuffer
[0] = av_calloc(s
->buffer_length
, sizeof(float));
870 s
->ringbuffer
[1] = av_calloc(s
->buffer_length
, sizeof(float));
871 s
->in_fft
[0] = av_malloc_array(s
->n_fft
, sizeof(AVComplexFloat
));
872 s
->in_fft
[1] = av_malloc_array(s
->n_fft
, sizeof(AVComplexFloat
));
873 s
->out_fft
[0] = av_malloc_array(s
->n_fft
, sizeof(AVComplexFloat
));
874 s
->out_fft
[1] = av_malloc_array(s
->n_fft
, sizeof(AVComplexFloat
));
875 s
->temp_afft
[0] = av_malloc_array(s
->n_fft
, sizeof(AVComplexFloat
));
876 s
->temp_afft
[1] = av_malloc_array(s
->n_fft
, sizeof(AVComplexFloat
));
877 if (!s
->in_fft
[0] || !s
->in_fft
[1] ||
878 !s
->out_fft
[0] || !s
->out_fft
[1] ||
879 !s
->temp_afft
[0] || !s
->temp_afft
[1]) {
880 ret
= AVERROR(ENOMEM
);
885 if (!s
->ringbuffer
[0] || !s
->ringbuffer
[1]) {
886 ret
= AVERROR(ENOMEM
);
890 if (s
->type
== FREQUENCY_DOMAIN
) {
891 fft_out_l
= av_calloc(n_fft
, sizeof(*fft_out_l
));
892 fft_out_r
= av_calloc(n_fft
, sizeof(*fft_out_r
));
893 fft_in_l
= av_calloc(n_fft
, sizeof(*fft_in_l
));
894 fft_in_r
= av_calloc(n_fft
, sizeof(*fft_in_r
));
895 if (!fft_in_l
|| !fft_in_r
||
896 !fft_out_l
|| !fft_out_r
) {
897 ret
= AVERROR(ENOMEM
);
902 for (i
= 0; i
< s
->n_conv
; i
++) {
905 offset
= i
* n_samples
; /* no. samples already written */
907 lir
= data_ir_l
+ offset
;
908 rir
= data_ir_r
+ offset
;
910 if (s
->type
== TIME_DOMAIN
) {
911 for (j
= 0; j
< ir_samples
; j
++) {
912 /* load reversed IRs of the specified source position
913 * sample-by-sample for left and right ear; and apply gain */
914 s
->data_ir
[0][offset
+ j
] = lir
[ir_samples
- 1 - j
] * gain_lin
;
915 s
->data_ir
[1][offset
+ j
] = rir
[ir_samples
- 1 - j
] * gain_lin
;
917 } else if (s
->type
== FREQUENCY_DOMAIN
) {
918 memset(fft_in_l
, 0, n_fft
* sizeof(*fft_in_l
));
919 memset(fft_in_r
, 0, n_fft
* sizeof(*fft_in_r
));
921 offset
= i
* n_fft
; /* no. samples already written */
922 for (j
= 0; j
< ir_samples
; j
++) {
923 /* load non-reversed IRs of the specified source position
924 * sample-by-sample and apply gain,
925 * L channel is loaded to real part, R channel to imag part,
926 * IRs are shifted by L and R delay */
927 fft_in_l
[s
->delay
[0][i
] + j
].re
= lir
[j
] * gain_lin
;
928 fft_in_r
[s
->delay
[1][i
] + j
].re
= rir
[j
] * gain_lin
;
931 /* actually transform to frequency domain (IRs -> HRTFs) */
932 s
->tx_fn
[0](s
->fft
[0], fft_out_l
, fft_in_l
, sizeof(*fft_in_l
));
933 memcpy(data_hrtf_l
+ offset
, fft_out_l
, n_fft
* sizeof(*fft_out_l
));
934 s
->tx_fn
[1](s
->fft
[1], fft_out_r
, fft_in_r
, sizeof(*fft_in_r
));
935 memcpy(data_hrtf_r
+ offset
, fft_out_r
, n_fft
* sizeof(*fft_out_r
));
939 if (s
->type
== FREQUENCY_DOMAIN
) {
940 s
->data_hrtf
[0] = av_malloc_array(n_fft
* s
->n_conv
, sizeof(AVComplexFloat
));
941 s
->data_hrtf
[1] = av_malloc_array(n_fft
* s
->n_conv
, sizeof(AVComplexFloat
));
942 if (!s
->data_hrtf
[0] || !s
->data_hrtf
[1]) {
943 ret
= AVERROR(ENOMEM
);
947 memcpy(s
->data_hrtf
[0], data_hrtf_l
, /* copy HRTF data to */
948 sizeof(AVComplexFloat
) * n_conv
* n_fft
); /* filter struct */
949 memcpy(s
->data_hrtf
[1], data_hrtf_r
,
950 sizeof(AVComplexFloat
) * n_conv
* n_fft
);
954 av_freep(&data_hrtf_l
); /* free temporary HRTF memory */
955 av_freep(&data_hrtf_r
);
957 av_freep(&data_ir_l
); /* free temprary IR memory */
958 av_freep(&data_ir_r
);
960 av_freep(&fft_out_l
); /* free temporary FFT memory */
961 av_freep(&fft_out_r
);
963 av_freep(&fft_in_l
); /* free temporary FFT memory */
969 static av_cold
int init(AVFilterContext
*ctx
)
971 SOFAlizerContext
*s
= ctx
->priv
;
975 av_log(ctx
, AV_LOG_ERROR
, "Valid SOFA filename must be set.\n");
976 return AVERROR(EINVAL
);
979 /* preload SOFA file, */
980 ret
= preload_sofa(ctx
, s
->filename
, &s
->sample_rate
);
982 /* file loading error */
983 av_log(ctx
, AV_LOG_ERROR
, "Error while loading SOFA file: '%s'\n", s
->filename
);
984 } else { /* no file loading error, resampling not required */
985 av_log(ctx
, AV_LOG_DEBUG
, "File '%s' loaded.\n", s
->filename
);
989 av_log(ctx
, AV_LOG_ERROR
, "No valid SOFA file could be loaded. Please specify valid SOFA file.\n");
993 s
->fdsp
= avpriv_float_dsp_alloc(0);
995 return AVERROR(ENOMEM
);
1000 static int config_input(AVFilterLink
*inlink
)
1002 AVFilterContext
*ctx
= inlink
->dst
;
1003 SOFAlizerContext
*s
= ctx
->priv
;
1006 if (s
->type
== FREQUENCY_DOMAIN
)
1007 s
->nb_samples
= s
->framesize
;
1009 /* gain -3 dB per channel */
1010 s
->gain_lfe
= expf((s
->gain
- 3 * inlink
->ch_layout
.nb_channels
+ s
->lfe_gain
) / 20 * M_LN10
);
1012 s
->n_conv
= inlink
->ch_layout
.nb_channels
;
1014 /* load IRs to data_ir[0] and data_ir[1] for required directions */
1015 if ((ret
= load_data(ctx
, s
->rotation
, s
->elevation
, s
->radius
, inlink
->sample_rate
)) < 0)
1018 av_log(ctx
, AV_LOG_DEBUG
, "Samplerate: %d Channels to convolute: %d, Length of ringbuffer: %d x %d\n",
1019 inlink
->sample_rate
, s
->n_conv
, inlink
->ch_layout
.nb_channels
, s
->buffer_length
);
1024 static av_cold
void uninit(AVFilterContext
*ctx
)
1026 SOFAlizerContext
*s
= ctx
->priv
;
1028 close_sofa(&s
->sofa
);
1029 av_tx_uninit(&s
->ifft
[0]);
1030 av_tx_uninit(&s
->ifft
[1]);
1031 av_tx_uninit(&s
->fft
[0]);
1032 av_tx_uninit(&s
->fft
[1]);
1037 av_freep(&s
->delay
[0]);
1038 av_freep(&s
->delay
[1]);
1039 av_freep(&s
->data_ir
[0]);
1040 av_freep(&s
->data_ir
[1]);
1041 av_freep(&s
->ringbuffer
[0]);
1042 av_freep(&s
->ringbuffer
[1]);
1043 av_freep(&s
->speaker_azim
);
1044 av_freep(&s
->speaker_elev
);
1045 av_freep(&s
->temp_src
[0]);
1046 av_freep(&s
->temp_src
[1]);
1047 av_freep(&s
->temp_afft
[0]);
1048 av_freep(&s
->temp_afft
[1]);
1049 av_freep(&s
->in_fft
[0]);
1050 av_freep(&s
->in_fft
[1]);
1051 av_freep(&s
->out_fft
[0]);
1052 av_freep(&s
->out_fft
[1]);
1053 av_freep(&s
->data_hrtf
[0]);
1054 av_freep(&s
->data_hrtf
[1]);
1058 #define OFFSET(x) offsetof(SOFAlizerContext, x)
1059 #define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
1061 static const AVOption sofalizer_options
[] = {
1062 { "sofa", "sofa filename", OFFSET(filename
), AV_OPT_TYPE_STRING
, {.str
=NULL
}, .flags
= FLAGS
},
1063 { "gain", "set gain in dB", OFFSET(gain
), AV_OPT_TYPE_FLOAT
, {.dbl
=0}, -20, 40, .flags
= FLAGS
},
1064 { "rotation", "set rotation" , OFFSET(rotation
), AV_OPT_TYPE_FLOAT
, {.dbl
=0}, -360, 360, .flags
= FLAGS
},
1065 { "elevation", "set elevation", OFFSET(elevation
), AV_OPT_TYPE_FLOAT
, {.dbl
=0}, -90, 90, .flags
= FLAGS
},
1066 { "radius", "set radius", OFFSET(radius
), AV_OPT_TYPE_FLOAT
, {.dbl
=1}, 0, 5, .flags
= FLAGS
},
1067 { "type", "set processing", OFFSET(type
), AV_OPT_TYPE_INT
, {.i64
=1}, 0, 1, .flags
= FLAGS
, .unit
= "type" },
1068 { "time", "time domain", 0, AV_OPT_TYPE_CONST
, {.i64
=0}, 0, 0, .flags
= FLAGS
, .unit
= "type" },
1069 { "freq", "frequency domain", 0, AV_OPT_TYPE_CONST
, {.i64
=1}, 0, 0, .flags
= FLAGS
, .unit
= "type" },
1070 { "speakers", "set speaker custom positions", OFFSET(speakers_pos
), AV_OPT_TYPE_STRING
, {.str
=0}, 0, 0, .flags
= FLAGS
},
1071 { "lfegain", "set lfe gain", OFFSET(lfe_gain
), AV_OPT_TYPE_FLOAT
, {.dbl
=0}, -20,40, .flags
= FLAGS
},
1072 { "framesize", "set frame size", OFFSET(framesize
), AV_OPT_TYPE_INT
, {.i64
=1024},1024,96000, .flags
= FLAGS
},
1073 { "normalize", "normalize IRs", OFFSET(normalize
), AV_OPT_TYPE_BOOL
, {.i64
=1}, 0, 1, .flags
= FLAGS
},
1074 { "interpolate","interpolate IRs from neighbors", OFFSET(interpolate
),AV_OPT_TYPE_BOOL
, {.i64
=0}, 0, 1, .flags
= FLAGS
},
1075 { "minphase", "minphase IRs", OFFSET(minphase
), AV_OPT_TYPE_BOOL
, {.i64
=0}, 0, 1, .flags
= FLAGS
},
1076 { "anglestep", "set neighbor search angle step", OFFSET(anglestep
), AV_OPT_TYPE_FLOAT
, {.dbl
=.5}, 0.01, 10, .flags
= FLAGS
},
1077 { "radstep", "set neighbor search radius step", OFFSET(radstep
), AV_OPT_TYPE_FLOAT
, {.dbl
=.01}, 0.01, 1, .flags
= FLAGS
},
1081 AVFILTER_DEFINE_CLASS(sofalizer
);
1083 static const AVFilterPad inputs
[] = {
1086 .type
= AVMEDIA_TYPE_AUDIO
,
1087 .config_props
= config_input
,
1091 const FFFilter ff_af_sofalizer
= {
1092 .p
.name
= "sofalizer",
1093 .p
.description
= NULL_IF_CONFIG_SMALL("SOFAlizer (Spatially Oriented Format for Acoustics)."),
1094 .p
.priv_class
= &sofalizer_class
,
1095 .p
.flags
= AVFILTER_FLAG_SLICE_THREADS
,
1096 .priv_size
= sizeof(SOFAlizerContext
),
1098 .activate
= activate
,
1100 FILTER_INPUTS(inputs
),
1101 FILTER_OUTPUTS(ff_audio_default_filterpad
),
1102 FILTER_QUERY_FUNC2(query_formats
),