2 * Copyright (c) 2017 Paul B Mahol
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "libavutil/tx.h"
/*
 * Per-precision template parameters. Two alternative sets are defined here
 * (float and double); the conditional that selects between them is not
 * visible in this excerpt.
 */
34 #define SAMPLE_FORMAT float
37 #define ctype AVComplexFloat
39 #define TX_TYPE AV_TX_FLOAT_RDFT
43 #define SAMPLE_FORMAT double
46 #define ctype AVComplexDouble
48 #define TX_TYPE AV_TX_DOUBLE_RDFT
/*
 * fn(a) produces the identifier a_<SAMPLE_FORMAT>. The fn2 indirection lets
 * SAMPLE_FORMAT be macro-expanded before fn3 pastes the tokens together.
 */
53 #define fn3(a,b) a##_##b
54 #define fn2(a,b) fn3(a,b)
55 #define fn(a) fn2(a, SAMPLE_FORMAT)
/*
 * Derive a gain factor (ch_gain) for one impulse response from its
 * time-domain taps, selected by s->ir_norm:
 *   - ir_norm <  0: branch body not visible in this excerpt;
 *   - ir_norm == 0: loops over the first cur_nb_taps taps (loop body not
 *                   visible in this excerpt);
 *   - otherwise:    ch_gain = 1 / (sum_i |time[i]|^ir_norm)^(1 / ir_norm).
 *
 * NOTE(review): the return statement is outside the visible lines; ch_gain
 * is presumably the value returned -- confirm.
 */
57 static ftype
fn(ir_gain
)(AVFilterContext
*ctx
, AudioFIRContext
*s
,
58 int cur_nb_taps
, const ftype
*time
)
60 ftype ch_gain
, sum
= 0;
62 if (s
->ir_norm
< 0.f
) {
64 } else if (s
->ir_norm
== 0.f
) {
65 for (int i
= 0; i
< cur_nb_taps
; i
++)
/* General case: accumulate |tap|^ir_norm, then invert the ir_norm-th root. */
69 ftype ir_norm
= s
->ir_norm
;
71 for (int i
= 0; i
< cur_nb_taps
; i
++)
72 sum
+= POW(FABS(time
[i
]), ir_norm
);
73 ch_gain
= 1. / POW(sum
, 1. / ir_norm
);
/*
 * Scale the taps in time[] in place by ch_gain * s->ir_gain.
 *
 * Nothing is done when both ch_gain and s->ir_gain equal 1. Otherwise the
 * combined gain is logged at debug level for channel ch and applied through
 * the float DSP context: vector_fmul_scalar with a length of cur_nb_taps
 * aligned to 4, or vector_dmul_scalar with a length aligned to 8.
 *
 * NOTE(review): the condition choosing between the two calls is not visible
 * in this excerpt (presumably the sample precision). The aligned length may
 * exceed cur_nb_taps, so time[] presumably has padding -- confirm.
 */
79 static void fn(ir_scale
)(AVFilterContext
*ctx
, AudioFIRContext
*s
,
80 int cur_nb_taps
, int ch
,
81 ftype
*time
, ftype ch_gain
)
83 if (ch_gain
!= 1. || s
->ir_gain
!= 1.) {
84 ftype gain
= ch_gain
* s
->ir_gain
;
86 av_log(ctx
, AV_LOG_DEBUG
, "ch%d gain %f\n", ch
, gain
);
88 s
->fdsp
->vector_fmul_scalar(time
, time
, gain
, FFALIGN(cur_nb_taps
, 4));
90 s
->fdsp
->vector_dmul_scalar(time
, time
, gain
, FFALIGN(cur_nb_taps
, 8));
/*
 * Transform one partition of the impulse response of channel ch and store
 * the result as coefficients for segment seg.
 *
 * The taps are read from s->norm_ir[selir] starting at
 * seg->input_offset + coeff_partition * seg->part_size. Up to
 * seg->part_size of them are copied into seg->tempin, the rest of the
 * block (up to seg->block_size) is zeroed, seg->ctx_fn is run from tempin
 * to tempout, and seg->coeff_size complex values are copied into the
 * coefficient buffer at index coeff_partition * seg->coeff_size.
 * The segment geometry is then logged at debug level.
 */
95 static void fn(convert_channel
)(AVFilterContext
*ctx
, AudioFIRContext
*s
, int ch
,
96 AudioFIRSegment
*seg
, int coeff_partition
, int selir
)
98 const int coffset
= coeff_partition
* seg
->coeff_size
;
99 const int nb_taps
= s
->nb_taps
[selir
];
100 ftype
*time
= (ftype
*)s
->norm_ir
[selir
]->extended_data
[ch
];
101 ftype
*tempin
= (ftype
*)seg
->tempin
->extended_data
[ch
];
102 ftype
*tempout
= (ftype
*)seg
->tempout
->extended_data
[ch
];
103 ctype
*coeff
= (ctype
*)seg
->coeff
->extended_data
[ch
];
/*
 * Number of taps left from this partition's start to the end of the IR;
 * the last partition may be shorter than part_size.
 * NOTE(review): no guard against remaining <= 0 is visible here -- confirm
 * that the caller never requests a partition past the end of the IR.
 */
104 const int remaining
= nb_taps
- (seg
->input_offset
+ coeff_partition
* seg
->part_size
);
105 const int size
= remaining
>= seg
->part_size
? seg
->part_size
: remaining
;
/* Zero-pad the tail of the block, then copy in the taps of this partition. */
107 memset(tempin
+ size
, 0, sizeof(*tempin
) * (seg
->block_size
- size
));
108 memcpy(tempin
, time
+ seg
->input_offset
+ coeff_partition
* seg
->part_size
,
109 size
* sizeof(*tempin
));
110 seg
->ctx_fn(seg
->ctx
[ch
], tempout
, tempin
, sizeof(*tempin
));
111 memcpy(coeff
+ coffset
, tempout
, seg
->coeff_size
* sizeof(*coeff
));
113 av_log(ctx
, AV_LOG_DEBUG
, "channel: %d\n", ch
);
114 av_log(ctx
, AV_LOG_DEBUG
, "nb_partitions: %d\n", seg
->nb_partitions
);
115 av_log(ctx
, AV_LOG_DEBUG
, "partition size: %d\n", seg
->part_size
);
116 av_log(ctx
, AV_LOG_DEBUG
, "block size: %d\n", seg
->block_size
);
117 av_log(ctx
, AV_LOG_DEBUG
, "fft_length: %d\n", seg
->fft_length
);
118 av_log(ctx
, AV_LOG_DEBUG
, "coeff_size: %d\n", seg
->coeff_size
);
119 av_log(ctx
, AV_LOG_DEBUG
, "input_size: %d\n", seg
->input_size
);
120 av_log(ctx
, AV_LOG_DEBUG
, "input_offset: %d\n", seg
->input_offset
);
/*
 * Combine nb_samples values of src into dst.
 *
 * When nb_samples is a multiple of 16 (the additional ">= 8" test then only
 * excludes non-positive counts) the work is delegated to the float DSP
 * context: vector_fmac_scalar or vector_dmac_scalar with a scalar of 1.
 * Otherwise a plain loop over nb_samples is used.
 *
 * NOTE(review): the body of the fallback loop and the condition selecting
 * between the two DSP calls are not visible in this excerpt; presumably the
 * loop performs dst[n] += src[n] -- confirm.
 */
123 static void fn(fir_fadd
)(AudioFIRContext
*s
, ftype
*dst
, const ftype
*src
, int nb_samples
)
125 if ((nb_samples
& 15) == 0 && nb_samples
>= 8) {
127 s
->fdsp
->vector_fmac_scalar(dst
, src
, 1.f
, nb_samples
);
129 s
->fdsp
->vector_dmac_scalar(dst
, src
, 1.0, nb_samples
);
132 for (int n
= 0; n
< nb_samples
; n
++)
/*
 * Run one quantum (at most s->min_part_size samples) of channel ch through
 * every segment of impulse response selir.
 *
 * Input is read from s->in->extended_data[ch] + ioffset; results are
 * combined into out->extended_data[ch] + offset through fir_fadd.
 *
 * For each segment the visible code:
 *   - stores the input at src + input_offset, scaled by dry_gain unless
 *     dry_gain is exactly 1;
 *   - advances the per-channel output offset by min_part_size, resetting it
 *     to 0 once it reaches part_size;
 *   - copies part_size samples into tempin (zero-padded to block_size) and
 *     runs seg->tx_fn into the current blockout slot;
 *   - accumulates into sumin, via fcmul_add/dcmul_add, each stored block
 *     combined with a coefficient partition;
 *   - runs seg->itx_fn from sumin to sumout, combines the first part_size
 *     results with buf, copies them to dst and saves the second half of
 *     sumout in buf for the next round;
 *   - advances the partition index modulo nb_partitions.
 * After the segment loop the output is scaled by wet_gain.
 *
 * NOTE(review): several original lines (closing braces, else/continue
 * branches, the declaration and update of j, the return statements) are not
 * visible in this excerpt; this description covers only visible statements.
 */
137 static int fn(fir_quantum
)(AVFilterContext
*ctx
, AVFrame
*out
, int ch
, int ioffset
, int offset
, int selir
)
139 AudioFIRContext
*s
= ctx
->priv
;
140 const ftype
*in
= (const ftype
*)s
->in
->extended_data
[ch
] + ioffset
;
141 ftype
*blockout
, *ptr
= (ftype
*)out
->extended_data
[ch
] + offset
;
142 const int min_part_size
= s
->min_part_size
;
/* Never process past the end of the output frame. */
143 const int nb_samples
= FFMIN(min_part_size
, out
->nb_samples
- offset
);
144 const int nb_segments
= s
->nb_segments
[selir
];
145 const float dry_gain
= s
->dry_gain
;
146 const float wet_gain
= s
->wet_gain
;
148 for (int segment
= 0; segment
< nb_segments
; segment
++) {
149 AudioFIRSegment
*seg
= &s
->seg
[selir
][segment
];
150 ftype
*src
= (ftype
*)seg
->input
->extended_data
[ch
];
151 ftype
*dst
= (ftype
*)seg
->output
->extended_data
[ch
];
152 ftype
*sumin
= (ftype
*)seg
->sumin
->extended_data
[ch
];
153 ftype
*sumout
= (ftype
*)seg
->sumout
->extended_data
[ch
];
154 ftype
*tempin
= (ftype
*)seg
->tempin
->extended_data
[ch
];
155 ftype
*buf
= (ftype
*)seg
->buffer
->extended_data
[ch
];
156 int *output_offset
= &seg
->output_offset
[ch
];
157 const int nb_partitions
= seg
->nb_partitions
;
158 const int input_offset
= seg
->input_offset
;
159 const int part_size
= seg
->part_size
;
/* Reduce the stored partition index modulo this segment's partition count. */
162 seg
->part_index
[ch
] = seg
->part_index
[ch
] % nb_partitions
;
/* Store the new input at input_offset, applying dry_gain unless it is exactly 1. */
163 if (dry_gain
== 1.f
) {
164 memcpy(src
+ input_offset
, in
, nb_samples
* sizeof(*src
));
165 } else if (min_part_size
>= 8) {
167 s
->fdsp
->vector_fmul_scalar(src
+ input_offset
, in
, dry_gain
, FFALIGN(nb_samples
, 4));
169 s
->fdsp
->vector_dmul_scalar(src
+ input_offset
, in
, dry_gain
, FFALIGN(nb_samples
, 8));
172 ftype
*src2
= src
+ input_offset
;
173 for (int n
= 0; n
< nb_samples
; n
++)
174 src2
[n
] = in
[n
] * dry_gain
;
/* Advance by one quantum; reset once a whole partition has been reached. */
177 output_offset
[0] += min_part_size
;
178 if (output_offset
[0] >= part_size
) {
179 output_offset
[0] = 0;
/*
 * NOTE(review): the lines linking the next three statements to the test
 * above are not visible; presumably this is the "partition not yet
 * complete" path -- confirm.
 */
181 memmove(src
, src
+ min_part_size
, (seg
->input_size
- min_part_size
) * sizeof(*src
));
183 dst
+= output_offset
[0];
184 fn(fir_fadd
)(s
, ptr
, dst
, nb_samples
);
/* Clear the accumulator that the per-partition products are added into. */
188 memset(sumin
, 0, sizeof(*sumin
) * seg
->fft_length
);
190 blockout
= (ftype
*)seg
->blockout
->extended_data
[ch
] + seg
->part_index
[ch
] * seg
->block_size
;
191 memset(tempin
+ part_size
, 0, sizeof(*tempin
) * (seg
->block_size
- part_size
));
192 memcpy(tempin
, src
, sizeof(*src
) * part_size
);
193 seg
->tx_fn(seg
->tx
[ch
], blockout
, tempin
, sizeof(ftype
));
/* j selects the stored input block; i selects the coefficient partition. */
195 j
= seg
->part_index
[ch
];
196 for (int i
= 0; i
< nb_partitions
; i
++) {
197 const int input_partition
= j
;
198 const int coeff_partition
= i
;
199 const int coffset
= coeff_partition
* seg
->coeff_size
;
200 const ftype
*blockout
= (const ftype
*)seg
->blockout
->extended_data
[ch
] + input_partition
* seg
->block_size
;
201 const ctype
*coeff
= ((const ctype
*)seg
->coeff
->extended_data
[ch
]) + coffset
;
208 s
->afirdsp
.fcmul_add(sumin
, blockout
, (const ftype
*)coeff
, part_size
);
210 s
->afirdsp
.dcmul_add(sumin
, blockout
, (const ftype
*)coeff
, part_size
);
214 seg
->itx_fn(seg
->itx
[ch
], sumout
, sumin
, sizeof(ctype
));
/*
 * Combine the first half of sumout with the tail saved in buf, emit it to
 * dst, and keep the second half of sumout in buf for the next partition.
 */
216 fn(fir_fadd
)(s
, buf
, sumout
, part_size
);
217 memcpy(dst
, buf
, part_size
* sizeof(*dst
));
218 memcpy(buf
, sumout
+ part_size
, part_size
* sizeof(*buf
));
220 fn(fir_fadd
)(s
, ptr
, dst
, nb_samples
);
222 if (part_size
!= min_part_size
)
223 memmove(src
, src
+ min_part_size
, (seg
->input_size
- min_part_size
) * sizeof(*src
));
225 seg
->part_index
[ch
] = (seg
->part_index
[ch
] + 1) % nb_partitions
;
/* Scale the accumulated output by wet_gain. */
231 if (min_part_size
>= 8) {
233 s
->fdsp
->vector_fmul_scalar(ptr
, ptr
, wet_gain
, FFALIGN(nb_samples
, 4));
235 s
->fdsp
->vector_dmul_scalar(ptr
, ptr
, wet_gain
, FFALIGN(nb_samples
, 8));
238 for (int n
= 0; n
< nb_samples
; n
++)
/*
 * Produce one quantum of output for channel ch at sample position offset.
 *
 * Three paths are visible:
 *   - the filter is disabled now or was disabled previously: the filtered
 *     signal is rendered into s->fadein[0] or s->fadein[1] and mixed with
 *     the unprocessed input using the weights in s->xfade[0]/[1]; a third
 *     statement copies the input to the output unchanged;
 *   - the selected impulse response changed (prev_selir != selir) while
 *     s->loading[ch] is non-zero: both the previous and the new response
 *     are rendered into s->fadein[0]/[1], then either mixed with the xfade
 *     weights or the previous-response output is copied;
 *   - otherwise the quantum is filtered straight into out by fir_quantum.
 *
 * NOTE(review): the else keywords and closing braces joining these
 * statements are not visible in this excerpt; the grouping above is
 * inferred from statement order -- confirm against the full file.
 */
245 static void fn(fir_quantums
)(AVFilterContext
*ctx
, AudioFIRContext
*s
, AVFrame
*out
,
246 int min_part_size
, int ch
, int offset
,
247 int prev_selir
, int selir
)
249 if (ctx
->is_disabled
|| s
->prev_is_disabled
) {
250 const ftype
*in
= (const ftype
*)s
->in
->extended_data
[ch
] + offset
;
251 const ftype
*xfade0
= (const ftype
*)s
->xfade
[0]->extended_data
[ch
];
252 const ftype
*xfade1
= (const ftype
*)s
->xfade
[1]->extended_data
[ch
];
253 ftype
*src0
= (ftype
*)s
->fadein
[0]->extended_data
[ch
];
254 ftype
*src1
= (ftype
*)s
->fadein
[1]->extended_data
[ch
];
255 ftype
*dst
= ((ftype
*)out
->extended_data
[ch
]) + offset
;
/* Just disabled: filtered signal weighted by xfade1, dry input by xfade0. */
257 if (ctx
->is_disabled
&& !s
->prev_is_disabled
) {
258 memset(src0
, 0, min_part_size
* sizeof(ftype
));
259 fn(fir_quantum
)(ctx
, s
->fadein
[0], ch
, offset
, 0, selir
);
260 for (int n
= 0; n
< min_part_size
; n
++)
261 dst
[n
] = xfade1
[n
] * src0
[n
] + xfade0
[n
] * in
[n
];
/* Just re-enabled: dry input weighted by xfade1, filtered signal by xfade0. */
262 } else if (!ctx
->is_disabled
&& s
->prev_is_disabled
) {
263 memset(src1
, 0, min_part_size
* sizeof(ftype
));
264 fn(fir_quantum
)(ctx
, s
->fadein
[1], ch
, offset
, 0, selir
);
265 for (int n
= 0; n
< min_part_size
; n
++)
266 dst
[n
] = xfade1
[n
] * in
[n
] + xfade0
[n
] * src1
[n
];
268 memcpy(dst
, in
, sizeof(ftype
) * min_part_size
);
270 } else if (prev_selir
!= selir
&& s
->loading
[ch
] != 0) {
271 const ftype
*xfade0
= (const ftype
*)s
->xfade
[0]->extended_data
[ch
];
272 const ftype
*xfade1
= (const ftype
*)s
->xfade
[1]->extended_data
[ch
];
273 ftype
*src0
= (ftype
*)s
->fadein
[0]->extended_data
[ch
];
274 ftype
*src1
= (ftype
*)s
->fadein
[1]->extended_data
[ch
];
275 ftype
*dst
= ((ftype
*)out
->extended_data
[ch
]) + offset
;
/* Clear both scratch buffers before fir_quantum renders into them. */
277 memset(src0
, 0, min_part_size
* sizeof(ftype
));
278 memset(src1
, 0, min_part_size
* sizeof(ftype
));
280 fn(fir_quantum
)(ctx
, s
->fadein
[0], ch
, offset
, 0, prev_selir
);
281 fn(fir_quantum
)(ctx
, s
->fadein
[1], ch
, offset
, 0, selir
);
283 if (s
->loading
[ch
] > s
->max_offset
[selir
]) {
284 for (int n
= 0; n
< min_part_size
; n
++)
285 dst
[n
] = xfade1
[n
] * src0
[n
] + xfade0
[n
] * src1
[n
];
288 memcpy(dst
, src0
, min_part_size
* sizeof(ftype
));
291 fn(fir_quantum
)(ctx
, out
, ch
, offset
, offset
, selir
);