libavcodec/celp_filters.c

   1 /*
   2  * various filters for ACELP-based codecs
   3  *
   4  * Copyright (c) 2008 Vladimir Voroshilov
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #include <stdint.h>
  24 #include <string.h>
  25
  26 #include "config.h"
  27 #include "celp_filters.h"
  28 #include "libavutil/avassert.h"
  29 #include "libavutil/common.h"
  30
  31 void ff_celp_convolve_circ(int16_t* fc_out, const int16_t* fc_in,
  32                            const int16_t* filter, int len)
  33 {
  34     int i, k;
  35
  36     memset(fc_out, 0, len * sizeof(int16_t));
  37
  38     /* Since there are few pulses over an entire subframe (i.e. almost
  39        all fc_in[i] are zero) it is faster to loop over fc_in first. */
  40     for (i = 0; i < len; i++) {
  41         if (fc_in[i]) {
  42             for (k = 0; k < i; k++)
  43                 fc_out[k] += (fc_in[i] * filter[len + k - i]) >> 15;
  44
  45             for (k = i; k < len; k++)
  46                 fc_out[k] += (fc_in[i] * filter[      k - i]) >> 15;
  47         }
  48     }
  49 }
  50
  51 void ff_celp_circ_addf(float *out, const float *in,
  52                        const float *lagged, int lag, float fac, int n)
  53 {
  54     int k;
  55     for (k = 0; k < lag; k++)
  56         out[k] = in[k] + fac * lagged[n + k - lag];
  57     for (; k < n; k++)
  58         out[k] = in[k] + fac * lagged[    k - lag];
  59 }
  60
  61 int ff_celp_lp_synthesis_filter(int16_t *out, const int16_t *filter_coeffs,
  62                                 const int16_t *in, int buffer_length,
  63                                 int filter_length, int stop_on_overflow,
  64                                 int shift, int rounder)
  65 {
  66     int i,n;
  67
  68     for (n = 0; n < buffer_length; n++) {
  69         int sum = rounder, sum1;
  70         for (i = 1; i <= filter_length; i++)
  71             sum -= (unsigned)(filter_coeffs[i-1] * out[n-i]);
  72
  73         sum1 = ((sum >> 12) + in[n]) >> shift;
  74         sum  = av_clip_int16(sum1);
  75
  76         if (stop_on_overflow && sum != sum1)
  77             return 1;
  78
  79         out[n] = sum;
  80     }
  81
  82     return 0;
  83 }
  84
  85 void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs,
  86                                   const float* in, int buffer_length,
  87                                   int filter_length)
  88 {
  89     int i,n;
  90
  91 #if 0 // Unoptimized code path for improved readability
  92     for (n = 0; n < buffer_length; n++) {
  93         out[n] = in[n];
  94         for (i = 1; i <= filter_length; i++)
  95             out[n] -= filter_coeffs[i-1] * out[n-i];
  96     }
  97 #else
  98     float out0, out1, out2, out3;
  99     float old_out0, old_out1, old_out2, old_out3;
 100     float a,b,c;
 101
 102     a = filter_coeffs[0];
 103     b = filter_coeffs[1];
 104     c = filter_coeffs[2];
 105     b -= filter_coeffs[0] * filter_coeffs[0];
 106     c -= filter_coeffs[1] * filter_coeffs[0];
 107     c -= filter_coeffs[0] * b;
 108
 109     av_assert2((filter_length&1)==0 && filter_length>=4);
 110
 111     old_out0 = out[-4];
 112     old_out1 = out[-3];
 113     old_out2 = out[-2];
 114     old_out3 = out[-1];
 115     for (n = 0; n <= buffer_length - 4; n+=4) {
 116         float tmp0,tmp1,tmp2;
 117         float val;
 118
 119         out0 = in[0];
 120         out1 = in[1];
 121         out2 = in[2];
 122         out3 = in[3];
 123
 124         out0 -= filter_coeffs[2] * old_out1;
 125         out1 -= filter_coeffs[2] * old_out2;
 126         out2 -= filter_coeffs[2] * old_out3;
 127
 128         out0 -= filter_coeffs[1] * old_out2;
 129         out1 -= filter_coeffs[1] * old_out3;
 130
 131         out0 -= filter_coeffs[0] * old_out3;
 132
 133         val = filter_coeffs[3];
 134
 135         out0 -= val * old_out0;
 136         out1 -= val * old_out1;
 137         out2 -= val * old_out2;
 138         out3 -= val * old_out3;
 139
 140         for (i = 5; i < filter_length; i += 2) {
 141             old_out3 = out[-i];
 142             val = filter_coeffs[i-1];
 143
 144             out0 -= val * old_out3;
 145             out1 -= val * old_out0;
 146             out2 -= val * old_out1;
 147             out3 -= val * old_out2;
 148
 149             old_out2 = out[-i-1];
 150
 151             val = filter_coeffs[i];
 152
 153             out0 -= val * old_out2;
 154             out1 -= val * old_out3;
 155             out2 -= val * old_out0;
 156             out3 -= val * old_out1;
 157
 158             FFSWAP(float, old_out0, old_out2);
 159             old_out1 = old_out3;
 160         }
 161
 162         tmp0 = out0;
 163         tmp1 = out1;
 164         tmp2 = out2;
 165
 166         out3 -= a * tmp2;
 167         out2 -= a * tmp1;
 168         out1 -= a * tmp0;
 169
 170         out3 -= b * tmp1;
 171         out2 -= b * tmp0;
 172
 173         out3 -= c * tmp0;
 174
 175
 176         out[0] = out0;
 177         out[1] = out1;
 178         out[2] = out2;
 179         out[3] = out3;
 180
 181         old_out0 = out0;
 182         old_out1 = out1;
 183         old_out2 = out2;
 184         old_out3 = out3;
 185
 186         out += 4;
 187         in  += 4;
 188     }
 189
 190     out -= n;
 191     in -= n;
 192     for (; n < buffer_length; n++) {
 193         out[n] = in[n];
 194         for (i = 1; i <= filter_length; i++)
 195             out[n] -= filter_coeffs[i-1] * out[n-i];
 196     }
 197 #endif
 198 }
 199
 200 void ff_celp_lp_zero_synthesis_filterf(float *out, const float *filter_coeffs,
 201                                        const float *in, int buffer_length,
 202                                        int filter_length)
 203 {
 204     int i,n;
 205
 206     for (n = 0; n < buffer_length; n++) {
 207         out[n] = in[n];
 208         for (i = 1; i <= filter_length; i++)
 209             out[n] += filter_coeffs[i-1] * in[n-i];
 210     }
 211 }
 212
 213 void ff_celp_filter_init(CELPFContext *c)
 214 {
 215     c->celp_lp_synthesis_filterf        = ff_celp_lp_synthesis_filterf;
 216     c->celp_lp_zero_synthesis_filterf   = ff_celp_lp_zero_synthesis_filterf;
 217
 218 #if HAVE_MIPSFPU
 219     ff_celp_filter_init_mips(c);
 220 #endif
 221 }