1 /* Copyright (C) 2002 Jean-Marc Valin
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
10 - Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
13 - Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
17 - Neither the name of the Xiph.org Foundation nor the names of its
18 contributors may be used to endorse or promote products derived from
19 this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
25 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 #include "config-speex.h"
/* Square of x. The argument is expanded twice, so it must not have side
   effects. */
#define sqr(x) ((x)*(x))

/* Energy floor used throughout this file: added to the frame energy before
   taking its log, used to seed the noise accumulator and the log-energy
   history in vbr_init(), and used as the lower gate (ener>MIN_ENERGY) on
   noise-estimate updates in vbr_analysis(). */
#define MIN_ENERGY 6000
/* Narrowband VBR threshold table: 9 rows of 11 thresholds each.
   Row labels are the ones carried by the original per-row comments; note
   that the "4 kbps" row is stored last, out of bit-rate order.
   NOTE(review): presumably one row per coding mode and one column per
   quality step -- confirm against the code that reads this table. */
const float vbr_nb_thresh[9][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /*   CNG   */
   { 4.0f,  2.5f,  2.0f,  1.2f,  0.5f,  0.0f, -0.5f, -0.7f, -0.8f, -0.9f, -1.0f}, /*  2 kbps */
   {10.0f,  6.5f,  5.2f,  4.5f,  3.9f,  3.5f,  3.0f,  2.5f,  2.3f,  1.8f,  1.0f}, /*  6 kbps */
   {11.0f,  8.8f,  7.5f,  6.5f,  5.0f,  3.9f,  3.9f,  3.9f,  3.5f,  3.0f,  1.0f}, /*  8 kbps */
   {11.0f, 11.0f,  9.9f,  8.5f,  7.0f,  6.0f,  4.5f,  4.0f,  4.0f,  4.0f,  2.0f}, /* 11 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  8.0f,  7.0f,  6.0f,  5.0f,  3.0f}, /* 15 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  7.0f,  6.0f,  5.0f}, /* 18 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  9.5f,  7.5f}, /* 24 kbps */
   { 7.0f,  4.5f,  3.7f,  3.0f,  2.5f,  2.0f,  1.8f,  1.5f,  1.0f,  0.0f,  0.0f}  /*  4 kbps */
};
/* High-band VBR threshold table: 5 rows of 11 thresholds each.
   Row labels are the ones carried by the original per-row comments.
   NOTE(review): presumably one row per high-band coding mode and one column
   per quality step -- confirm against the code that reads this table. */
const float vbr_hb_thresh[5][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /*  2 kbps */
   {11.0f, 11.0f,  9.5f,  8.5f,  7.5f,  6.0f,  5.0f,  3.9f,  3.0f,  2.0f,  1.0f}, /*  6 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.7f,  7.8f,  7.0f,  6.5f,  4.0f}, /* 10 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  7.5f,  5.5f}  /* 18 kbps */
};
/* Ultra-high-band VBR threshold table: 2 rows of 11 thresholds each.
   Row labels are the ones carried by the original per-row comments.
   NOTE(review): presumably one row per coding mode and one column per
   quality step -- confirm against the code that reads this table. */
const float vbr_uhb_thresh[2][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */
   { 3.9f,  2.5f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f, -1.0f}  /*  2 kbps */
};
76 void vbr_init(VBRState
*vbr
)
80 vbr
->average_energy
=0;
85 vbr
->last_pitch_coef
=0;
88 vbr
->noise_accum
= .05*pow(MIN_ENERGY
, NOISE_POW
);
89 vbr
->noise_accum_count
=.05;
90 vbr
->noise_level
=vbr
->noise_accum
/vbr
->noise_accum_count
;
94 for (i
=0;i
<VBR_MEMORY_SIZE
;i
++)
95 vbr
->last_log_energy
[i
] = log(MIN_ENERGY
);
/*
  This function should analyse the signal and decide how critical the
  coding error will be perceptually. The following factors should be
  taken into account:

  - Attacks (positive energy derivative) should be coded with more bits

  - Stationary voiced segments should receive more bits

  - Segments with (very) low absolute energy should receive fewer bits (maybe
    only shaped noise?)

  - DTX for near-zero energy?

  - Stationary fricative segments should have fewer bits

  - Temporal masking: when energy slope is decreasing, decrease the bit-rate

  - Decrease bit-rate for males (low pitch)?

  - (wideband only) fewer bits in the high-band when signal is very
    non-stationary (harder to notice high-frequency noise)???
*/
/*
 * Per-frame VBR analysis.
 *
 * From the visible statements: the squared samples of sig[0 .. len/2-1] are
 * summed into ener1 and those of sig[len/2 .. len-1] into ener2. A log
 * energy, a non-stationarity measure (against vbr->last_log_energy[]) and a
 * voicing measure (from pitch_coef) are derived. The function then updates
 * vbr->average_energy, noise_level, noise_accum, noise_accum_count,
 * last_energy, soft_pitch, last_pitch_coef, last_quality and the
 * last_log_energy[] history, while adjusting a running score `qual`.
 *
 * vbr:        analysis state, read and updated in place.
 * sig:        frame samples; indices 0 .. len-1 are read.
 * len:        number of samples in sig.
 * pitch:      not referenced by any visible statement.
 * pitch_coef: pitch coefficient; drives `voicing` and vbr->soft_pitch.
 *
 * NOTE(review): this listing is incomplete. The opening and closing braces,
 * the declarations of i, log_energy, non_st, voicing, pow_ener, qual and
 * tmp, the bodies of several `if` statements and the return statement are
 * not visible. Presumably the function returns `qual` -- confirm against
 * the complete file before relying on any of the comments below.
 */
124 float vbr_analysis(VBRState
*vbr
, spx_word16_t
*sig
, int len
, int pitch
, float pitch_coef
)
127 float ener
=0, ener1
=0, ener2
=0;
/* Energy of the first half of the frame (samples 0 .. (len>>1)-1). */
135 for (i
=0;i
<len
>>1;i
++)
136 ener1
+= ((float)sig
[i
])*sig
[i
];
/* Energy of the second half of the frame (samples len>>1 .. len-1). */
138 for (i
=len
>>1;i
<len
;i
++)
139 ener2
+= ((float)sig
[i
])*sig
[i
];
/* Log of the frame energy, floored by MIN_ENERGY.
   NOTE(review): no visible line combines ener1 and ener2 into ener;
   presumably that statement is missing from this listing -- confirm. */
142 log_energy
= log(ener
+MIN_ENERGY
);
/* Non-stationarity: sum of squared differences between this frame's log
   energy and each stored history entry, then scaled by 30*VBR_MEMORY_SIZE. */
143 for (i
=0;i
<VBR_MEMORY_SIZE
;i
++)
144 non_st
+= sqr(log_energy
-vbr
->last_log_energy
[i
]);
145 non_st
= non_st
/(30*VBR_MEMORY_SIZE
);
/* Signed square of (pitch_coef-.4), times 3: negative when pitch_coef is
   below .4, positive above it. */
149 voicing
= 3*(pitch_coef
-.4)*fabs(pitch_coef
-.4);
/* Exponential average of the frame energy with weight energy_alpha. */
150 vbr
->average_energy
= (1-vbr
->energy_alpha
)*vbr
->average_energy
+ vbr
->energy_alpha
*ener
;
/* Current noise estimate = accumulated value / accumulated weight. */
151 vbr
->noise_level
=vbr
->noise_accum
/vbr
->noise_accum_count
;
152 pow_ener
= pow(ener
,NOISE_POW
);
/* While the accumulated weight is still below .06, a frame above the energy
   floor overwrites the accumulator with .05*pow_ener. */
153 if (vbr
->noise_accum_count
<.06 && ener
>MIN_ENERGY
)
154 vbr
->noise_accum
= .05*pow_ener
;
/* Four alternative tests combining low voicing, low non-stationarity and
   energy close to the noise level.
   NOTE(review): presumably this classifies the frame as noise; the first
   statements of the branch are not visible here -- confirm. */
156 if ((voicing
<.3 && non_st
< .2 && pow_ener
< 1.2*vbr
->noise_level
)
157 || (voicing
<.3 && non_st
< .05 && pow_ener
< 1.5*vbr
->noise_level
)
158 || (voicing
<.4 && non_st
< .05 && pow_ener
< 1.2*vbr
->noise_level
)
159 || (voicing
<0 && non_st
< .05))
/* Cap the value fed to the noise estimate at 3x the current noise level. */
164 if (pow_ener
> 3*vbr
->noise_level
)
165 tmp
= 3*vbr
->noise_level
;
/* Once consec_noise reaches 4, blend tmp into the noise accumulator with
   weights .95/.05 and grow the accumulated weight the same way. */
168 if (vbr
->consec_noise
>=4)
170 vbr
->noise_accum
= .95*vbr
->noise_accum
+ .05*tmp
;
171 vbr
->noise_accum_count
= .95*vbr
->noise_accum_count
+ .05;
/* A frame above the energy floor but below the current noise level is also
   blended into the noise estimate. */
178 if (pow_ener
< vbr
->noise_level
&& ener
>MIN_ENERGY
)
180 vbr
->noise_accum
= .95*vbr
->noise_accum
+ .05*pow_ener
;
181 vbr
->noise_accum_count
= .95*vbr
->noise_accum_count
+ .05;
184 /* Checking for very low absolute energy */
/* Log ratios of this frame's energy to the previous frame's energy
   (short_diff) and to the running average (long_diff); 1 is added to both
   sides of each ratio. */
193 float short_diff
, long_diff
;
194 short_diff
= log((ener
+1)/(1+vbr
->last_energy
));
195 long_diff
= log((ener
+1)/(1+vbr
->average_energy
));
196 /*fprintf (stderr, "%f %f\n", short_diff, long_diff);*/
204 qual
+= .6*long_diff
;
206 qual
+= .5*long_diff
;
211 qual
+= .5*short_diff
;
213 /* Checking for energy increases */
214 if (ener2
> 1.6*ener1
)
/* Remember this frame's energy, smooth the pitch coefficient (weights
   .6 old / .4 new) and adjust qual by how far both the raw and the smoothed
   coefficient sit from .4. */
217 vbr
->last_energy
= ener
;
218 vbr
->soft_pitch
= .6*vbr
->soft_pitch
+ .4*pitch_coef
;
219 qual
+= 2.2*((pitch_coef
-.4) + (vbr
->soft_pitch
-.4));
/* When qual falls below the previous frame's value, average the two, so a
   decrease is only half applied. */
221 if (qual
< vbr
->last_quality
)
222 qual
= .5*qual
+ .5*vbr
->last_quality
;
229 if (vbr->consec_noise>=2)
231 if (vbr->consec_noise>=5)
233 if (vbr->consec_noise>=12)
236 if (vbr
->consec_noise
>=3)
/* Lower qual by log((3+consec_noise)/3) when consec_noise is non-zero. */
239 if (vbr
->consec_noise
)
240 qual
-= 1.0 * (log(3.0 + vbr
->consec_noise
)-log(3));
246 if (vbr
->consec_noise
>2)
247 qual
-=0.5*(log(3.0 + vbr
->consec_noise
)-log(3));
248 if (ener
<10000&&vbr
->consec_noise
>2)
249 qual
-=0.5*(log(3.0 + vbr
->consec_noise
)-log(3));
252 qual
+= .3*log(.0001+ener
/60000.0);
257 /*printf ("%f %f %f %f %d\n", qual, voicing, non_st, pow_ener/(.01+vbr->noise_level), va);*/
259 vbr
->last_pitch_coef
= pitch_coef
;
260 vbr
->last_quality
= qual
;
/* Shift the log-energy history down by one slot and store this frame's
   value at index 0. */
262 for (i
=VBR_MEMORY_SIZE
-1;i
>0;i
--)
263 vbr
->last_log_energy
[i
] = vbr
->last_log_energy
[i
-1];
264 vbr
->last_log_energy
[0] = log_energy
;
266 /*printf ("VBR: %f %f %f %d %f\n", (float)(log_energy-log(vbr->average_energy+MIN_ENERGY)), non_st, voicing, va, vbr->noise_level);*/
271 void vbr_destroy(VBRState
*vbr
)
275 #endif /* #ifndef DISABLE_VBR */