FS#8961 - Anti-Aliased Fonts.
[kugel-rb/myfork.git] / apps / codecs / libspeex / cb_search.c
blob8a190e535db0e950ad8bea37c67b9c9428aa6eb8
1 /* Copyright (C) 2002-2006 Jean-Marc Valin
2 File: cb_search.c
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
15 - Neither the name of the Xiph.org Foundation nor the names of its
16 contributors may be used to endorse or promote products derived from
17 this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
23 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 #ifdef HAVE_CONFIG_H
33 #include "config-speex.h"
34 #endif
36 #include "cb_search.h"
37 #include "filters.h"
38 #include "stack_alloc.h"
39 #include "vq.h"
40 #include "arch.h"
41 #include "math_approx.h"
42 #include "os_support.h"
44 #ifdef _USE_SSE
45 #include "cb_search_sse.h"
46 #elif defined(ARM4_ASM) || defined(ARM5E_ASM)
47 #include "cb_search_arm4.h"
48 #elif defined(BFIN_ASM)
49 #include "cb_search_bfin.h"
50 #endif
52 #ifndef SPEEX_DISABLE_ENCODER
53 #ifndef OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK
54 static void compute_weighted_codebook(const signed char *shape_cb, const spx_word16_t *r, spx_word16_t *resp, spx_word16_t *resp2, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
56 int i, j, k;
57 VARDECL(spx_word16_t *shape);
58 ALLOC(shape, subvect_size, spx_word16_t);
59 for (i=0;i<shape_cb_size;i++)
61 spx_word16_t *res;
63 res = resp+i*subvect_size;
64 for (k=0;k<subvect_size;k++)
65 shape[k] = (spx_word16_t)shape_cb[i*subvect_size+k];
66 E[i]=0;
68 /* Compute codeword response using convolution with impulse response */
69 for(j=0;j<subvect_size;j++)
71 spx_word32_t resj=0;
72 spx_word16_t res16;
73 for (k=0;k<=j;k++)
74 resj = MAC16_16(resj,shape[k],r[j-k]);
75 #ifdef FIXED_POINT
76 res16 = EXTRACT16(SHR32(resj, 13));
77 #else
78 res16 = 0.03125f*resj;
79 #endif
80 /* Compute codeword energy */
81 E[i]=MAC16_16(E[i],res16,res16);
82 res[j] = res16;
83 /*printf ("%d\n", (int)res[j]);*/
88 #endif
90 #ifndef OVERRIDE_TARGET_UPDATE
91 static inline void target_update(spx_word16_t *t, spx_word16_t g, spx_word16_t *r, int len)
93 int n;
94 for (n=0;n<len;n++)
95 t[n] = SUB16(t[n],PSHR32(MULT16_16(g,r[n]),13));
97 #endif
101 static void split_cb_search_shape_sign_N1(
102 spx_word16_t target[], /* target vector */
103 spx_coef_t ak[], /* LPCs for this subframe */
104 spx_coef_t awk1[], /* Weighted LPCs for this subframe */
105 spx_coef_t awk2[], /* Weighted LPCs for this subframe */
106 const void *par, /* Codebook/search parameters*/
107 int p, /* number of LPC coeffs */
108 int nsf, /* number of samples in subframe */
109 spx_sig_t *exc,
110 spx_word16_t *r,
111 SpeexBits *bits,
112 char *stack,
113 int update_target
116 int i,j,m,q;
117 VARDECL(spx_word16_t *resp);
118 #ifdef _USE_SSE
119 VARDECL(__m128 *resp2);
120 VARDECL(__m128 *E);
121 #else
122 spx_word16_t *resp2;
123 VARDECL(spx_word32_t *E);
124 #endif
125 VARDECL(spx_word16_t *t);
126 VARDECL(spx_sig_t *e);
127 const signed char *shape_cb;
128 int shape_cb_size, subvect_size, nb_subvect;
129 const split_cb_params *params;
130 int best_index;
131 spx_word32_t best_dist;
132 int have_sign;
134 params = (const split_cb_params *) par;
135 subvect_size = params->subvect_size;
136 nb_subvect = params->nb_subvect;
137 shape_cb_size = 1<<params->shape_bits;
138 shape_cb = params->shape_cb;
139 have_sign = params->have_sign;
140 ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
141 #ifdef _USE_SSE
142 ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
143 ALLOC(E, shape_cb_size>>2, __m128);
144 #else
145 resp2 = resp;
146 ALLOC(E, shape_cb_size, spx_word32_t);
147 #endif
148 ALLOC(t, nsf, spx_word16_t);
149 ALLOC(e, nsf, spx_sig_t);
151 /* FIXME: Do we still need to copy the target? */
152 SPEEX_COPY(t, target, nsf);
154 compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
156 for (i=0;i<nb_subvect;i++)
158 spx_word16_t *x=t+subvect_size*i;
159 /*Find new n-best based on previous n-best j*/
160 if (have_sign)
161 vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack);
162 else
163 vq_nbest(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack);
165 speex_bits_pack(bits,best_index,params->shape_bits+have_sign);
168 int rind;
169 spx_word16_t *res;
170 spx_word16_t sign=1;
171 rind = best_index;
172 if (rind>=shape_cb_size)
174 sign=-1;
175 rind-=shape_cb_size;
177 res = resp+rind*subvect_size;
178 if (sign>0)
179 for (m=0;m<subvect_size;m++)
180 t[subvect_size*i+m] = SUB16(t[subvect_size*i+m], res[m]);
181 else
182 for (m=0;m<subvect_size;m++)
183 t[subvect_size*i+m] = ADD16(t[subvect_size*i+m], res[m]);
185 #ifdef FIXED_POINT
186 if (sign==1)
188 for (j=0;j<subvect_size;j++)
189 e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
190 } else {
191 for (j=0;j<subvect_size;j++)
192 e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
194 #else
195 for (j=0;j<subvect_size;j++)
196 e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
197 #endif
201 for (m=0;m<subvect_size;m++)
203 spx_word16_t g;
204 int rind;
205 spx_word16_t sign=1;
206 rind = best_index;
207 if (rind>=shape_cb_size)
209 sign=-1;
210 rind-=shape_cb_size;
213 q=subvect_size-m;
214 #ifdef FIXED_POINT
215 g=sign*shape_cb[rind*subvect_size+m];
216 #else
217 g=sign*0.03125*shape_cb[rind*subvect_size+m];
218 #endif
219 target_update(t+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
223 /* Update excitation */
224 /* FIXME: We could update the excitation directly above */
225 for (j=0;j<nsf;j++)
226 exc[j]=ADD32(exc[j],e[j]);
228 /* Update target: only update target if necessary */
229 if (update_target)
231 VARDECL(spx_word16_t *r2);
232 ALLOC(r2, nsf, spx_word16_t);
233 for (j=0;j<nsf;j++)
234 r2[j] = EXTRACT16(PSHR32(e[j] ,6));
235 syn_percep_zero16(r2, ak, awk1, awk2, r2, nsf,p, stack);
236 for (j=0;j<nsf;j++)
237 target[j]=SUB16(target[j],PSHR16(r2[j],2));
243 void split_cb_search_shape_sign(
244 spx_word16_t target[], /* target vector */
245 spx_coef_t ak[], /* LPCs for this subframe */
246 spx_coef_t awk1[], /* Weighted LPCs for this subframe */
247 spx_coef_t awk2[], /* Weighted LPCs for this subframe */
248 const void *par, /* Codebook/search parameters*/
249 int p, /* number of LPC coeffs */
250 int nsf, /* number of samples in subframe */
251 spx_sig_t *exc,
252 spx_word16_t *r,
253 SpeexBits *bits,
254 char *stack,
255 int complexity,
256 int update_target
259 int i,j,k,m,n,q;
260 VARDECL(spx_word16_t *resp);
261 #ifdef _USE_SSE
262 VARDECL(__m128 *resp2);
263 VARDECL(__m128 *E);
264 #else
265 spx_word16_t *resp2;
266 VARDECL(spx_word32_t *E);
267 #endif
268 VARDECL(spx_word16_t *t);
269 VARDECL(spx_sig_t *e);
270 VARDECL(spx_word16_t *tmp);
271 VARDECL(spx_word32_t *ndist);
272 VARDECL(spx_word32_t *odist);
273 VARDECL(int *itmp);
274 VARDECL(spx_word16_t **ot2);
275 VARDECL(spx_word16_t **nt2);
276 spx_word16_t **ot, **nt;
277 VARDECL(int **nind);
278 VARDECL(int **oind);
279 VARDECL(int *ind);
280 const signed char *shape_cb;
281 int shape_cb_size, subvect_size, nb_subvect;
282 const split_cb_params *params;
283 int N=2;
284 VARDECL(int *best_index);
285 VARDECL(spx_word32_t *best_dist);
286 VARDECL(int *best_nind);
287 VARDECL(int *best_ntarget);
288 int have_sign;
289 N=complexity;
290 if (N>10)
291 N=10;
292 /* Complexity isn't as important for the codebooks as it is for the pitch */
293 N=(2*N)/3;
294 if (N<1)
295 N=1;
296 if (N==1)
298 split_cb_search_shape_sign_N1(target,ak,awk1,awk2,par,p,nsf,exc,r,bits,stack,update_target);
299 return;
301 ALLOC(ot2, N, spx_word16_t*);
302 ALLOC(nt2, N, spx_word16_t*);
303 ALLOC(oind, N, int*);
304 ALLOC(nind, N, int*);
306 params = (const split_cb_params *) par;
307 subvect_size = params->subvect_size;
308 nb_subvect = params->nb_subvect;
309 shape_cb_size = 1<<params->shape_bits;
310 shape_cb = params->shape_cb;
311 have_sign = params->have_sign;
312 ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
313 #ifdef _USE_SSE
314 ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
315 ALLOC(E, shape_cb_size>>2, __m128);
316 #else
317 resp2 = resp;
318 ALLOC(E, shape_cb_size, spx_word32_t);
319 #endif
320 ALLOC(t, nsf, spx_word16_t);
321 ALLOC(e, nsf, spx_sig_t);
322 ALLOC(ind, nb_subvect, int);
324 ALLOC(tmp, 2*N*nsf, spx_word16_t);
325 for (i=0;i<N;i++)
327 ot2[i]=tmp+2*i*nsf;
328 nt2[i]=tmp+(2*i+1)*nsf;
330 ot=ot2;
331 nt=nt2;
332 ALLOC(best_index, N, int);
333 ALLOC(best_dist, N, spx_word32_t);
334 ALLOC(best_nind, N, int);
335 ALLOC(best_ntarget, N, int);
336 ALLOC(ndist, N, spx_word32_t);
337 ALLOC(odist, N, spx_word32_t);
339 ALLOC(itmp, 2*N*nb_subvect, int);
340 for (i=0;i<N;i++)
342 nind[i]=itmp+2*i*nb_subvect;
343 oind[i]=itmp+(2*i+1)*nb_subvect;
346 SPEEX_COPY(t, target, nsf);
348 for (j=0;j<N;j++)
349 SPEEX_COPY(&ot[j][0], t, nsf);
351 /* Pre-compute codewords response and energy */
352 compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
354 for (j=0;j<N;j++)
355 odist[j]=0;
357 /*For all subvectors*/
358 for (i=0;i<nb_subvect;i++)
360 /*"erase" nbest list*/
361 for (j=0;j<N;j++)
362 ndist[j]=VERY_LARGE32;
363 /* This is not strictly necessary, but it provides an additonal safety
364 to prevent crashes in case something goes wrong in the previous
365 steps (e.g. NaNs) */
366 for (j=0;j<N;j++)
367 best_nind[j] = best_ntarget[j] = 0;
368 /*For all n-bests of previous subvector*/
369 for (j=0;j<N;j++)
371 spx_word16_t *x=ot[j]+subvect_size*i;
372 spx_word32_t tener = 0;
373 for (m=0;m<subvect_size;m++)
374 tener = MAC16_16(tener, x[m],x[m]);
375 #ifdef FIXED_POINT
376 tener = SHR32(tener,1);
377 #else
378 tener *= .5;
379 #endif
380 /*Find new n-best based on previous n-best j*/
381 if (have_sign)
382 vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
383 else
384 vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
386 /*For all new n-bests*/
387 for (k=0;k<N;k++)
389 /* Compute total distance (including previous sub-vectors */
390 spx_word32_t err = ADD32(ADD32(odist[j],best_dist[k]),tener);
392 /*update n-best list*/
393 if (err<ndist[N-1])
395 for (m=0;m<N;m++)
397 if (err < ndist[m])
399 for (n=N-1;n>m;n--)
401 ndist[n] = ndist[n-1];
402 best_nind[n] = best_nind[n-1];
403 best_ntarget[n] = best_ntarget[n-1];
405 /* n is equal to m here, so they're interchangeable */
406 ndist[m] = err;
407 best_nind[n] = best_index[k];
408 best_ntarget[n] = j;
409 break;
414 if (i==0)
415 break;
417 for (j=0;j<N;j++)
419 /*previous target (we don't care what happened before*/
420 for (m=(i+1)*subvect_size;m<nsf;m++)
421 nt[j][m]=ot[best_ntarget[j]][m];
423 /* New code: update the rest of the target only if it's worth it */
424 for (m=0;m<subvect_size;m++)
426 spx_word16_t g;
427 int rind;
428 spx_word16_t sign=1;
429 rind = best_nind[j];
430 if (rind>=shape_cb_size)
432 sign=-1;
433 rind-=shape_cb_size;
436 q=subvect_size-m;
437 #ifdef FIXED_POINT
438 g=sign*shape_cb[rind*subvect_size+m];
439 #else
440 g=sign*0.03125*shape_cb[rind*subvect_size+m];
441 #endif
442 target_update(nt[j]+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
445 for (q=0;q<nb_subvect;q++)
446 nind[j][q]=oind[best_ntarget[j]][q];
447 nind[j][i]=best_nind[j];
450 /*update old-new data*/
451 /* just swap pointers instead of a long copy */
453 spx_word16_t **tmp2;
454 tmp2=ot;
455 ot=nt;
456 nt=tmp2;
458 for (j=0;j<N;j++)
459 for (m=0;m<nb_subvect;m++)
460 oind[j][m]=nind[j][m];
461 for (j=0;j<N;j++)
462 odist[j]=ndist[j];
465 /*save indices*/
466 for (i=0;i<nb_subvect;i++)
468 ind[i]=nind[0][i];
469 speex_bits_pack(bits,ind[i],params->shape_bits+have_sign);
472 /* Put everything back together */
473 for (i=0;i<nb_subvect;i++)
475 int rind;
476 spx_word16_t sign=1;
477 rind = ind[i];
478 if (rind>=shape_cb_size)
480 sign=-1;
481 rind-=shape_cb_size;
483 #ifdef FIXED_POINT
484 if (sign==1)
486 for (j=0;j<subvect_size;j++)
487 e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
488 } else {
489 for (j=0;j<subvect_size;j++)
490 e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
492 #else
493 for (j=0;j<subvect_size;j++)
494 e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
495 #endif
497 /* Update excitation */
498 for (j=0;j<nsf;j++)
499 exc[j]=ADD32(exc[j],e[j]);
501 /* Update target: only update target if necessary */
502 if (update_target)
504 VARDECL(spx_word16_t *r2);
505 ALLOC(r2, nsf, spx_word16_t);
506 for (j=0;j<nsf;j++)
507 r2[j] = EXTRACT16(PSHR32(e[j] ,6));
508 syn_percep_zero16(r2, ak, awk1, awk2, r2, nsf,p, stack);
509 for (j=0;j<nsf;j++)
510 target[j]=SUB16(target[j],PSHR16(r2[j],2));
513 #endif /* SPEEX_DISABLE_ENCODER*/
516 void split_cb_shape_sign_unquant(
517 spx_sig_t *exc,
518 const void *par, /* non-overlapping codebook */
519 int nsf, /* number of samples in subframe */
520 SpeexBits *bits,
521 char *stack,
522 spx_int32_t *seed
525 (void)nsf;
526 (void)stack;
527 (void)seed;
528 int i,j;
529 VARDECL(int *ind);
530 VARDECL(int *signs);
531 const signed char *shape_cb;
532 int shape_cb_size, subvect_size, nb_subvect;
533 const split_cb_params *params;
534 int have_sign;
536 params = (const split_cb_params *) par;
537 subvect_size = params->subvect_size;
538 nb_subvect = params->nb_subvect;
539 shape_cb_size = 1<<params->shape_bits;
540 shape_cb = params->shape_cb;
541 have_sign = params->have_sign;
543 ALLOC(ind, nb_subvect, int);
544 ALLOC(signs, nb_subvect, int);
546 /* Decode codewords and gains */
547 for (i=0;i<nb_subvect;i++)
549 if (have_sign)
550 signs[i] = speex_bits_unpack_unsigned(bits, 1);
551 else
552 signs[i] = 0;
553 ind[i] = speex_bits_unpack_unsigned(bits, params->shape_bits);
555 /* Compute decoded excitation */
556 for (i=0;i<nb_subvect;i++)
558 spx_word16_t s=1;
559 if (signs[i])
560 s=-1;
561 #ifdef FIXED_POINT
562 if (s==1)
564 for (j=0;j<subvect_size;j++)
565 exc[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5);
566 } else {
567 for (j=0;j<subvect_size;j++)
568 exc[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5));
570 #else
571 for (j=0;j<subvect_size;j++)
572 exc[subvect_size*i+j]+=s*0.03125*shape_cb[ind[i]*subvect_size+j];
573 #endif
577 #ifndef SPEEX_DISABLE_ENCODER
578 void noise_codebook_quant(
579 spx_word16_t target[], /* target vector */
580 spx_coef_t ak[], /* LPCs for this subframe */
581 spx_coef_t awk1[], /* Weighted LPCs for this subframe */
582 spx_coef_t awk2[], /* Weighted LPCs for this subframe */
583 const void *par, /* Codebook/search parameters*/
584 int p, /* number of LPC coeffs */
585 int nsf, /* number of samples in subframe */
586 spx_sig_t *exc,
587 spx_word16_t *r,
588 SpeexBits *bits,
589 char *stack,
590 int complexity,
591 int update_target
594 int i;
595 VARDECL(spx_word16_t *tmp);
596 ALLOC(tmp, nsf, spx_word16_t);
597 residue_percep_zero16(target, ak, awk1, awk2, tmp, nsf, p, stack);
599 for (i=0;i<nsf;i++)
600 exc[i]+=SHL32(EXTEND32(tmp[i]),8);
601 SPEEX_MEMSET(target, 0, nsf);
603 #endif /* SPEEX_DISABLE_ENCODER */
606 void noise_codebook_unquant(
607 spx_sig_t *exc,
608 const void *par, /* non-overlapping codebook */
609 int nsf, /* number of samples in subframe */
610 SpeexBits *bits,
611 char *stack,
612 spx_int32_t *seed
615 (void)par;
616 (void)bits;
617 (void)stack;
618 int i;
619 /* FIXME: This is bad, but I don't think the function ever gets called anyway */
620 for (i=0;i<nsf;i++)
621 exc[i]=SHL32(EXTEND32(speex_rand(1, seed)),SIG_SHIFT);