1 /* Copyright (C) 2002-2006 Jean-Marc Valin
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
15 - Neither the name of the Xiph.org Foundation nor the names of its
16 contributors may be used to endorse or promote products derived from
17 this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
23 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 #include "cb_search.h"
38 #include "stack_alloc.h"
41 #include "math_approx.h"
42 #include "os_support.h"
45 #include "cb_search_sse.h"
46 #elif defined(ARM4_ASM) || defined(ARM5E_ASM)
47 #include "cb_search_arm4.h"
48 #elif defined(BFIN_ASM)
49 #include "cb_search_bfin.h"
52 #ifndef OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK
/* Pre-computes, for every codeword of the shape codebook, its response to
 * the impulse response r and the energy of that response.
 *
 * For each i in [0, shape_cb_size): the subvect_size codebook entries
 * starting at shape_cb[i*subvect_size] are copied into the temporary
 * `shape`; for each output sample j, products shape[k]*r[j-k] are
 * accumulated into resj with MAC16_16; resj is reduced to res16; and
 * res16*res16 is accumulated into E[i].  `res` is pointed at
 * resp + i*subvect_size.
 *
 * NOTE(review): this listing omits the braces, the local declarations, the
 * inner k-loop header, any store through `res` and every preprocessor
 * conditional of this function -- confirm details against the full file. */
53 static void compute_weighted_codebook(const signed char *shape_cb
, const spx_word16_t
*r
, spx_word16_t
*resp
, spx_word16_t
*resp2
, spx_word32_t
*E
, int shape_cb_size
, int subvect_size
, char *stack
)
56 VARDECL(spx_word16_t
*shape
);
57 ALLOC(shape
, subvect_size
, spx_word16_t
);
58 for (i
=0;i
<shape_cb_size
;i
++)
62 res
= resp
+i
*subvect_size
;
63 for (k
=0;k
<subvect_size
;k
++)
64 shape
[k
] = (spx_word16_t
)shape_cb
[i
*subvect_size
+k
];
67 /* Compute codeword response using convolution with impulse response */
68 for(j
=0;j
<subvect_size
;j
++)
73 resj
= MAC16_16(resj
,shape
[k
],r
[j
-k
]);
/* NOTE(review): the next two res16 assignments look like alternative
 * variants (right shift by 13 vs. scaling by 0.03125f); the conditional
 * that selects between them is not visible in this listing. */
75 res16
= EXTRACT16(SHR32(resj
, 13));
77 res16
= 0.03125f
*resj
;
79 /* Compute codeword energy */
80 E
[i
]=MAC16_16(E
[i
],res16
,res16
);
82 /*printf ("%d\n", (int)res[j]);*/
89 #ifndef OVERRIDE_TARGET_UPDATE
/* Removes a scaled copy of r from t: each visited t[n] becomes
 * t[n] - PSHR32(MULT16_16(g, r[n]), 13).
 *
 * NOTE(review): the loop header over n is not visible in this listing;
 * presumably n runs over [0, len) -- confirm against the full file. */
90 static inline void target_update(spx_word16_t
*t
, spx_word16_t g
, spx_word16_t
*r
, int len
)
94 t
[n
] = SUB16(t
[n
],PSHR32(MULT16_16(g
,r
[n
]),13));
/* Split-codebook shape/sign search that keeps a single best candidate per
 * sub-vector (vq_nbest*() is called with N = 1).
 *
 * Visible steps:
 *  - read subvect_size, nb_subvect, shape_bits (codebook size is
 *    1<<shape_bits), shape_cb and have_sign from the split_cb_params
 *    behind `par`;
 *  - allocate the work buffers resp, E, t and e with ALLOC and copy
 *    target into t;
 *  - pre-compute the weighted codebook;
 *  - for each sub-vector: pick one codeword with vq_nbest_sign() or
 *    vq_nbest(), pack its index into `bits` using shape_bits+have_sign
 *    bits, subtract the codeword response from t (or add it back, for the
 *    other sign), store the scaled codeword in e, and propagate its effect
 *    to the following samples through target_update();
 *  - add e into exc, then filter e through syn_percep_zero16() and
 *    subtract the result from target.
 *
 * NOTE(review): this listing omits the tail of the parameter list, the
 * braces, several local declarations (e.g. rind, sign, q, g) and every
 * preprocessor / if-else condition, so which statements are alternatives
 * of one another must be confirmed against the full file. */
100 static void split_cb_search_shape_sign_N1(
101 spx_word16_t target
[], /* target vector */
102 spx_coef_t ak
[], /* LPCs for this subframe */
103 spx_coef_t awk1
[], /* Weighted LPCs for this subframe */
104 spx_coef_t awk2
[], /* Weighted LPCs for this subframe */
105 const void *par
, /* Codebook/search parameters*/
106 int p
, /* number of LPC coeffs */
107 int nsf
, /* number of samples in subframe */
116 VARDECL(spx_word16_t
*resp
);
/* NOTE(review): the __m128 declaration of resp2 presumably belongs to an
 * SSE-only build variant; the selecting conditional is not visible. */
118 VARDECL(__m128
*resp2
);
122 VARDECL(spx_word32_t
*E
);
124 VARDECL(spx_word16_t
*t
);
125 VARDECL(spx_sig_t
*e
);
126 const signed char *shape_cb
;
127 int shape_cb_size
, subvect_size
, nb_subvect
;
128 const split_cb_params
*params
;
130 spx_word32_t best_dist
;
133 params
= (const split_cb_params
*) par
;
134 subvect_size
= params
->subvect_size
;
135 nb_subvect
= params
->nb_subvect
;
136 shape_cb_size
= 1<<params
->shape_bits
;
137 shape_cb
= params
->shape_cb
;
138 have_sign
= params
->have_sign
;
139 ALLOC(resp
, shape_cb_size
*subvect_size
, spx_word16_t
);
/* NOTE(review): E is allocated twice below (as __m128 and as spx_word32_t);
 * these read as mutually exclusive build variants -- TODO confirm. */
141 ALLOC(resp2
, (shape_cb_size
*subvect_size
)>>2, __m128
);
142 ALLOC(E
, shape_cb_size
>>2, __m128
);
145 ALLOC(E
, shape_cb_size
, spx_word32_t
);
147 ALLOC(t
, nsf
, spx_word16_t
);
148 ALLOC(e
, nsf
, spx_sig_t
);
150 /* FIXME: Do we still need to copy the target? */
151 SPEEX_COPY(t
, target
, nsf
);
153 compute_weighted_codebook(shape_cb
, r
, resp
, resp2
, E
, shape_cb_size
, subvect_size
, stack
);
155 for (i
=0;i
<nb_subvect
;i
++)
157 spx_word16_t
*x
=t
+subvect_size
*i
;
158 /*Find new n-best based on previous n-best j*/
/* NOTE(review): presumably vq_nbest_sign() is used when have_sign is set
 * and vq_nbest() otherwise; the condition is not visible here. */
160 vq_nbest_sign(x
, resp2
, subvect_size
, shape_cb_size
, E
, 1, &best_index
, &best_dist
, stack
);
162 vq_nbest(x
, resp2
, subvect_size
, shape_cb_size
, E
, 1, &best_index
, &best_dist
, stack
);
164 speex_bits_pack(bits
,best_index
,params
->shape_bits
+have_sign
);
/* NOTE(review): rind is assigned on lines not shown; an index at or above
 * shape_cb_size presumably denotes the negated codeword -- TODO confirm. */
171 if (rind
>=shape_cb_size
)
176 res
= resp
+rind
*subvect_size
;
178 for (m
=0;m
<subvect_size
;m
++)
179 t
[subvect_size
*i
+m
] = SUB16(t
[subvect_size
*i
+m
], res
[m
]);
181 for (m
=0;m
<subvect_size
;m
++)
182 t
[subvect_size
*i
+m
] = ADD16(t
[subvect_size
*i
+m
], res
[m
]);
/* Build the excitation contribution e for this sub-vector from the chosen
 * codeword: shifted left by SIG_SHIFT-5, its negation, or sign*0.03125
 * times the entry, depending on branches not visible in this listing. */
187 for (j
=0;j
<subvect_size
;j
++)
188 e
[subvect_size
*i
+j
]=SHL32(EXTEND32(shape_cb
[rind
*subvect_size
+j
]),SIG_SHIFT
-5);
190 for (j
=0;j
<subvect_size
;j
++)
191 e
[subvect_size
*i
+j
]=NEG32(SHL32(EXTEND32(shape_cb
[rind
*subvect_size
+j
]),SIG_SHIFT
-5));
194 for (j
=0;j
<subvect_size
;j
++)
195 e
[subvect_size
*i
+j
]=sign
*0.03125*shape_cb
[rind
*subvect_size
+j
];
200 for (m
=0;m
<subvect_size
;m
++)
206 if (rind
>=shape_cb_size
)
214 g
=sign
*shape_cb
[rind
*subvect_size
+m
];
216 g
=sign
*0.03125*shape_cb
[rind
*subvect_size
+m
];
/* Apply gain g to the part of the target after sub-vector i.
 * NOTE(review): q is set on lines not shown in this listing. */
218 target_update(t
+subvect_size
*(i
+1), g
, r
+q
, nsf
-subvect_size
*(i
+1));
222 /* Update excitation */
223 /* FIXME: We could update the excitation directly above */
225 exc
[j
]=ADD32(exc
[j
],e
[j
]);
227 /* Update target: only update target if necessary */
230 VARDECL(spx_word16_t
*r2
);
231 ALLOC(r2
, nsf
, spx_word16_t
);
233 r2
[j
] = EXTRACT16(PSHR32(e
[j
] ,6));
234 syn_percep_zero16(r2
, ak
, awk1
, awk2
, r2
, nsf
,p
, stack
);
236 target
[j
]=SUB16(target
[j
],PSHR16(r2
[j
],2));
/* Split-codebook shape/sign search that tracks N candidates per sub-vector.
 *
 * Visible steps:
 *  - a call forwarding every argument to split_cb_search_shape_sign_N1()
 *    (presumably taken when only one candidate is kept; the condition is
 *    not visible);
 *  - allocation of the per-candidate tables: ot2/nt2 and oind/nind
 *    (N pointers each), tmp (2*N*nsf samples) and itmp (2*N*nb_subvect
 *    ints) that nt2[i], nind[i] and oind[i] are pointed into, plus
 *    best_index, best_dist, best_nind, best_ntarget, ndist and odist
 *    (N entries each);
 *  - target is copied into t and t into the old targets ot[j];
 *  - the weighted codebook is pre-computed;
 *  - for every sub-vector: ndist[] is reset to VERY_LARGE32, each previous
 *    candidate's sub-vector x is searched with vq_nbest_sign()/vq_nbest(),
 *    err = odist[j] + best_dist[k] + tener is inserted into
 *    ndist/best_nind/best_ntarget by shifting later entries down, and each
 *    surviving candidate copies the remaining target samples and the index
 *    history of the parent recorded in best_ntarget[j];
 *  - the indices in ind[] are packed with shape_bits+have_sign bits each,
 *    e is rebuilt from shape_cb and added into exc, then e is filtered
 *    through syn_percep_zero16() and subtracted from target.
 *
 * NOTE(review): this listing omits the tail of the parameter list, the
 * braces, several local declarations (e.g. N, ind, itmp, oind, nind, rind,
 * sign, q, g) and every preprocessor / if-else condition, so which
 * statements are alternatives of one another must be confirmed against
 * the full file. */
242 void split_cb_search_shape_sign(
243 spx_word16_t target
[], /* target vector */
244 spx_coef_t ak
[], /* LPCs for this subframe */
245 spx_coef_t awk1
[], /* Weighted LPCs for this subframe */
246 spx_coef_t awk2
[], /* Weighted LPCs for this subframe */
247 const void *par
, /* Codebook/search parameters*/
248 int p
, /* number of LPC coeffs */
249 int nsf
, /* number of samples in subframe */
259 VARDECL(spx_word16_t
*resp
);
261 VARDECL(__m128
*resp2
);
265 VARDECL(spx_word32_t
*E
);
267 VARDECL(spx_word16_t
*t
);
268 VARDECL(spx_sig_t
*e
);
269 VARDECL(spx_word16_t
*tmp
);
270 VARDECL(spx_word32_t
*ndist
);
271 VARDECL(spx_word32_t
*odist
);
273 VARDECL(spx_word16_t
**ot2
);
274 VARDECL(spx_word16_t
**nt2
);
275 spx_word16_t
**ot
, **nt
;
279 const signed char *shape_cb
;
280 int shape_cb_size
, subvect_size
, nb_subvect
;
281 const split_cb_params
*params
;
283 VARDECL(int *best_index
);
284 VARDECL(spx_word32_t
*best_dist
);
285 VARDECL(int *best_nind
);
286 VARDECL(int *best_ntarget
);
291 /* Complexity isn't as important for the codebooks as it is for the pitch */
297 split_cb_search_shape_sign_N1(target
,ak
,awk1
,awk2
,par
,p
,nsf
,exc
,r
,bits
,stack
,update_target
);
300 ALLOC(ot2
, N
, spx_word16_t
*);
301 ALLOC(nt2
, N
, spx_word16_t
*);
302 ALLOC(oind
, N
, int*);
303 ALLOC(nind
, N
, int*);
305 params
= (const split_cb_params
*) par
;
306 subvect_size
= params
->subvect_size
;
307 nb_subvect
= params
->nb_subvect
;
308 shape_cb_size
= 1<<params
->shape_bits
;
309 shape_cb
= params
->shape_cb
;
310 have_sign
= params
->have_sign
;
311 ALLOC(resp
, shape_cb_size
*subvect_size
, spx_word16_t
);
/* NOTE(review): E is allocated twice below (as __m128 and as spx_word32_t);
 * these read as mutually exclusive build variants -- TODO confirm. */
313 ALLOC(resp2
, (shape_cb_size
*subvect_size
)>>2, __m128
);
314 ALLOC(E
, shape_cb_size
>>2, __m128
);
317 ALLOC(E
, shape_cb_size
, spx_word32_t
);
319 ALLOC(t
, nsf
, spx_word16_t
);
320 ALLOC(e
, nsf
, spx_sig_t
);
321 ALLOC(ind
, nb_subvect
, int);
323 ALLOC(tmp
, 2*N
*nsf
, spx_word16_t
);
/* nt2[i] takes the odd-numbered nsf-sized slice of tmp; the matching
 * ot2[i] assignment is not visible in this listing. */
327 nt2
[i
]=tmp
+(2*i
+1)*nsf
;
331 ALLOC(best_index
, N
, int);
332 ALLOC(best_dist
, N
, spx_word32_t
);
333 ALLOC(best_nind
, N
, int);
334 ALLOC(best_ntarget
, N
, int);
335 ALLOC(ndist
, N
, spx_word32_t
);
336 ALLOC(odist
, N
, spx_word32_t
);
338 ALLOC(itmp
, 2*N
*nb_subvect
, int);
/* nind[i] / oind[i] take the even / odd nb_subvect-sized slices of itmp. */
341 nind
[i
]=itmp
+2*i
*nb_subvect
;
342 oind
[i
]=itmp
+(2*i
+1)*nb_subvect
;
345 SPEEX_COPY(t
, target
, nsf
);
348 SPEEX_COPY(&ot
[j
][0], t
, nsf
);
350 /* Pre-compute codewords response and energy */
351 compute_weighted_codebook(shape_cb
, r
, resp
, resp2
, E
, shape_cb_size
, subvect_size
, stack
);
356 /*For all subvectors*/
357 for (i
=0;i
<nb_subvect
;i
++)
359 /*"erase" nbest list*/
361 ndist
[j
]=VERY_LARGE32
;
362 /* This is not strictly necessary, but it provides an additional safety
363 to prevent crashes in case something goes wrong in the previous
366 best_nind
[j
] = best_ntarget
[j
] = 0;
367 /*For all n-bests of previous subvector*/
370 spx_word16_t
*x
=ot
[j
]+subvect_size
*i
;
/* tener accumulates x[m]*x[m] over the sub-vector; the SHR32 by 1 that
 * follows is presumably limited to one build variant -- TODO confirm. */
371 spx_word32_t tener
= 0;
372 for (m
=0;m
<subvect_size
;m
++)
373 tener
= MAC16_16(tener
, x
[m
],x
[m
]);
375 tener
= SHR32(tener
,1);
379 /*Find new n-best based on previous n-best j*/
381 vq_nbest_sign(x
, resp2
, subvect_size
, shape_cb_size
, E
, N
, best_index
, best_dist
, stack
);
383 vq_nbest(x
, resp2
, subvect_size
, shape_cb_size
, E
, N
, best_index
, best_dist
, stack
);
385 /*For all new n-bests*/
388 /* Compute total distance (including previous sub-vectors) */
389 spx_word32_t err
= ADD32(ADD32(odist
[j
],best_dist
[k
]),tener
);
391 /*update n-best list*/
/* Shift entry n-1 down to n in all three parallel lists to open a slot;
 * the enclosing comparison and loop are not visible in this listing. */
400 ndist
[n
] = ndist
[n
-1];
401 best_nind
[n
] = best_nind
[n
-1];
402 best_ntarget
[n
] = best_ntarget
[n
-1];
404 /* n is equal to m here, so they're interchangeable */
406 best_nind
[n
] = best_index
[k
];
418 /*previous target (we don't care what happened before)*/
419 for (m
=(i
+1)*subvect_size
;m
<nsf
;m
++)
420 nt
[j
][m
]=ot
[best_ntarget
[j
]][m
];
422 /* New code: update the rest of the target only if it's worth it */
423 for (m
=0;m
<subvect_size
;m
++)
/* NOTE(review): rind is assigned on lines not shown; an index at or above
 * shape_cb_size presumably denotes the negated codeword -- TODO confirm. */
429 if (rind
>=shape_cb_size
)
437 g
=sign
*shape_cb
[rind
*subvect_size
+m
];
439 g
=sign
*0.03125*shape_cb
[rind
*subvect_size
+m
];
441 target_update(nt
[j
]+subvect_size
*(i
+1), g
, r
+q
, nsf
-subvect_size
*(i
+1));
/* Inherit the parent's index history, then record this sub-vector's pick. */
444 for (q
=0;q
<nb_subvect
;q
++)
445 nind
[j
][q
]=oind
[best_ntarget
[j
]][q
];
446 nind
[j
][i
]=best_nind
[j
];
449 /*update old-new data*/
450 /* just swap pointers instead of a long copy */
458 for (m
=0;m
<nb_subvect
;m
++)
459 oind
[j
][m
]=nind
[j
][m
];
465 for (i
=0;i
<nb_subvect
;i
++)
468 speex_bits_pack(bits
,ind
[i
],params
->shape_bits
+have_sign
);
471 /* Put everything back together */
472 for (i
=0;i
<nb_subvect
;i
++)
477 if (rind
>=shape_cb_size
)
485 for (j
=0;j
<subvect_size
;j
++)
486 e
[subvect_size
*i
+j
]=SHL32(EXTEND32(shape_cb
[rind
*subvect_size
+j
]),SIG_SHIFT
-5);
488 for (j
=0;j
<subvect_size
;j
++)
489 e
[subvect_size
*i
+j
]=NEG32(SHL32(EXTEND32(shape_cb
[rind
*subvect_size
+j
]),SIG_SHIFT
-5));
492 for (j
=0;j
<subvect_size
;j
++)
493 e
[subvect_size
*i
+j
]=sign
*0.03125*shape_cb
[rind
*subvect_size
+j
];
496 /* Update excitation */
498 exc
[j
]=ADD32(exc
[j
],e
[j
]);
500 /* Update target: only update target if necessary */
503 VARDECL(spx_word16_t
*r2
);
504 ALLOC(r2
, nsf
, spx_word16_t
);
506 r2
[j
] = EXTRACT16(PSHR32(e
[j
] ,6));
507 syn_percep_zero16(r2
, ak
, awk1
, awk2
, r2
, nsf
,p
, stack
);
509 target
[j
]=SUB16(target
[j
],PSHR16(r2
[j
],2));
/* Rebuilds the excitation from packed split-codebook indices.
 *
 * Visible steps: read subvect_size, nb_subvect, shape_bits, shape_cb and
 * have_sign from the split_cb_params behind `par`; allocate ind[] and
 * signs[] (nb_subvect ints each); for every sub-vector unpack a 1-bit
 * value into signs[i] and a shape_bits-wide value into ind[i]; then fill
 * exc[subvect_size*i + j] from shape_cb[ind[i]*subvect_size + j], either
 * shifted left by SIG_SHIFT-5, negated after that shift, or accumulated
 * as s*0.03125 times the entry.
 *
 * NOTE(review): the rest of the parameter list, the braces, the
 * declaration of s and the conditions choosing between the three exc
 * forms (and whether the sign bit is read only when have_sign is set) are
 * not visible in this listing -- confirm against the full file. */
514 void split_cb_shape_sign_unquant(
516 const void *par
, /* non-overlapping codebook */
517 int nsf
, /* number of samples in subframe */
526 const signed char *shape_cb
;
527 int shape_cb_size
, subvect_size
, nb_subvect
;
528 const split_cb_params
*params
;
531 params
= (const split_cb_params
*) par
;
532 subvect_size
= params
->subvect_size
;
533 nb_subvect
= params
->nb_subvect
;
534 shape_cb_size
= 1<<params
->shape_bits
;
535 shape_cb
= params
->shape_cb
;
536 have_sign
= params
->have_sign
;
538 ALLOC(ind
, nb_subvect
, int);
539 ALLOC(signs
, nb_subvect
, int);
541 /* Decode codewords and gains */
542 for (i
=0;i
<nb_subvect
;i
++)
545 signs
[i
] = speex_bits_unpack_unsigned(bits
, 1);
548 ind
[i
] = speex_bits_unpack_unsigned(bits
, params
->shape_bits
);
550 /* Compute decoded excitation */
551 for (i
=0;i
<nb_subvect
;i
++)
559 for (j
=0;j
<subvect_size
;j
++)
560 exc
[subvect_size
*i
+j
]=SHL32(EXTEND32(shape_cb
[ind
[i
]*subvect_size
+j
]),SIG_SHIFT
-5);
562 for (j
=0;j
<subvect_size
;j
++)
563 exc
[subvect_size
*i
+j
]=NEG32(SHL32(EXTEND32(shape_cb
[ind
[i
]*subvect_size
+j
]),SIG_SHIFT
-5));
/* Unlike the two assignments above, this form accumulates into exc (+=). */
566 for (j
=0;j
<subvect_size
;j
++)
567 exc
[subvect_size
*i
+j
]+=s
*0.03125*shape_cb
[ind
[i
]*subvect_size
+j
];
/* "Noise codebook" quantiser: passes target through
 * residue_percep_zero16() into the temporary tmp (nsf entries), adds each
 * tmp[i] shifted left by 8 into exc[i], then clears target with
 * SPEEX_MEMSET(target, 0, nsf).
 *
 * NOTE(review): the rest of the parameter list, the braces and the loop
 * header over i are not visible in this listing. */
572 void noise_codebook_quant(
573 spx_word16_t target
[], /* target vector */
574 spx_coef_t ak
[], /* LPCs for this subframe */
575 spx_coef_t awk1
[], /* Weighted LPCs for this subframe */
576 spx_coef_t awk2
[], /* Weighted LPCs for this subframe */
577 const void *par
, /* Codebook/search parameters*/
578 int p
, /* number of LPC coeffs */
579 int nsf
, /* number of samples in subframe */
589 VARDECL(spx_word16_t
*tmp
);
590 ALLOC(tmp
, nsf
, spx_word16_t
);
591 residue_percep_zero16(target
, ak
, awk1
, awk2
, tmp
, nsf
, p
, stack
);
594 exc
[i
]+=SHL32(EXTEND32(tmp
[i
]),8);
595 SPEEX_MEMSET(target
, 0, nsf
);
/* "Noise codebook" decoder: each visited exc[i] is set to
 * speex_rand(1, seed) widened with EXTEND32 and shifted left by SIG_SHIFT.
 *
 * NOTE(review): the rest of the parameter list, the braces and the loop
 * header over i are not visible in this listing. */
599 void noise_codebook_unquant(
601 const void *par
, /* non-overlapping codebook */
602 int nsf
, /* number of samples in subframe */
609 /* FIXME: This is bad, but I don't think the function ever gets called anyway */
611 exc
[i
]=SHL32(EXTEND32(speex_rand(1, seed
)),SIG_SHIFT
);