1 /* Copyright (C) 2002-2006 Jean-Marc Valin
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
15 - Neither the name of the Xiph.org Foundation nor the names of its
16 contributors may be used to endorse or promote products derived from
17 this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
23 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 #include "config-speex.h"
36 #include "cb_search.h"
38 #include "stack_alloc.h"
41 #include "math_approx.h"
42 #include "os_support.h"
45 #include "cb_search_sse.h"
46 #elif defined(ARM4_ASM) || defined(ARM5E_ASM)
47 #include "cb_search_arm4.h"
48 #elif defined(BFIN_ASM)
49 #include "cb_search_bfin.h"
52 #ifndef SPEEX_DISABLE_ENCODER
53 #ifndef OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK
/*
 * compute_weighted_codebook: for each of the shape_cb_size codewords,
 * convolve the codeword with the impulse response r and accumulate the
 * energy of the result in E[i].
 *
 *   shape_cb       codebook entries, read as shape_cb[i*subvect_size+k]
 *   r              impulse response, read as r[j-k]
 *   resp           response buffer; codeword i uses the slice that starts
 *                  at resp + i*subvect_size
 *   resp2          not referenced in the lines visible here
 *   E              per-codeword energy, indexed by codeword number
 *   shape_cb_size  number of codewords
 *   subvect_size   number of samples per codeword
 *   stack          scratch memory; presumably what ALLOC draws from
 *                  -- TODO confirm against stack_alloc.h
 *
 * NOTE(review): the embedded numbering of this listing has gaps, so the
 * local declarations, the braces, the bounds of the inner k loop, the
 * initialisation of E[i] and the store through res are not visible here.
 */
54 static void compute_weighted_codebook(const signed char *shape_cb
, const spx_word16_t
*r
, spx_word16_t
*resp
, spx_word16_t
*resp2
, spx_word32_t
*E
, int shape_cb_size
, int subvect_size
, char *stack
)
/* Scratch copy of one codeword, widened to spx_word16_t */
57 VARDECL(spx_word16_t
*shape
);
58 ALLOC(shape
, subvect_size
, spx_word16_t
);
/* One iteration per codeword */
59 for (i
=0;i
<shape_cb_size
;i
++)
/* res addresses the slice of resp that belongs to codeword i */
63 res
= resp
+i
*subvect_size
;
/* Copy codeword i out of the signed-char table into the scratch buffer */
64 for (k
=0;k
<subvect_size
;k
++)
65 shape
[k
] = (spx_word16_t
)shape_cb
[i
*subvect_size
+k
];
68 /* Compute codeword response using convolution with impulse response */
69 for(j
=0;j
<subvect_size
;j
++)
/* Accumulate shape[k]*r[j-k] into resj */
74 resj
= MAC16_16(resj
,shape
[k
],r
[j
-k
]);
/* Two scalings of resj are visible: a right shift by 13 through EXTRACT16,
   and a multiplication by 0.03125f.
   NOTE(review): presumably alternatives selected by a preprocessor
   conditional whose lines are missing from this listing -- confirm. */
76 res16
= EXTRACT16(SHR32(resj
, 13));
78 res16
= 0.03125f
*resj
;
80 /* Compute codeword energy */
81 E
[i
]=MAC16_16(E
[i
],res16
,res16
);
83 /*printf ("%d\n", (int)res[j]);*/
90 #ifndef OVERRIDE_TARGET_UPDATE
/*
 * target_update: subtract a scaled copy of r from t, in place.
 * The visible update is t[n] = t[n] - PSHR32(g*r[n], 13).
 *
 *   t    vector that is modified
 *   g    gain multiplied with every r[n]
 *   r    vector that is scaled and subtracted
 *   len  NOTE(review): presumably the number of elements processed; the
 *        loop header is missing from this listing -- confirm.
 */
91 static inline void target_update(spx_word16_t
*t
, spx_word16_t g
, spx_word16_t
*r
, int len
)
/* Product is shifted right by 13 (PSHR32) before the subtraction */
95 t
[n
] = SUB16(t
[n
],PSHR32(MULT16_16(g
,r
[n
]),13));
/*
 * split_cb_search_shape_sign_N1: split shape/sign codebook search that asks
 * vq_nbest / vq_nbest_sign for a single candidate (the count argument is 1).
 *
 * What the visible lines do:
 *   - copy target into the working buffer t and pre-compute the codeword
 *     responses with compute_weighted_codebook;
 *   - for each of the nb_subvect sub-vectors, search on the slice
 *     t + subvect_size*i, write best_index to bits using
 *     shape_bits + have_sign bits, add or subtract the codeword response
 *     res from that slice of t, store the scaled codeword in e, and call
 *     target_update on the part of t that follows sub-vector i;
 *   - add e to exc;
 *   - build r2 from e (shifted right by 6), filter it in place with
 *     syn_percep_zero16, and subtract PSHR16(r2[j], 2) from target[j].
 *
 * NOTE(review): the embedded numbering has gaps. The tail of the parameter
 * list (exc, r, bits, stack and update_target are used or passed by the
 * caller but not declared in view), the local declarations, the braces and
 * the conditionals that choose between the paired alternatives below are
 * all missing from this listing.
 */
101 static void split_cb_search_shape_sign_N1(
102 spx_word16_t target
[], /* target vector */
103 spx_coef_t ak
[], /* LPCs for this subframe */
104 spx_coef_t awk1
[], /* Weighted LPCs for this subframe */
105 spx_coef_t awk2
[], /* Weighted LPCs for this subframe */
106 const void *par
, /* Codebook/search parameters*/
107 int p
, /* number of LPC coeffs */
108 int nsf
, /* number of samples in subframe */
117 VARDECL(spx_word16_t
*resp
);
/* NOTE(review): resp2 and E appear with __m128 as well as with plain word
   types further down; presumably alternative builds selected by a
   preprocessor conditional that is not visible here -- confirm. */
119 VARDECL(__m128
*resp2
);
123 VARDECL(spx_word32_t
*E
);
125 VARDECL(spx_word16_t
*t
);
126 VARDECL(spx_sig_t
*e
);
127 const signed char *shape_cb
;
128 int shape_cb_size
, subvect_size
, nb_subvect
;
129 const split_cb_params
*params
;
131 spx_word32_t best_dist
;
/* Read the codebook geometry out of the opaque parameter block */
134 params
= (const split_cb_params
*) par
;
135 subvect_size
= params
->subvect_size
;
136 nb_subvect
= params
->nb_subvect
;
/* shape_bits index bits address 2^shape_bits codewords */
137 shape_cb_size
= 1<<params
->shape_bits
;
138 shape_cb
= params
->shape_cb
;
139 have_sign
= params
->have_sign
;
140 ALLOC(resp
, shape_cb_size
*subvect_size
, spx_word16_t
);
142 ALLOC(resp2
, (shape_cb_size
*subvect_size
)>>2, __m128
);
143 ALLOC(E
, shape_cb_size
>>2, __m128
);
146 ALLOC(E
, shape_cb_size
, spx_word32_t
);
148 ALLOC(t
, nsf
, spx_word16_t
);
149 ALLOC(e
, nsf
, spx_sig_t
);
151 /* FIXME: Do we still need to copy the target? */
152 SPEEX_COPY(t
, target
, nsf
);
/* Pre-compute the response and energy of every codeword */
154 compute_weighted_codebook(shape_cb
, r
, resp
, resp2
, E
, shape_cb_size
, subvect_size
, stack
);
/* One iteration per sub-vector */
156 for (i
=0;i
<nb_subvect
;i
++)
/* x is the slice of the working target covered by sub-vector i */
158 spx_word16_t
*x
=t
+subvect_size
*i
;
159 /*Find new n-best based on previous n-best j*/
/* NOTE(review): both search calls are visible; presumably the signed one is
   taken when have_sign is set -- the condition line is missing, confirm. */
161 vq_nbest_sign(x
, resp2
, subvect_size
, shape_cb_size
, E
, 1, &best_index
, &best_dist
, stack
);
163 vq_nbest(x
, resp2
, subvect_size
, shape_cb_size
, E
, 1, &best_index
, &best_dist
, stack
);
/* The index is written with shape_bits + have_sign bits */
165 speex_bits_pack(bits
,best_index
,params
->shape_bits
+have_sign
);
/* rind at or above shape_cb_size is special-cased; the statements of that
   branch are not visible in this listing */
172 if (rind
>=shape_cb_size
)
/* res addresses the pre-computed response of codeword rind */
177 res
= resp
+rind
*subvect_size
;
/* A subtracting and an adding update of t by res are both visible.
   NOTE(review): presumably selected by the sign -- confirm. */
179 for (m
=0;m
<subvect_size
;m
++)
180 t
[subvect_size
*i
+m
] = SUB16(t
[subvect_size
*i
+m
], res
[m
]);
182 for (m
=0;m
<subvect_size
;m
++)
183 t
[subvect_size
*i
+m
] = ADD16(t
[subvect_size
*i
+m
], res
[m
]);
/* Store the chosen codeword in e: a shifted form (SIG_SHIFT-5), its negated
   counterpart, and a form scaled by sign*0.03125 are visible */
188 for (j
=0;j
<subvect_size
;j
++)
189 e
[subvect_size
*i
+j
]=SHL32(EXTEND32(shape_cb
[rind
*subvect_size
+j
]),SIG_SHIFT
-5);
191 for (j
=0;j
<subvect_size
;j
++)
192 e
[subvect_size
*i
+j
]=NEG32(SHL32(EXTEND32(shape_cb
[rind
*subvect_size
+j
]),SIG_SHIFT
-5));
195 for (j
=0;j
<subvect_size
;j
++)
196 e
[subvect_size
*i
+j
]=sign
*0.03125*shape_cb
[rind
*subvect_size
+j
];
/* For every sample m of the chosen codeword, propagate its effect to the
   part of t that lies after sub-vector i */
201 for (m
=0;m
<subvect_size
;m
++)
207 if (rind
>=shape_cb_size
)
/* g is the signed codeword sample, unscaled in one variant and scaled by
   0.03125 in the other */
215 g
=sign
*shape_cb
[rind
*subvect_size
+m
];
217 g
=sign
*0.03125*shape_cb
[rind
*subvect_size
+m
];
219 target_update(t
+subvect_size
*(i
+1), g
, r
+q
, nsf
-subvect_size
*(i
+1));
223 /* Update excitation */
224 /* FIXME: We could update the excitation directly above */
226 exc
[j
]=ADD32(exc
[j
],e
[j
]);
228 /* Update target: only update target if necessary */
231 VARDECL(spx_word16_t
*r2
);
232 ALLOC(r2
, nsf
, spx_word16_t
);
/* r2 is e shifted right by 6 (PSHR32) and narrowed with EXTRACT16 */
234 r2
[j
] = EXTRACT16(PSHR32(e
[j
] ,6));
/* r2 is both the input and the output of the filter call */
235 syn_percep_zero16(r2
, ak
, awk1
, awk2
, r2
, nsf
,p
, stack
);
237 target
[j
]=SUB16(target
[j
],PSHR16(r2
[j
],2));
/*
 * split_cb_search_shape_sign: split shape/sign codebook search that keeps N
 * candidates per sub-vector.
 *
 * What the visible lines establish:
 *   - split_cb_search_shape_sign_N1 is called with the same arguments.
 *     NOTE(review): presumably only when N is 1 and followed by a return;
 *     the guarding lines are missing from this listing -- confirm.
 *   - tmp provides 2*N buffers of nsf samples and itmp provides 2*N lists
 *     of nb_subvect indices; the nt2, nind and oind entries point into them.
 *   - For every sub-vector i and previous candidate j, tener is the sum of
 *     squares of that candidate's sub-vector (shifted right by one bit in
 *     the visible variant), the search returns N candidates in
 *     best_index / best_dist, and err = odist[j] + best_dist[k] + tener.
 *     ndist, best_nind and best_ntarget are shifted down by one slot to
 *     make room for an inserted candidate.
 *   - For every kept candidate j, the samples from sub-vector i+1 onwards
 *     are copied from ot[best_ntarget[j]], adjusted with target_update, and
 *     the index history is copied from oind[best_ntarget[j]] with entry i
 *     set to best_nind[j].
 *   - ind[i] is written to bits with shape_bits + have_sign bits, e is
 *     rebuilt from shape_cb and added to exc, and target is reduced by the
 *     output of syn_percep_zero16 applied to e shifted right by 6.
 *
 * NOTE(review): the embedded numbering has gaps. The tail of the parameter
 * list, many local declarations, the braces, several loop headers and the
 * conditionals that select between paired alternatives are not visible.
 * The safety comment that opens on embedded line 363 is also not terminated
 * in this listing; it is left untouched here.
 */
243 void split_cb_search_shape_sign(
244 spx_word16_t target
[], /* target vector */
245 spx_coef_t ak
[], /* LPCs for this subframe */
246 spx_coef_t awk1
[], /* Weighted LPCs for this subframe */
247 spx_coef_t awk2
[], /* Weighted LPCs for this subframe */
248 const void *par
, /* Codebook/search parameters*/
249 int p
, /* number of LPC coeffs */
250 int nsf
, /* number of samples in subframe */
260 VARDECL(spx_word16_t
*resp
);
/* NOTE(review): resp2 and E appear with __m128 as well as with plain word
   types further down; presumably alternative builds selected by a
   preprocessor conditional that is not visible here -- confirm. */
262 VARDECL(__m128
*resp2
);
266 VARDECL(spx_word32_t
*E
);
268 VARDECL(spx_word16_t
*t
);
269 VARDECL(spx_sig_t
*e
);
270 VARDECL(spx_word16_t
*tmp
);
271 VARDECL(spx_word32_t
*ndist
);
272 VARDECL(spx_word32_t
*odist
);
274 VARDECL(spx_word16_t
**ot2
);
275 VARDECL(spx_word16_t
**nt2
);
276 spx_word16_t
**ot
, **nt
;
280 const signed char *shape_cb
;
281 int shape_cb_size
, subvect_size
, nb_subvect
;
282 const split_cb_params
*params
;
284 VARDECL(int *best_index
);
285 VARDECL(spx_word32_t
*best_dist
);
286 VARDECL(int *best_nind
);
287 VARDECL(int *best_ntarget
);
292 /* Complexity isn't as important for the codebooks as it is for the pitch */
/* Hand the whole job to the single-candidate routine */
298 split_cb_search_shape_sign_N1(target
,ak
,awk1
,awk2
,par
,p
,nsf
,exc
,r
,bits
,stack
,update_target
);
/* Per-candidate pointer tables: targets (ot2, nt2) and index lists (oind, nind) */
301 ALLOC(ot2
, N
, spx_word16_t
*);
302 ALLOC(nt2
, N
, spx_word16_t
*);
303 ALLOC(oind
, N
, int*);
304 ALLOC(nind
, N
, int*);
/* Read the codebook geometry out of the opaque parameter block */
306 params
= (const split_cb_params
*) par
;
307 subvect_size
= params
->subvect_size
;
308 nb_subvect
= params
->nb_subvect
;
/* shape_bits index bits address 2^shape_bits codewords */
309 shape_cb_size
= 1<<params
->shape_bits
;
310 shape_cb
= params
->shape_cb
;
311 have_sign
= params
->have_sign
;
312 ALLOC(resp
, shape_cb_size
*subvect_size
, spx_word16_t
);
314 ALLOC(resp2
, (shape_cb_size
*subvect_size
)>>2, __m128
);
315 ALLOC(E
, shape_cb_size
>>2, __m128
);
318 ALLOC(E
, shape_cb_size
, spx_word32_t
);
320 ALLOC(t
, nsf
, spx_word16_t
);
321 ALLOC(e
, nsf
, spx_sig_t
);
322 ALLOC(ind
, nb_subvect
, int);
/* Backing store for 2*N target buffers of nsf samples each */
324 ALLOC(tmp
, 2*N
*nsf
, spx_word16_t
);
/* nt2[i] takes the odd-numbered buffer of pair i */
328 nt2
[i
]=tmp
+(2*i
+1)*nsf
;
332 ALLOC(best_index
, N
, int);
333 ALLOC(best_dist
, N
, spx_word32_t
);
334 ALLOC(best_nind
, N
, int);
335 ALLOC(best_ntarget
, N
, int);
336 ALLOC(ndist
, N
, spx_word32_t
);
337 ALLOC(odist
, N
, spx_word32_t
);
/* Backing store for 2*N index lists of nb_subvect entries each */
339 ALLOC(itmp
, 2*N
*nb_subvect
, int);
/* nind[i] takes the even-numbered list of pair i, oind[i] the odd one */
342 nind
[i
]=itmp
+2*i
*nb_subvect
;
343 oind
[i
]=itmp
+(2*i
+1)*nb_subvect
;
/* Work on a copy of the target, then seed candidate buffer ot[j] with it */
346 SPEEX_COPY(t
, target
, nsf
);
349 SPEEX_COPY(&ot
[j
][0], t
, nsf
);
351 /* Pre-compute codewords response and energy */
352 compute_weighted_codebook(shape_cb
, r
, resp
, resp2
, E
, shape_cb_size
, subvect_size
, stack
);
357 /*For all subvectors*/
358 for (i
=0;i
<nb_subvect
;i
++)
360 /*"erase" nbest list*/
362 ndist
[j
]=VERY_LARGE32
;
363 /* This is not strictly necessary, but it provides an additonal safety
364 to prevent crashes in case something goes wrong in the previous
367 best_nind
[j
] = best_ntarget
[j
] = 0;
368 /*For all n-bests of previous subvector*/
/* x is sub-vector i of previous candidate j */
371 spx_word16_t
*x
=ot
[j
]+subvect_size
*i
;
/* tener accumulates the sum of squares of x */
372 spx_word32_t tener
= 0;
373 for (m
=0;m
<subvect_size
;m
++)
374 tener
= MAC16_16(tener
, x
[m
],x
[m
]);
376 tener
= SHR32(tener
,1);
380 /*Find new n-best based on previous n-best j*/
/* NOTE(review): both search calls are visible; presumably the signed one is
   taken when have_sign is set -- the condition line is missing, confirm. */
382 vq_nbest_sign(x
, resp2
, subvect_size
, shape_cb_size
, E
, N
, best_index
, best_dist
, stack
);
384 vq_nbest(x
, resp2
, subvect_size
, shape_cb_size
, E
, N
, best_index
, best_dist
, stack
);
386 /*For all new n-bests*/
389 /* Compute total distance (including previous sub-vectors) */
390 spx_word32_t err
= ADD32(ADD32(odist
[j
],best_dist
[k
]),tener
);
392 /*update n-best list*/
/* Shift the three parallel lists down by one slot */
401 ndist
[n
] = ndist
[n
-1];
402 best_nind
[n
] = best_nind
[n
-1];
403 best_ntarget
[n
] = best_ntarget
[n
-1];
405 /* n is equal to m here, so they're interchangeable */
407 best_nind
[n
] = best_index
[k
];
419 /*previous target (we don't care what happened before)*/
/* Only the samples after sub-vector i are carried over */
420 for (m
=(i
+1)*subvect_size
;m
<nsf
;m
++)
421 nt
[j
][m
]=ot
[best_ntarget
[j
]][m
];
423 /* New code: update the rest of the target only if it's worth it */
424 for (m
=0;m
<subvect_size
;m
++)
/* rind at or above shape_cb_size is special-cased; the statements of that
   branch are not visible in this listing */
430 if (rind
>=shape_cb_size
)
/* g is the signed codeword sample, unscaled in one variant and scaled by
   0.03125 in the other */
438 g
=sign
*shape_cb
[rind
*subvect_size
+m
];
440 g
=sign
*0.03125*shape_cb
[rind
*subvect_size
+m
];
442 target_update(nt
[j
]+subvect_size
*(i
+1), g
, r
+q
, nsf
-subvect_size
*(i
+1));
/* Inherit the index history of the parent candidate, then record the index
   chosen for sub-vector i */
445 for (q
=0;q
<nb_subvect
;q
++)
446 nind
[j
][q
]=oind
[best_ntarget
[j
]][q
];
447 nind
[j
][i
]=best_nind
[j
];
450 /*update old-new data*/
451 /* just swap pointers instead of a long copy */
/* The index lists, unlike the targets, are copied element by element */
459 for (m
=0;m
<nb_subvect
;m
++)
460 oind
[j
][m
]=nind
[j
][m
];
/* Write the selected index of every sub-vector to the bit-stream */
466 for (i
=0;i
<nb_subvect
;i
++)
469 speex_bits_pack(bits
,ind
[i
],params
->shape_bits
+have_sign
);
472 /* Put everything back together */
473 for (i
=0;i
<nb_subvect
;i
++)
478 if (rind
>=shape_cb_size
)
/* Store the chosen codeword in e: a shifted form (SIG_SHIFT-5), its negated
   counterpart, and a form scaled by sign*0.03125 are visible */
486 for (j
=0;j
<subvect_size
;j
++)
487 e
[subvect_size
*i
+j
]=SHL32(EXTEND32(shape_cb
[rind
*subvect_size
+j
]),SIG_SHIFT
-5);
489 for (j
=0;j
<subvect_size
;j
++)
490 e
[subvect_size
*i
+j
]=NEG32(SHL32(EXTEND32(shape_cb
[rind
*subvect_size
+j
]),SIG_SHIFT
-5));
493 for (j
=0;j
<subvect_size
;j
++)
494 e
[subvect_size
*i
+j
]=sign
*0.03125*shape_cb
[rind
*subvect_size
+j
];
497 /* Update excitation */
499 exc
[j
]=ADD32(exc
[j
],e
[j
]);
501 /* Update target: only update target if necessary */
504 VARDECL(spx_word16_t
*r2
);
505 ALLOC(r2
, nsf
, spx_word16_t
);
/* r2 is e shifted right by 6 (PSHR32) and narrowed with EXTRACT16 */
507 r2
[j
] = EXTRACT16(PSHR32(e
[j
] ,6));
/* r2 is both the input and the output of the filter call */
508 syn_percep_zero16(r2
, ak
, awk1
, awk2
, r2
, nsf
,p
, stack
);
510 target
[j
]=SUB16(target
[j
],PSHR16(r2
[j
],2));
513 #endif /* SPEEX_DISABLE_ENCODER*/
/*
 * split_cb_shape_sign_unquant: read the per-sub-vector sign bits and
 * codeword indices from bits and write the corresponding codewords into exc.
 *
 * For each of the nb_subvect sub-vectors the visible lines unpack one bit
 * into signs[i] and params->shape_bits bits into ind[i]; the second loop
 * then fills exc[subvect_size*i + j] from shape_cb[ind[i]*subvect_size + j].
 *
 * NOTE(review): the embedded numbering has gaps. Most of the parameter list
 * (exc, bits and others), the local declarations, the braces and the
 * conditionals around the sign handling are missing from this listing.
 */
516 void split_cb_shape_sign_unquant(
518 const void *par
, /* non-overlapping codebook */
519 int nsf
, /* number of samples in subframe */
531 const signed char *shape_cb
;
532 int shape_cb_size
, subvect_size
, nb_subvect
;
533 const split_cb_params
*params
;
/* Read the codebook geometry out of the opaque parameter block */
536 params
= (const split_cb_params
*) par
;
537 subvect_size
= params
->subvect_size
;
538 nb_subvect
= params
->nb_subvect
;
539 shape_cb_size
= 1<<params
->shape_bits
;
540 shape_cb
= params
->shape_cb
;
541 have_sign
= params
->have_sign
;
543 ALLOC(ind
, nb_subvect
, int);
544 ALLOC(signs
, nb_subvect
, int);
546 /* Decode sign bits and codeword indices */
547 for (i
=0;i
<nb_subvect
;i
++)
/* NOTE(review): presumably the sign bit is only read when have_sign is set;
   the condition line is missing from this listing -- confirm. */
550 signs
[i
] = speex_bits_unpack_unsigned(bits
, 1);
553 ind
[i
] = speex_bits_unpack_unsigned(bits
, params
->shape_bits
);
555 /* Compute decoded excitation */
556 for (i
=0;i
<nb_subvect
;i
++)
/* Shifted form (SIG_SHIFT-5) and its negated counterpart: both assign */
564 for (j
=0;j
<subvect_size
;j
++)
565 exc
[subvect_size
*i
+j
]=SHL32(EXTEND32(shape_cb
[ind
[i
]*subvect_size
+j
]),SIG_SHIFT
-5);
567 for (j
=0;j
<subvect_size
;j
++)
568 exc
[subvect_size
*i
+j
]=NEG32(SHL32(EXTEND32(shape_cb
[ind
[i
]*subvect_size
+j
]),SIG_SHIFT
-5));
/* NOTE(review): this variant accumulates into exc with +=, whereas the two
   shifted variants above overwrite it with = ; confirm that the difference
   is intended. */
571 for (j
=0;j
<subvect_size
;j
++)
572 exc
[subvect_size
*i
+j
]+=s
*0.03125*shape_cb
[ind
[i
]*subvect_size
+j
];
577 #ifndef SPEEX_DISABLE_ENCODER
/*
 * noise_codebook_quant: run residue_percep_zero16 on target, add the result
 * (shifted left by 8) to exc, and clear target.
 *
 * NOTE(review): the embedded numbering has gaps. The tail of the parameter
 * list (exc and stack are used but not declared in view), the declaration
 * of i, the loop header and the braces are missing from this listing.
 * par is not referenced in the visible lines.
 */
578 void noise_codebook_quant(
579 spx_word16_t target
[], /* target vector */
580 spx_coef_t ak
[], /* LPCs for this subframe */
581 spx_coef_t awk1
[], /* Weighted LPCs for this subframe */
582 spx_coef_t awk2
[], /* Weighted LPCs for this subframe */
583 const void *par
, /* Codebook/search parameters*/
584 int p
, /* number of LPC coeffs */
585 int nsf
, /* number of samples in subframe */
/* tmp receives the output of residue_percep_zero16, nsf samples long */
595 VARDECL(spx_word16_t
*tmp
);
596 ALLOC(tmp
, nsf
, spx_word16_t
);
597 residue_percep_zero16(target
, ak
, awk1
, awk2
, tmp
, nsf
, p
, stack
);
/* Widen tmp[i] to 32 bits, shift left by 8 and accumulate into exc[i] */
600 exc
[i
]+=SHL32(EXTEND32(tmp
[i
]),8);
/* Zero the target with SPEEX_MEMSET; nsf is passed as the count */
601 SPEEX_MEMSET(target
, 0, nsf
);
603 #endif /* SPEEX_DISABLE_ENCODER */
/*
 * noise_codebook_unquant: set exc[i] to speex_rand(1, seed), widened to
 * 32 bits and shifted left by SIG_SHIFT.
 *
 * NOTE(review): the embedded numbering has gaps. Most of the parameter list
 * (exc, seed and others), the declaration of i, the loop header and the
 * braces are missing from this listing. par is not referenced in the
 * visible lines.
 */
606 void noise_codebook_unquant(
608 const void *par
, /* non-overlapping codebook */
609 int nsf
, /* number of samples in subframe */
619 /* FIXME: This is bad, but I don't think the function ever gets called anyway */
621 exc
[i
]=SHL32(EXTEND32(speex_rand(1, seed
)),SIG_SHIFT
);